<a href="https://colab.research.google.com/github/kmk4444/Fine-tuning/blob/main/01_assign_labels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Import the necessary libraries: os, pandas, streamlit, dotenv, and langchain_google_genai.
import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Read the Gemini API key from the environment instead of writing the secret
# into the source file. load_dotenv() first loads variables from a .env file
# if one is found, so "google_apikey" may be defined there or in the real
# environment. The "----" placeholder is kept as the fallback, so the value is
# unchanged when the variable is not set.
load_dotenv()
my_key_google = os.getenv("google_apikey", "----")

# Chat model client used for every labeling request further down the script.
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=my_key_google)

# Role and output-format instructions placed in front of every request.
# The (Turkish) text tells the model to act as a Turkish literature expert who
# knows Orhan Veli's poems well, and to answer with nothing but a
# comma-separated list of the poem's subjects, themes, emotions and motifs.
system_prompt = """Sen bir Türk edebiyatı uzmanısın. Türk şiir literatürünü çok iyi biliyorsun.
Özellikle de Orhan Veli şiirlerini çok iyi biliyorsun.
Sana verilen Orhan Veli şiirlerinde ele alınan konuyu, temayı, duyguyu, şiirdeki başlıca motifleri tespit edebiliyorsun.
Yanıtını verirken bu tespit ettiğin konu, tema, duygu veya motifleri aralarında birer virgül olacak şekilde yazıyorsun.
Yanıtında sadece bunları yazıyorsun. Başka hiçbir açıklama ya da ek bilgi vermiyorsun.
"""

# The per-poem question; the poem text itself is appended after it when the
# full prompt is assembled.
prompt = "Orhan Veli'nin aşağıdaki şiirinde ele alınan konu, tema, duygu veya motifleri yaz."

# Input and output workbook are deliberately the same file: the labels are
# written back into the spreadsheet the poems were read from.
source_file_path = target_file_path = "./data/siir.xlsx"

# Page chrome: the same caption is used for the page configuration title and
# for the on-page heading, followed by a divider.
app_title = "Şiir Etiketleme Uygulaması"
st.set_page_config(page_title=app_title)
st.title(app_title)
st.divider()

# Row range to label. Both inputs start at 1 because the labeling step treats
# them as 1-based row numbers and converts them to 0-based positions itself.
start_row = st.number_input(label="Başlangıç Satırı", min_value=1, value=1)
end_row = st.number_input(label="Bitiş Satırı", min_value=1, value=10)

# Pressing this button is what triggers the labeling pass below.
submit_btn = st.button(label="Etiketle")

# Labeling pass: runs only when the "Etiketle" button was pressed.
# Reads the workbook, asks the model to label each poem in the requested row
# range, stores the answers in the 'response' column and writes the workbook
# back to target_file_path.
if submit_btn:

    df = pd.read_excel(source_file_path, engine="openpyxl")

    # A 'response' column that is still empty in the workbook is read back as
    # float64 (all NaN). Writing the model's text into such a column is a
    # dtype-incompatible assignment that recent pandas versions warn about and
    # are slated to reject, so make the column hold arbitrary objects up front.
    # When the column does not exist yet, the first df.at assignment creates it.
    if "response" in df.columns:
        df["response"] = df["response"].astype(object)

    # start_row / end_row are 1-based and inclusive; iloc is 0-based with an
    # exclusive stop, hence start_row - 1 .. end_row.
    selected_rows = df.iloc[start_row - 1:end_row]

    # Use the number of rows actually selected (the slice is clipped at the end
    # of the sheet) so the bar reaches 100% even if end_row is past the last row.
    total_rows = len(selected_rows)

    progress_bar = st.progress(0)

    for processed, (index, row) in enumerate(selected_rows.iterrows(), start=1):
        try:
            siir = row['siir']  # poem text of the current row

            # System instructions, question and poem joined by single spaces.
            full_prompt = f"{system_prompt} {prompt} {siir}"

            AI_response = llm_gemini.invoke(input=full_prompt)

            df.at[index, 'response'] = AI_response.content

        except Exception as e:
            # Best effort: report the failing row and carry on with the next
            # one instead of aborting the whole batch.
            st.error(f"Şu satırı işlerken hata oluştu: {index + 1}: {e}. Sonraki satıra geçiliyor...")

        # Advance the bar for every row, including rows that failed, so it
        # reflects how much of the range has been handled.
        progress_bar.progress(processed / total_rows)

    # index=False keeps the DataFrame index out of the saved sheet.
    df.to_excel(target_file_path, index=False, engine="openpyxl")

    # Remove the progress bar from the page once the pass is finished.
    progress_bar.empty()

    st.success("Etiketleme İşlemi Tamamlandı")

# Runs on every script execution, not only after labeling: load the workbook
# at target_file_path and render it as a table.
labeled_df = pd.read_excel(target_file_path, engine="openpyxl")
st.dataframe(labeled_df)

