# Import

In [1]:
import pathlib, os
import pandas as pd
import numpy as np
import google.generativeai as genai

from time import sleep

# Used to securely store my API key
from google.colab import userdata

# Read the key from Colab's userdata store (imported above) instead of
# hardcoding it in the notebook, then pass it to google.generativeai.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# List of Models

*   gemini-pro: optimized for text-only prompts.
*   gemini-pro-vision: optimized for text-and-image prompts.





In [2]:
def list_genai_models():
    """Print the name of each available model that supports 'generateContent'."""
    supported_names = (
        entry.name
        for entry in genai.list_models()
        if 'generateContent' in entry.supported_generation_methods
    )
    for model_name in supported_names:
        print(model_name)

list_genai_models()

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


# All Defined Functions

In [3]:
def read_file_as_string(filename):
  """Return the entire contents of a text file as one string.

  Args:
    filename: Path of the file to read.

  Returns:
    The file's text, decoded as UTF-8.

  Raises:
    OSError: If the file cannot be opened.
    UnicodeDecodeError: If the file is not valid UTF-8.
  """
  # Pin the encoding so the text does not depend on the platform's default
  # locale encoding.
  with open(filename, "r", encoding="utf-8") as f:
    return f.read()

def generate_response(prompt, text):
    """Send a prompt and a text to the notebook-level `model` and return the reply text.

    Args:
        prompt: Instruction string passed as the first content item.
        text: Text passed as the second content item.

    Returns:
        The `text` of the first part of the first candidate in the response.
    """
    # `model` is not a parameter: it is looked up as a global when this runs,
    # so it must have been assigned before the first call.
    reply = model.generate_content([prompt, text])
    first_candidate = reply.candidates[0]
    first_part = first_candidate.content.parts[0]
    return first_part.text

def parse_predictions(response):
    """Parse 'label = value' lines from a model response into a dict.

    Each line containing an '=' is treated as one prediction: the text before
    the first '=' is the label and the text after it is the integer value.
    Blank lines, whitespace-only lines and lines without an '=' (for example
    an introductory sentence) are ignored, so one stray line does not discard
    the valid predictions around it.

    Args:
        response: Response text, one 'label = value' pair per line.

    Returns:
        dict mapping each stripped label to its value as an int. Empty when
        the response contains no pairs.

    Raises:
        ValueError: If the text after '=' on a line is not an integer.
    """
    predictions_dict = {}

    # splitlines() also handles '\r\n' line endings.
    for line in response.splitlines():
        # Split on the first '=' only; a line without one is not a prediction.
        label, separator, value = line.partition("=")
        if not separator:
            continue
        predictions_dict[label.strip()] = int(value.strip())

    return predictions_dict

# Classification

In [None]:
# Load model
# `model` is a notebook-level global; generate_response() looks it up when called.
model = genai.GenerativeModel('gemini-1.0-pro-latest')

# Load data from CSV file
dataset = pd.read_csv('/content/test_toxic_dataset_without_label.csv')
results = []
classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

max_retries = 5  # Maximum number of retries
wait_time = 60  # Waiting time in seconds between retries

# The prompt files do not change during the run, so read each one once here
# instead of once per row. /content/prompt{i} is used for classes[i-1].
prompts = [read_file_as_string(f"/content/prompt{i}") for i in range(1, len(classes) + 1)]

for ind, data in dataset.iterrows():
  # One request per (row, class); `data` is this row's Series and collects the labels.
  for label_name, prompt in zip(classes, prompts):
    for attempt in range(max_retries + 1):
      try:
        response = generate_response(prompt, data['processed_text'])

        print(f"Index: {ind}\nText: {data['processed_text']}\n{response}\n")
        predictions = parse_predictions(response)

        # Store every label the response contained on the row.
        for label, value in predictions.items():
          data[label] = value

        break  # Successful response, exit the retry loop
      except Exception as e:
        if "429" in str(e):
          # Rate limited: wait and try again until the attempts are used up.
          if attempt == max_retries:
            print(f"Error: Reached maximum retries for index: {ind}")
          else:
            print(f"Warning: Too many requests (attempt {attempt+1}/{max_retries}). Waiting {wait_time} seconds...")
            sleep(wait_time)  # Wait before retrying
        else:
          # Any other failure is not retried, but it is reported so the reason
          # for a -1 placeholder can be found in the cell output.
          print(f"Error: request failed for index: {ind} ({label_name}): {e}")
          break

    # No usable value for this class: mark it with -1 so it can be retried later.
    # pd.isna with .get also covers a label column that is absent from the CSV.
    if pd.isna(data.get(label_name)):
      print(f"Index: {ind}\nneed to input data for {label_name}\n")
      data[label_name] = -1
  results.append(data)

# Compile results into a DataFrame
result_df = pd.DataFrame(results)

# Save the result to a new CSV file
result_df.to_csv('labeled_dataset.csv', index=False, encoding='utf-8')
#####################################################################################################################

Index: 0
Text: havent quite reached technological singularity yet incorrect yet convincing answers ai still incorrect therefore useless chatgpt
toxic = 0

Index: 0
Text: havent quite reached technological singularity yet incorrect yet convincing answers ai still incorrect therefore useless chatgpt
severe_toxic = 0

Index: 0
Text: havent quite reached technological singularity yet incorrect yet convincing answers ai still incorrect therefore useless chatgpt
obscene = 0

Index: 0
need to input data for threat

Index: 0
Text: havent quite reached technological singularity yet incorrect yet convincing answers ai still incorrect therefore useless chatgpt
insult = 1

Index: 0
Text: havent quite reached technological singularity yet incorrect yet convincing answers ai still incorrect therefore useless chatgpt
identity_hate = 0

Index: 1
Text: chatgpt like donald trump convincing bullshit times even true
toxic = 0

Index: 1
Text: chatgpt like donald trump convincing bullshit times even true
se

# Classifying Again to Get Responses for the Missed Ones

In [None]:
# Load data from CSV file
dataset = pd.read_csv('/content/labeled_dataset.csv')
results = []
classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

max_retries = 5  # Maximum number of retries
wait_time = 60  # Waiting time in seconds between retries

# The prompt files do not change during the run, so read each one once here
# instead of once per row. /content/prompt{i} is used for classes[i-1].
prompts = [read_file_as_string(f"/content/prompt{i}") for i in range(1, len(classes) + 1)]

for ind, data in dataset.iterrows():
  for label_name, prompt in zip(classes, prompts):
    # NOTE(review): values equal to 0 are re-queried as well as the -1
    # placeholders - confirm that re-checking 0 labels is intended.
    if data[label_name] == 0 or data[label_name] == -1:
      print(f"prev response: {label_name} = {data[label_name]}")
      for attempt in range(max_retries + 1):
        try:
          # generate_response() uses the global `model`, which this cell does not create.
          response = generate_response(prompt, data['processed_text'])

          print(f"Index: {ind}\nText: {data['processed_text']}\n{response}\n")
          predictions = parse_predictions(response)

          # Overwrite the stored value with every label the response contained.
          for label, value in predictions.items():
            data[label] = value

          break  # Successful response, exit the retry loop
        except Exception as e:
          if "429" in str(e):
            # Rate limited: wait and try again until the attempts are used up.
            if attempt == max_retries:
              print(f"Error: Reached maximum retries for index: {ind}")
            else:
              print(f"Warning: Too many requests (attempt {attempt+1}/{max_retries}). Waiting {wait_time} seconds...")
              sleep(wait_time)  # Wait before retrying
          else:
            # Any other failure is not retried and the previous value is kept;
            # report it so the failure (e.g. an undefined `model`) is visible.
            print(f"Error: request failed for index: {ind} ({label_name}): {e}")
            break
  results.append(data)

# Compile results into a DataFrame
result_df = pd.DataFrame(results)

# Save the result to a new CSV file
result_df.to_csv('gemini_response.csv', index=False, encoding='utf-8')
#####################################################################################################################

# Deleting Invalid Response

In [None]:
# Load the data from the CSV files
# df1 presumably holds the reference labels (its label columns become 'true_*'
# below) and df2 the model responses - verify against how the files were produced.
df1 = pd.read_csv('/content/test_toxic_dataset.csv')
df2 = pd.read_csv('/content/gemini_response.csv')

# Label columns to copy from df1. Defined here, as in the classification cells,
# so this cell does not depend on a variable left over from a long-running cell.
classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# The frames are joined side by side on their row index, so they must describe
# the same rows; a different row count would silently misalign the labels.
if len(df1) != len(df2):
  raise ValueError(
      f"Row count mismatch: test_toxic_dataset.csv has {len(df1)} rows, "
      f"gemini_response.csv has {len(df2)} rows")

# Take the label columns from the first DataFrame with a 'true_' prefix so they
# do not collide with the same-named columns in df2.
columns_to_add = df1[classes].add_prefix('true_')

# Concatenate the renamed columns to the second DataFrame
# (this only appends columns; no rows are removed here).
df2 = pd.concat([df2, columns_to_add], axis=1)

# Save the updated DataFrame back to a CSV file
df2.to_csv('gemini_response_final.csv', index=False)
