# Model API Interaction

This is the code used to interact with the models that were ran. A similar version of this code is also used to interact and process the GPT results.

In [3]:
from openai import OpenAI
from dotenv import load_dotenv
from mistralai import Mistral
load_dotenv()
import pandas as pd
import time
import openai
from tqdm import tqdm
import numpy as np
import csv
import os
client = Mistral(
	api_key=os.getenv('MISTRAL_API_KEY')
)

In [4]:
df = pd.read_csv('./processed_full_500.tsv', sep="\t")
df.head()

Unnamed: 0.1,Unnamed: 0,ID,class,post,implicit_class,target,target_new,binary_class
0,0,893818124356726784,not_hate,the new centry foundatin is a white supremacis...,,,[],not_hate
1,1,810629155188539392,not_hate,i dont want to be controversial but im white 12,,,[],not_hate
2,2,5393720593,not_hate,afire stands for americans for immigration ref...,,,[],not_hate
3,3,915685988483108864,not_hate,its 75 white which is very high when the 22 po...,,,[],not_hate
4,4,752622004885389312,not_hate,obama claims he doesnt know the motive of a se...,,,[],not_hate


In [5]:
prompt = '''Your goal is to extract objective hate-speech classifications from texts, as well as a list of targeted groups or demographics if the text is deemed hateful.
You will be provided with a text, and you will output a short json object containing the following information:

{
  class: string // either 'not_hate' or 'hate'
  targets: string[] // Array of targeted groups or demographics if the text is deemed hateful, otherwise empty array
}

As an expert in hate speech analysis, it is critical that you make the correct decision without bias or excessive sensitivity toward specific groups or topics. You must avoid misclassifying benign statements as hate speech due to over-sensitivity or fairness concerns.
If the text is hateful, accurately identify all groups being targeted.
Ensure that your judgment is evidence-based, impartial, and grounded in the objective context of the content.
This task is extremely important, and your correct classification will significantly contribute to my work.
'''

In [6]:
def classify_text(system_prompt, user_prompt, model, max_retries=5, temperature=0.01):
    retries = 0
    while retries < max_retries:
        try:
            completion = client.chat.complete(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=temperature,
                response_format={ "type": "json_object" }
            )
            content = completion.choices[0].message.content
            return content
        except Exception as e:
            retries += 1
            wait_time = 2 ** retries
            print(f"Rate limit hit. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
    print("Max retries exceeded.")
    return "No response"

mistral large

In [7]:
# Load previously saved results if they exist
output_file = 'results_large.csv'

# Check if the file exists, and if so, load the saved progress
if os.path.exists(output_file):
    processed_df = pd.read_csv(output_file)
    processed_indices = set(processed_df['index'])
    results = processed_df.to_dict(orient='records')
    print(f"Resuming from {len(results)} processed rows.")
else:
    results = []
    processed_indices = set()

# Iterate over the DataFrame and process remaining rows
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    if index in processed_indices:
        # Skip rows that have already been processed
        continue
    
    model_output = classify_text(prompt, row['post'], model="mistral-large-latest")

    # Append the result to the list
    results.append({
        'index': index,
        'output': model_output
    })

    # Save results to CSV after each iteration to ensure progress is saved
    pd.DataFrame(results).to_csv(output_file, index=False)

print("Processing complete.")


  0%|          | 0/2000 [00:00<?, ?it/s]

100%|██████████| 2000/2000 [51:29<00:00,  1.54s/it] 

Processing complete.





mistral small

In [8]:
# Load previously saved results if they exist
output_file = 'results_small.csv'

# Check if the file exists, and if so, load the saved progress
if os.path.exists(output_file):
    processed_df = pd.read_csv(output_file)
    processed_indices = set(processed_df['index'])
    results = processed_df.to_dict(orient='records')
    print(f"Resuming from {len(results)} processed rows.")
else:
    results = []
    processed_indices = set()

# Iterate over the DataFrame and process remaining rows
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    if index in processed_indices:
        # Skip rows that have already been processed
        continue
    
    model_output = classify_text(prompt, row['post'], model="mistral-small-latest")

    # Append the result to the list
    results.append({
        'index': index,
        'output': model_output
    })

    # Save results to CSV after each iteration to ensure progress is saved
    pd.DataFrame(results).to_csv(output_file, index=False)

print("Processing complete.")

  4%|▍         | 89/2000 [01:27<27:12,  1.17it/s]

Rate limit hit. Retrying in 2 seconds...


  5%|▌         | 109/2000 [01:47<27:01,  1.17it/s]

Rate limit hit. Retrying in 2 seconds...


  7%|▋         | 147/2000 [02:25<24:56,  1.24it/s]

Rate limit hit. Retrying in 2 seconds...


  8%|▊         | 167/2000 [02:45<27:35,  1.11it/s]

Rate limit hit. Retrying in 2 seconds...


  9%|▉         | 178/2000 [02:56<25:26,  1.19it/s]

Rate limit hit. Retrying in 2 seconds...


 10%|▉         | 198/2000 [03:16<23:50,  1.26it/s]

Rate limit hit. Retrying in 2 seconds...


 10%|█         | 210/2000 [03:27<22:07,  1.35it/s]

Rate limit hit. Retrying in 2 seconds...


 11%|█         | 219/2000 [03:37<26:48,  1.11it/s]

Rate limit hit. Retrying in 2 seconds...


 14%|█▍        | 275/2000 [04:32<26:08,  1.10it/s]

Rate limit hit. Retrying in 2 seconds...


 16%|█▌        | 323/2000 [05:19<24:35,  1.14it/s]

Rate limit hit. Retrying in 2 seconds...


 17%|█▋        | 348/2000 [05:42<23:13,  1.19it/s]

Rate limit hit. Retrying in 2 seconds...


 20%|██        | 410/2000 [06:44<21:22,  1.24it/s]

Rate limit hit. Retrying in 2 seconds...


 21%|██        | 415/2000 [06:52<30:03,  1.14s/it]

Rate limit hit. Retrying in 2 seconds...
Rate limit hit. Retrying in 4 seconds...
Rate limit hit. Retrying in 8 seconds...
Rate limit hit. Retrying in 16 seconds...
Rate limit hit. Retrying in 32 seconds...


 21%|██        | 416/2000 [12:54<48:11:26, 109.52s/it]

Max retries exceeded.


 25%|██▌       | 508/2000 [14:23<23:47,  1.05it/s]    

Rate limit hit. Retrying in 2 seconds...


 26%|██▌       | 515/2000 [14:31<21:53,  1.13it/s]

Rate limit hit. Retrying in 2 seconds...


 26%|██▋       | 525/2000 [14:42<20:31,  1.20it/s]

Rate limit hit. Retrying in 2 seconds...


 28%|██▊       | 550/2000 [15:06<21:53,  1.10it/s]

Rate limit hit. Retrying in 2 seconds...


 32%|███▏      | 637/2000 [16:33<19:12,  1.18it/s]

Rate limit hit. Retrying in 2 seconds...


 37%|███▋      | 739/2000 [18:14<18:10,  1.16it/s]

Rate limit hit. Retrying in 2 seconds...


 39%|███▉      | 782/2000 [18:57<17:06,  1.19it/s]

Rate limit hit. Retrying in 2 seconds...


 40%|████      | 805/2000 [19:20<16:35,  1.20it/s]

Rate limit hit. Retrying in 2 seconds...


 42%|████▏     | 831/2000 [19:46<15:56,  1.22it/s]

Rate limit hit. Retrying in 2 seconds...


 44%|████▍     | 884/2000 [20:39<17:57,  1.04it/s]

Rate limit hit. Retrying in 2 seconds...


 46%|████▌     | 917/2000 [21:11<15:04,  1.20it/s]

Rate limit hit. Retrying in 2 seconds...


 48%|████▊     | 961/2000 [21:54<14:42,  1.18it/s]

Rate limit hit. Retrying in 2 seconds...


 50%|████▉     | 990/2000 [22:23<13:10,  1.28it/s]

Rate limit hit. Retrying in 2 seconds...


 50%|█████     | 1001/2000 [22:34<12:05,  1.38it/s]

Rate limit hit. Retrying in 2 seconds...


 51%|█████     | 1017/2000 [22:49<11:58,  1.37it/s]

Rate limit hit. Retrying in 2 seconds...


 55%|█████▌    | 1109/2000 [24:22<13:49,  1.07it/s]

Rate limit hit. Retrying in 2 seconds...


 59%|█████▉    | 1176/2000 [25:29<11:36,  1.18it/s]

Rate limit hit. Retrying in 2 seconds...


 61%|██████    | 1216/2000 [26:08<10:57,  1.19it/s]

Rate limit hit. Retrying in 2 seconds...


 65%|██████▍   | 1297/2000 [27:29<10:08,  1.15it/s]

Rate limit hit. Retrying in 2 seconds...


 84%|████████▍ | 1690/2000 [34:28<04:30,  1.14it/s]

Rate limit hit. Retrying in 2 seconds...


 87%|████████▋ | 1731/2000 [35:09<04:00,  1.12it/s]

Rate limit hit. Retrying in 2 seconds...


 96%|█████████▋| 1929/2000 [38:29<01:01,  1.15it/s]

Rate limit hit. Retrying in 2 seconds...


 98%|█████████▊| 1959/2000 [38:58<00:34,  1.20it/s]

Rate limit hit. Retrying in 2 seconds...


100%|█████████▉| 1993/2000 [39:31<00:06,  1.14it/s]

Rate limit hit. Retrying in 2 seconds...


100%|██████████| 2000/2000 [39:41<00:00,  1.19s/it]

Processing complete.



