In [1]:
import datetime
import time
!pip install numpy
!pip install pandas
!pip install tables
import pandas as pd
import requests
import os
from dotenv import load_dotenv
import json



In [2]:
df = pd.read_pickle('processed_data.pkl')
training_df = df[df["train"]]
testing_df = df[df["test"]]

In [3]:
from pathlib import Path

PROMT_DIR_PATH=Path("prompt-variations")

PROMT_PATHS = sorted(PROMT_DIR_PATH.glob("v*.txt"), key=lambda f:int(f.name.strip("v.txt")))
SYSTEM_PROMPT = [f.read_text() for f in PROMT_PATHS]

In [4]:
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PPLX_KEY")
VALID_MODELS = ['codellama-34b-instruct', 'llama-2-70b-chat', 'mistral-7b-instruct', 'mixtral-8x7b-instruct',
                'pplx-7b-chat', 'pplx-70b-chat']


def call_api(model: str, promt: str, text: str) -> requests.Response:
    payload = {
        "model": model,
        "messages": [
            {
                "role": "system",
                "content": promt
            },
            {
                "role": "user",
                "content": text
            }
        ]
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}"
    }

    return requests.post("https://api.perplexity.ai/chat/completions", json=payload, headers=headers)


def run_model(prompt_id, model, sample_size, print_results=False):
    if model not in VALID_MODELS:
        raise ValueError(f'Invalid model {model}. Valid options are {VALID_MODELS}')

    sample_df = testing_df.sample(n=sample_size)

    results = []
    start_time = datetime.datetime.now()
    for i, (row_index, row) in enumerate(sample_df.iterrows()):
        total = sample_df["text"].count()
        counter = i + 1
        elapsed = datetime.datetime.now() - start_time
        percentage = counter / total
        s_per_gen=elapsed / counter
        print(f'[{elapsed}<{s_per_gen * (total - counter)}, {s_per_gen}s/generations] '
              f'{model} - promt {prompt_id}: {counter}/{total} | {percentage * 100:.2f}%')
        backoff_time = 10
        while True:
            response = call_api(model, SYSTEM_PROMPT[prompt_id], row["text"])
            if response.status_code == 200:
                break
            print(
                f"{model} (Promt {prompt_id}): Error {response.status_code} => sleeping for {backoff_time}s: {response.text}")
            time.sleep(backoff_time)
            backoff_time = min(int(1.2 * backoff_time), 60)
        data = json.loads(response.text)
        answer = data['choices'][0]['message']['content']
        results.append({
            'prompt_id': prompt_id,
            'model': model,
            'sample_size': sample_size,
            "text": row["text"],
            "answer": answer,
            "labeled_hateful": row["hate"]
        })
        if print_results:
            print(row["text"])
            if lines := [line for line in answer.split("\n") if "hate_speech_probability" in line]:
                print(lines)
            else:
                print(answer)

    return pd.DataFrame(results)

In [5]:
from concurrent.futures import ThreadPoolExecutor

total_samples = len(testing_df)
all_runs = pd.read_pickle("all_runs.pkl")

CONFIG = [
    (2, 'mistral-7b-instruct', total_samples),
    (3, 'mistral-7b-instruct', total_samples),
    (4, 'mistral-7b-instruct', total_samples),

]
with ThreadPoolExecutor(max_workers=3) as executor:
    for run in executor.map(lambda config: run_model(*config), CONFIG):
        all_runs = pd.concat([all_runs, run])
        pd.DataFrame(all_runs).to_pickle("all_runs.pkl")

[0:00:00.000391<0:00:00.000390, 0:00:00.000391s/generations] mistral-7b-instruct - promt 2: 1/478 | 0.21%
[0:00:00.000805<0:00:00.000803, 0:00:00.000805s/generations] mistral-7b-instruct - promt 3: 1/478 | 0.21%
[0:00:00.001059<0:00:00.001057, 0:00:00.001059s/generations] mistral-7b-instruct - promt 4: 1/478 | 0.21%
[0:00:03.594943<0:00:03.579901, 0:00:01.797472s/generations] mistral-7b-instruct - promt 2: 2/478 | 0.42%
[0:00:03.768449<0:00:03.752681, 0:00:01.884224s/generations] mistral-7b-instruct - promt 4: 2/478 | 0.42%
[0:00:05.281809<0:00:05.259709, 0:00:02.640904s/generations] mistral-7b-instruct - promt 3: 2/478 | 0.42%
[0:00:07.190704<0:00:07.145574, 0:00:02.396901s/generations] mistral-7b-instruct - promt 2: 3/478 | 0.63%
[0:00:07.780872<0:00:07.732038, 0:00:02.593624s/generations] mistral-7b-instruct - promt 4: 3/478 | 0.63%
[0:00:09.113577<0:00:09.056379, 0:00:03.037859s/generations] mistral-7b-instruct - promt 3: 3/478 | 0.63%
[0:00:11.262155<0:00:11.167911, 0:00:02.815539

In [6]:
all_runs

Unnamed: 0,prompt_id,model,sample_size,text,answer,labeled_hateful
0,0,mistral-7b-instruct,478,So you would refuse even other Nordic whites f...,"{ \n ""relevant_facts"": [\n ""The sentence e...",True
1,0,mistral-7b-instruct,478,Just Google it and you 'll see what I mean .,"{ \n ""relevant_facts"": [\n ""The sentence c...",False
2,0,mistral-7b-instruct,478,"During the early punk scene , he was always se...","{ \n ""relevant_facts"": [\n ""The person in ...",False
3,0,mistral-7b-instruct,478,"DuetschGirlTX, No I live in a tiny ass town wh...","{ \n ""relevant_facts"": [""The speaker is ident...",True
4,0,mistral-7b-instruct,478,YouTube - Teacher Attacked for Being White 9 y...,"```json\n{ \n ""relevant_facts"": [\n ""A 9-y...",False
...,...,...,...,...,...,...
473,4,mistral-7b-instruct,478,Only scored a 120 .,"{\n""relevant_word_definitions"": {\n""scored"": ""...",False
474,4,mistral-7b-instruct,478,The White race is best because when the Creato...,"```json\n{\n ""relevant_word_definitions"": {\n...",True
475,4,mistral-7b-instruct,478,I tried to sign up there once & they refused t...,"{\n""relevant_word_definitions"": {\n},\n""contai...",False
476,4,mistral-7b-instruct,478,"I'm more of a grappler than a striker , I woul...","{\n""relevant_word_definitions"": {},\n""containe...",False
