In [74]:
from openai import OpenAI
from datasets import Dataset
from tqdm import tqdm
from sklearn.metrics import classification_report

import os
import pandas as pd
%load_ext autoreload
%autoreload 2
from few_shot_testing import load_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
# Locations of the labelled CSV splits (relative to the notebook's working directory).
train_path = 'data/train_data.csv'
test_path = 'data/test_data.csv'

# Two-way mapping between the integer label codes and their names.
int2str = {-1: 'negative', 0: 'neutral', 1: 'positive'}
str2int = {name: code for code, name in int2str.items()}

# Training labels are converted to their string names because they are pasted
# verbatim into the few-shot prompt. Test labels are left as integer codes so
# they can be compared with predictions mapped back through `str2int`.
train_df = pd.read_csv(train_path).assign(
    label=lambda frame: frame['label'].map(int2str)
)
test_df = pd.read_csv(test_path)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

In [57]:
def generate_prompt(test_sample, dataset, num_few_shot_samples=3,
                    few_shot_indices=(2, 11, 13, 12, 15)):
    """Build a few-shot sentiment-classification prompt for one test text.

    The prompt consists of a fixed task description, followed by
    ``num_few_shot_samples`` worked examples taken from ``dataset`` and
    finally the text to classify with an empty ``<sentiment>:`` slot.

    Args:
        test_sample: The text whose sentiment should be predicted.
        dataset: Mapping-like object whose ``'text'`` and ``'label'`` entries
            are indexable by integer position (the examples are read from it).
        num_few_shot_samples: How many examples to include. ``0`` yields a
            zero-shot prompt.
        few_shot_indices: Row positions in ``dataset`` used as examples, in
            the order they appear in the prompt. Only the first
            ``num_few_shot_samples`` entries are used.

    Returns:
        The complete prompt as a single string.

    Raises:
        ValueError: If ``num_few_shot_samples`` is negative or larger than
            the number of available ``few_shot_indices``.
    """
    if not 0 <= num_few_shot_samples <= len(few_shot_indices):
        raise ValueError(
            f"num_few_shot_samples must be between 0 and {len(few_shot_indices)}, "
            f"got {num_few_shot_samples}"
        )

    texts, labels = dataset['text'], dataset['label']

    task_description = """Perform Sentiment classification task.
Given the text assign a sentiment label from ['negative', 'positive', 'neutral'].
Return label only without any other text.\n"""

    # Worked examples: each one shows a text together with its known label.
    for sample_idx in few_shot_indices[:num_few_shot_samples]:
        task_description += (
            f"\n<text>: {texts[sample_idx]}\n<sentiment>: {labels[sample_idx]}\n"
        )

    # The query itself: the label slot is left open for the model to complete.
    task_description += f"\n<text>: {test_sample}\n<sentiment>:"

    return task_description

In [77]:
test_texts, golden_labels = test_dataset['text'], test_dataset['label']
predicted_labels = []

# SECURITY: the API key must come from the environment, never from the
# notebook source. A key was previously hard-coded in this cell; it has been
# exposed and should be revoked.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "running this cell."
    )
client = OpenAI()

for text in tqdm(test_texts):
    # Five worked examples from the training split precede each test text.
    prompt = generate_prompt(text, train_dataset, 5)

    completion = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    # The reply may carry stray whitespace, quotes, a trailing period or
    # different capitalisation; normalise before looking the label up.
    raw_label = completion.choices[0].message.content or ''
    predicted_label = raw_label.strip().strip('.\'"').lower()
    if predicted_label not in str2int:
        raise ValueError(
            f"Unexpected model response {raw_label!r} for text {text!r}; "
            f"expected one of {sorted(str2int)}"
        )
    predicted_labels.append(str2int[predicted_label])

100%|██████████| 70/70 [00:35<00:00,  1.98it/s]


In [78]:
# Per-class precision/recall/F1 of the predictions against the gold labels,
# printed with four decimal places.
print(
    classification_report(
        y_true=golden_labels,
        y_pred=predicted_labels,
        digits=4,
    )
)

              precision    recall  f1-score   support

          -1     0.8696    0.9524    0.9091        21
           0     0.7586    0.8462    0.8000        26
           1     0.9444    0.7391    0.8293        23

    accuracy                         0.8429        70
   macro avg     0.8575    0.8459    0.8461        70
weighted avg     0.8530    0.8429    0.8423        70

