In [1]:
from openai import OpenAI
from datasets import Dataset
from tqdm import tqdm
from sklearn.metrics import classification_report

import os
import pandas as pd
%load_ext autoreload
%autoreload 2
#from few_shot_testing import load_data

In [2]:
def generate_prompt(test_sample, few_shot_data, num_few_shot_samples=3):
    """Build a few-shot sentiment-classification prompt.

    The prompt is a fixed task description, followed by
    ``num_few_shot_samples`` labelled demonstrations taken in order from the
    start of ``few_shot_data``, followed by ``test_sample`` with an empty
    ``<sentiment>:`` slot left for the model to complete.

    Args:
        test_sample: Text to classify; interpolated into the final block.
        few_shot_data: Integer-indexable, sized collection of mappings that
            each provide a ``'text'`` and a ``'label'`` entry.
        num_few_shot_samples: Number of demonstrations to include. ``0``
            yields a zero-shot prompt.

    Returns:
        The assembled prompt as a single string.

    Raises:
        ValueError: If ``num_few_shot_samples`` is negative.
        IndexError: If more demonstrations are requested than
            ``few_shot_data`` contains.
    """
    if num_few_shot_samples < 0:
        # A negative count would make range() empty and silently produce a
        # prompt that does not even contain the test sample.
        raise ValueError(
            f"num_few_shot_samples must be >= 0, got {num_few_shot_samples}"
        )
    if num_few_shot_samples > len(few_shot_data):
        raise IndexError(
            f"requested {num_few_shot_samples} few-shot samples but only "
            f"{len(few_shot_data)} are available"
        )

    parts = [
        "Perform Sentiment classification task.\n"
        "Given the text assign a sentiment label from ['positive', 'negative', 'neutral'].\n"
        "Return label only without any other text.\n"
    ]

    # Index one item at a time (rather than slicing) so that any container
    # supporting integer indexing works, not just lists.
    for i in range(num_few_shot_samples):
        sample = few_shot_data[i]
        parts.append(f"\n<text>: {sample['text']}\n<sentiment>: {sample['label']}\n")

    # The query goes last, with the label left blank for the model to fill in.
    parts.append(f"\n<text>: {test_sample}\n<sentiment>:")

    return "".join(parts)

In [5]:
# Locations of the train / test splits (CSV files, relative to the notebook).
train_path = 'data/train_data.csv'
test_path = 'data/test_data.csv'

# Label encodings: integer code -> name, and the inverse name -> code.
int2str = {-1: 'negative', 0: 'neutral', 1: 'positive'}
str2int = {name: code for code, name in int2str.items()}

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
# Only the training labels are converted to their string names; the test
# labels are kept exactly as loaded from the CSV.
train_df['label'] = train_df['label'].map(int2str)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

In [6]:
# Hand-picked train-set indices used as few-shot demonstrations,
# ordered (pos, neg, neut, pos, neg, neut).
chosen_sample_idx = [6, 10, 5, 7, 60, 145]
chosen_samples = [train_dataset[row_idx] for row_idx in chosen_sample_idx]
# Presumably the shot counts to evaluate; not referenced by the cells visible here.
num_few_shot_samples = [0, 1, 3, 6]

In [12]:
# Show the demonstration at position 2 of chosen_samples (train index 5 in
# chosen_sample_idx); this is the sample passed to generate_prompt in the
# inference cell.
chosen_samples[2]

{'text': 'although i agree with the general principle that we should keep things simple, imo thats also on the user. if they want to use any option per-fragment, its on them if that over complicates their work. afacit, these options are largely opaque at the udp layer - they dont authomatically do anything unless its defined - as per gorrys draft. its up to him to decide if theres a limit to his api and algorithms that use req/res; others can do otherwise. again, udp is all about the u.',
 'label': 'neutral'}

In [13]:
# One-shot evaluation: classify every test text with the chat model and
# collect the predictions as integer label codes (via str2int).
test_texts, golden_labels = test_dataset['text'], test_dataset['label']
predicted_labels = []

# OpenAI() reads the key from the OPENAI_API_KEY environment variable.
# SECURITY: never hardcode the key in the notebook. Any key that was ever
# committed in this cell must be treated as leaked and revoked.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting Jupyter instead of hardcoding it in the notebook."
    )
client = OpenAI()

for text in tqdm(test_texts):
    # The single demonstration is chosen_samples[2].
    prompt = generate_prompt(text, [chosen_samples[2]], 1)

    completion = client.chat.completions.create(
        model='gpt-3.5-turbo-1106',
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    # message.content may be None; treat that as an empty reply.
    raw_label = completion.choices[0].message.content or ""
    # Tolerate harmless formatting differences in the reply: surrounding
    # whitespace, quotes, a trailing period, and letter case.
    label = raw_label.strip().strip("'\".").strip().lower()
    if label not in str2int:
        raise ValueError(
            f"Model returned an unrecognised label {raw_label!r} "
            f"(expected one of {sorted(str2int)}) for text: {text!r}"
        )
    predicted_labels.append(str2int[label])

100%|██████████| 79/79 [00:31<00:00,  2.47it/s]


In [14]:
# Per-class precision / recall / F1 for the integer label codes, printed
# with four decimal places.
print(
    classification_report(
        y_true=golden_labels,
        y_pred=predicted_labels,
        digits=4,
    )
)

              precision    recall  f1-score   support

          -1     0.8519    1.0000    0.9200        23
           0     0.9583    0.7931    0.8679        29
           1     0.9286    0.9630    0.9455        27

    accuracy                         0.9114        79
   macro avg     0.9129    0.9187    0.9111        79
weighted avg     0.9172    0.9114    0.9096        79

