# Test GPT in Python

In [1]:
import pandas as pd
import numpy as np
import re
from openai import OpenAI

## Import Data

In [2]:
# Sentence table: tab-separated file, first column used as the row index.
df = pd.read_csv(
    "./sst_dataset/SST2-Data/SST2-Data/stanfordSentimentTreebank/stanfordSentimentTreebank/datasetSentences.txt",
    sep="\t",
    index_col=0,
)

# Split-assignment table: read with the default separator, first column used
# as the row index. Its 'splitset_label' column is used later to pick a split.
datasetSplit = pd.read_csv(
    "./sst_dataset/SST2-Data/SST2-Data/stanfordSentimentTreebank/stanfordSentimentTreebank/datasetSplit.txt",
    index_col=0,
)

#### The sentiment label of each sentence is at the beginning of its line in this file, for example: "( 1 ..."

In [3]:
file_path = "./sst_dataset/SST2-Data/SST2-Data/trainDevTestTrees_PTB/trees/test.txt"

# Matches the first run of digits found anywhere in a line.
first_number_pattern = re.compile(r'\d+')

# Scan the file line by line and keep the first number of every line that
# contains one. Lines without any digit contribute nothing to the list.
with open(file_path, 'r') as tree_file:
    line_matches = (first_number_pattern.search(tree_line) for tree_line in tree_file)
    first_numbers = [int(found.group()) for found in line_matches if found]

# One row per collected number, in file order.
labels_test = pd.DataFrame(first_numbers, columns=['sentiment_labels'])

### Separate test data

In [4]:
# Boolean mask over the split table: True where splitset_label equals 2,
# the value this notebook treats as the test split.
is_test_row = datasetSplit['splitset_label'] == 2

# Keep only the sentences selected by the mask.
df_test = df[is_test_row]
df_test

Unnamed: 0_level_0,sentence
sentence_index,Unnamed: 1_level_1
3,Effective but too-tepid biopic
4,If you sometimes like to go to the movies to h...
5,"Emerges as something rare , an issue movie tha..."
6,The film provides some great insight into the ...
7,Offers that rare combination of entertainment ...
...,...
11621,An imaginative comedy\/thriller .
11623,"-LRB- A -RRB- rare , beautiful film ."
11626,-LRB- An -RRB- hilarious romantic comedy .
11628,Never -LRB- sinks -RRB- into exploitation .


## gpt-3.5-turbo classification

In [5]:
# Build the API client without embedding a credential in the notebook.
# When no api_key argument is passed, the OpenAI client reads the key from the
# OPENAI_API_KEY environment variable (and raises if it is not set), so the
# secret never ends up in the saved notebook or its version history.
client = OpenAI()

def classify_sentiment_zero_shot(sentence):
    """Ask gpt-3.5-turbo for a 0-4 sentiment rating of one sentence.

    The sentence is embedded in a zero-shot prompt that asks for a single
    digit (0 = very negative ... 4 = very positive) and sent as one user
    message through the module-level ``client``.

    Args:
        sentence: The text to classify; it is interpolated into the prompt.

    Returns:
        The message content of the first returned choice, exactly as the API
        delivered it. It is not validated here, so callers must handle
        replies that are not one of the digits 0-4.
    """
    prompt = f"""Classify the sentiment of this sentence into one of the following categories: 
                very negative, negative, neutral, positive, very positive. Only answer with 0 for 
                very negative, 1 for negative, 2 for neutral, 3 for positive or 4 for very 
                positive. Sentence: '{sentence}'"""

    chat_completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        # The requested answer is a single digit, so one token is enough.
        max_tokens=1,
        # Minimise sampling randomness so repeated evaluation runs give
        # comparable labels for the same sentence.
        temperature=0,
    )

    return chat_completion.choices[0].message.content

In [6]:
#################################################################################################################
# ATTENTION! Setting RUN_PAID_CLASSIFICATION to True costs money:
# it sends one API request for every sentence in df_test.
RUN_PAID_CLASSIFICATION = False

# Location of the predictions saved by an earlier paid run (the same file the
# last cell of this notebook writes).
PREDICTIONS_PATH = './sst_dataset/SST2-Data/SST2-Data/stanfordSentimentTreebank/stanfordSentimentTreebank/predicted_sentiment_labels.txt'

if RUN_PAID_CLASSIFICATION:
    predicted_labels_list = [classify_sentiment_zero_shot(sentence) for sentence in df_test['sentence']]
else:
    # Reuse the saved predictions so that the cells below still work after
    # "Restart Kernel -> Run All" without triggering any paid request.
    # Raises FileNotFoundError if no paid run has been saved yet.
    predicted_labels_list = pd.read_csv(PREDICTIONS_PATH, sep='\t', index_col=0)['classified_sentiment'].tolist()
#################################################################################################################

### Create dataframe of predicted labels

In [33]:
# Build the prediction table in one chain:
#   1. one row per prediction, initially labelled with df_test's index;
#   2. convert the raw answers to numbers, turning anything unparsable into NaN;
#   3. drop the original index in favour of a fresh 0..n-1 index.
predicted_labels_df = (
    pd.DataFrame(predicted_labels_list, index=df_test.index, columns=['classified_sentiment'])
    .assign(
        classified_sentiment=lambda frame: pd.to_numeric(frame['classified_sentiment'], errors='coerce')
    )
    .reset_index(drop=True)
)

# Number of "wrong answers", i.e. replies that could not be converted to a number.
nan_count = predicted_labels_df.isna().to_numpy().sum()
print(nan_count)

1


In [36]:
# Mask that excludes NaN predictions from the accuracy calculation.
mask = predicted_labels_df['classified_sentiment'].notna()

# Compare true and predicted labels only where a numeric prediction exists;
# the mean of the resulting booleans is the fraction of matching labels.
true_labels = labels_test['sentiment_labels'][mask]
predicted_labels = predicted_labels_df['classified_sentiment'][mask]
accuracy = (true_labels == predicted_labels).mean()
print("Accuracy: ", accuracy)

#comparison_df = pd.concat([labels_test.iloc[:20, :], predicted_labels_df], axis=1)
#print(comparison_df)

Accuracy:  0.47442281575373474


## Write predicted labels to file

In [37]:
# Save the numeric predictions as a tab-separated file, including the
# DataFrame's index as the first column.
predicted_labels_df.to_csv(
    './sst_dataset/SST2-Data/SST2-Data/stanfordSentimentTreebank/stanfordSentimentTreebank/predicted_sentiment_labels.txt',
    sep='\t',
    index=True,
)