In [None]:
from openai import OpenAI
client = OpenAI()

import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

In [None]:

def get_prob(api_response, target_class):
    """Return the probability that the first generated token is ``target_class``.

    Looks at the ``top_logprobs`` candidates attached to the first token of the
    first choice in ``api_response`` and exponentiates the log-probability of
    the candidate whose ``token`` is exactly equal to ``target_class``.

    Parameters
    ----------
    api_response
        Object exposing ``choices[0].logprobs.content[0].top_logprobs``, an
        iterable of entries with ``token`` and ``logprob`` attributes.
    target_class : str
        Token text to look for (exact, case-sensitive match).

    Returns
    -------
    ``np.exp(logprob)`` of the first matching candidate, or ``0`` when no
    candidate matches ``target_class``.
    """
    first_token_candidates = api_response.choices[0].logprobs.content[0].top_logprobs

    # Scan the candidates in order and stop at the first exact match; the
    # default of 0 means "target class not among the returned candidates".
    return next(
        (
            np.exp(candidate.logprob)
            for candidate in first_token_candidates
            if candidate.token == target_class
        ),
        0,
    )

In [None]:
# Download the full Amazon review dataset; the columns used in this notebook
# are 'reviewText' (the review) and 'Positive' (the 0/1 label).
reviews_all = pd.read_csv('https://raw.githubusercontent.com/pycaret/pycaret/master/datasets/amazon.csv')

# Draw a reproducible, class-balanced sample: 100 reviews per label.
df_0 = reviews_all.loc[reviews_all['Positive'].eq(0)].sample(n=100, random_state=42)
df_1 = reviews_all.loc[reviews_all['Positive'].eq(1)].sample(n=100, random_state=42)

# Stack negatives then positives and renumber the rows 0..199.
df = pd.concat([df_0, df_1]).reset_index(drop=True)

In [None]:
# Display the balanced evaluation sample: the 100 rows with Positive == 0
# come first, followed by the 100 rows with Positive == 1.
df

In [None]:
# Smoke test: send one hand-written sentence through the classifier prompt and
# inspect the candidate tokens returned for the first generated token.
system_prompt = """You are an expert in sentiment analysis. You will receive a text that you have to classify. 
- if the text is positive, then return 'positive'
- if the text is negative, then return 'negative' 
Return only 'positive' or 'negative'.
The output should have 8 characters and all in lowercase. No other values are allowed!
"""

demo_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "Today I have dinner plans."},
]

api_response = client.chat.completions.create(
    model='gpt-3.5-turbo',
    messages=demo_messages,
    temperature=0,
    logprobs=True,   # attach log-probabilities to the generated tokens
    top_logprobs=5,  # and the 5 most likely candidates per token position
)

# Cell output: the candidate list (token + logprob) for the first token.
first_token = api_response.choices[0].logprobs.content[0]
first_token.top_logprobs

In [None]:
# Raw probability of each label being the first generated token
# (get_prob returns 0 when the label is not among the returned candidates).
prob_positive = get_prob(api_response, 'positive')
prob_negative = get_prob(api_response, 'negative')

# Renormalize over the two labels so that they sum to 1.
# If neither label appears in top_logprobs both values are 0 and the division
# would raise ZeroDivisionError, so fall back to an uninformative 0.5 / 0.5.
sum_prob = prob_positive + prob_negative
if sum_prob > 0:
    prob_positive = prob_positive/sum_prob
    prob_negative = prob_negative/sum_prob
else:
    prob_positive = prob_negative = 0.5
print(f"{prob_positive=}")
print(f"{prob_negative=}")

############################

In [None]:
def gpt_sentimental_classif(text, model):
    """Classify ``text`` with a chat model and return normalized label probabilities.

    Sends ``text`` to ``client.chat.completions.create`` together with a fixed
    sentiment-analysis system prompt, reads the probability of the first
    generated token being ``'positive'`` / ``'negative'`` via ``get_prob``, and
    rescales the two values so that they sum to 1.

    Parameters
    ----------
    text : str
        The text to classify (sent as the user message).
    model : str
        Model name forwarded to the chat-completions endpoint.

    Returns
    -------
    tuple
        ``(prob_positive, prob_negative)``. When neither label is found among
        the returned ``top_logprobs`` candidates, a ``RuntimeWarning`` is
        emitted and the neutral pair ``(0.5, 0.5)`` is returned instead of
        raising ``ZeroDivisionError``.
    """
    import warnings  # local import keeps this cell self-contained

    # NOTE: the indentation inside this literal is part of the prompt that is
    # sent to the model; it is kept byte-identical on purpose so that results
    # stay comparable with previously cached runs.
    system_prompt = """You are an expert in sentiment analysis. You will receive a text that you have to classify. 
    - if the text is positive, then return 'positive'
    - if the text is negative, then return 'negative' 
    Return only 'positive' or 'negative'.
    The output should have 8 characters and all in lowercase. No other values are allowed!
    """

    # Request the model to classify the sentiment of the provided text.
    api_response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content":text}
            ],
        temperature=0,  # Reduce sampling randomness so repeated runs agree as far as possible
        logprobs=True,  # Return token log-probabilities; they are converted to class probabilities below
        top_logprobs=5  # Return the 5 most likely candidates for each token position
    )

    # Probability of each label being the first generated token
    # (0 when the label is not among the returned candidates).
    prob_positive = get_prob(api_response, 'positive')
    prob_negative = get_prob(api_response, 'negative')

    # Normalize the probabilities so that they sum up to 1.
    sum_prob = prob_positive + prob_negative
    if sum_prob == 0:
        # Neither label was among the candidates: dividing would raise
        # ZeroDivisionError and abort a whole (paid) batch of API calls.
        # Report the problem and return an uninformative result instead.
        warnings.warn(
            "Neither 'positive' nor 'negative' was found in top_logprobs; "
            "returning the neutral probabilities (0.5, 0.5).",
            RuntimeWarning,
        )
        return 0.5, 0.5

    # Return the normalized probabilities.
    return prob_positive/sum_prob, prob_negative/sum_prob


In [None]:
import os 
import pickle

def make_exp(model, target_file):
    """Run (or reload) the sentiment experiment for ``model`` over ``df``.

    Behaviour:

    * If ``target_file`` exists, its pickled results are loaded and no API
      call is made.
    * Otherwise every entry of ``df['reviewText']`` is classified with
      ``gpt_sentimental_classif`` and the results are pickled to
      ``target_file``.
    * If the run is interrupted (exception or keyboard interrupt), the results
      obtained so far are pickled to ``<target_file>.partial`` and the error is
      re-raised; the next call resumes after the last stored row instead of
      paying for the same API calls again.

    Parameters
    ----------
    model : str
        Model name forwarded to ``gpt_sentimental_classif``.
    target_file : str or path-like
        Location of the pickle cache for the complete result list.

    Returns
    -------
    pandas.DataFrame
        One row per review with columns ``prob_positive`` and ``prob_negative``.
    """
    result_columns = ['prob_positive', 'prob_negative']

    # Check if results file already exists (to avoid redo & to reduce unnecessary costs).
    if os.path.exists(target_file):
        # SECURITY NOTE: pickle.load can execute arbitrary code. Only load
        # cache files that were written by this notebook.
        with open(target_file, 'rb') as file:
            res = pickle.load(file)
        return pd.DataFrame(res, columns=result_columns)

    # Resume from a checkpoint left behind by an interrupted run, if any.
    partial_file = f'{target_file}.partial'
    res = []
    if os.path.exists(partial_file):
        with open(partial_file, 'rb') as file:
            res = list(pickle.load(file))

    try:
        # Iterate by position so the loop does not depend on df having a
        # 0..n-1 index; skip the rows already present in the checkpoint.
        for review_text in df['reviewText'].iloc[len(res):]:
            res.append(gpt_sentimental_classif(review_text, model))
    except BaseException:
        # Keep what has already been computed, then let the error propagate.
        if res:
            with open(partial_file, 'wb') as file:
                pickle.dump(res, file)
        raise

    # Save the complete results and drop the now-obsolete checkpoint.
    with open(target_file, 'wb') as file:
        pickle.dump(res, file)
    if os.path.exists(partial_file):
        os.remove(partial_file)

    # Convert results list to DataFrame.
    return pd.DataFrame(res, columns=result_columns)

# Run the experiment once per model. make_exp reuses the given pickle file
# when it already exists, so re-running this cell does not repeat API calls.
probabilities_gpt_3 = make_exp('gpt-3.5-turbo', 'probabilities_gpt_3.pkl')
probabilities_gpt_4 = make_exp('gpt-4', 'probabilities_gpt_4.pkl')


In [None]:
# Per-review (prob_positive, prob_negative) obtained with 'gpt-3.5-turbo'.
probabilities_gpt_3

In [None]:
# Per-review (prob_positive, prob_negative) obtained with 'gpt-4'.
probabilities_gpt_4

In [None]:
# Ground-truth labels shared by both evaluations.
y_true = df['Positive']

# ROC points for each model, scored by its probability of the positive class.
# roc_curve returns (fpr, tpr, thresholds); the thresholds are not used here.
fpr_3, tpr_3, _ = roc_curve(y_true, probabilities_gpt_3['prob_positive'])
fpr_4, tpr_4, _ = roc_curve(y_true, probabilities_gpt_4['prob_positive'])

# Area under each ROC curve.
roc_auc_3 = auc(fpr_3, tpr_3)
roc_auc_4 = auc(fpr_4, tpr_4)

In [None]:
# Draw both ROC curves on a single set of axes.
fig, ax = plt.subplots()

ax.plot(fpr_3, tpr_3, color='orange', lw=2,
        label='ROC curve GPT-3.5 Turbo (area = %0.4f)' % roc_auc_3)
ax.plot(fpr_4, tpr_4, color='blue', lw=2,
        label='ROC curve GPT-4 (area = %0.4f)' % roc_auc_4)

# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()

In [None]:
# Turn probabilities into hard 0/1 predictions with a 0.5 decision threshold
# (vectorized comparison instead of a per-row Python lambda).
predictions_gpt_3 = (probabilities_gpt_3['prob_positive'] >= 0.5).astype(int)
predictions_gpt_4 = (probabilities_gpt_4['prob_positive'] >= 0.5).astype(int)

# Accuracy = share of reviews whose prediction equals the true label.
accuracy_gpt_3 = (predictions_gpt_3 == df['Positive']).mean()
accuracy_gpt_4 = (predictions_gpt_4 == df['Positive']).mean()

# The first model is 'gpt-3.5-turbo'; label it the same way as the ROC legend.
print(f'Accuracy of GPT-3.5 Turbo model: {accuracy_gpt_3}')
print(f'Accuracy of GPT-4 model: {accuracy_gpt_4}')
