In [None]:
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from openai import OpenAI
import os
import google.generativeai as genai
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Load the "sst2" configuration of the GLUE benchmark through Hugging Face `datasets`.
dataset = load_dataset("glue", "sst2")

In [None]:
# Materialise the "train" split as a pandas DataFrame and preview its first rows.
df = pd.DataFrame(dataset["train"])
df.head()

In [None]:
def preprocess_sst_data(df):
    """Return a cleaned copy of an SST-2 frame with text labels and unique sentences.

    Steps, in order: integer labels are mapped to ``'negative'`` (0) and
    ``'positive'`` (1); sentences are stripped and lower-cased; rows whose
    sentence is empty after stripping are dropped; duplicate sentences are
    dropped, keeping the first occurrence.

    Normalisation happens *before* de-duplication so that sentences differing
    only in case or surrounding whitespace are recognised as duplicates.

    Args:
        df: DataFrame with a string ``sentence`` column and a ``label`` column
            holding 0/1. Any other label value becomes NaN.

    Returns:
        A new DataFrame; the input frame is left untouched. Surviving rows keep
        their original index values.

    Note:
        Not idempotent: calling it on its own output maps the already-textual
        labels to NaN.
    """
    label_names = {0: 'negative', 1: 'positive'}

    cleaned = df.copy()
    cleaned['label'] = cleaned['label'].map(label_names)
    cleaned['sentence'] = cleaned['sentence'].str.strip().str.lower()

    # Filter and de-duplicate without in-place writes, so nothing is ever
    # assigned into a filtered slice (which can raise SettingWithCopyWarning).
    non_empty = cleaned['sentence'] != ""
    return cleaned[non_empty].drop_duplicates(subset=['sentence'])

In [None]:
# Rebuild from the raw train split on every run so re-executing this cell is
# idempotent: feeding the already-processed `df` back into preprocess_sst_data
# would map its text labels to NaN.
df = preprocess_sst_data(pd.DataFrame(dataset["train"]))

# Only a cell's last expression is displayed, so print the row count and let
# the preview be the cell output (previously the head() result was discarded).
print(len(df), "rows after preprocessing")
df.head()

In [None]:
# Bar chart of how many sentences carry each sentiment label.
fig, ax = plt.subplots(figsize=(6, 3))
label_counts = df['label'].value_counts()
label_counts.plot(kind='bar', ax=ax)
ax.set_xlabel('Sentiment')
ax.set_ylabel('Count')
ax.set_title('Sentiment Distribution in SST-2 Dataset')
plt.show()

In [None]:
# Load environment variables via python-dotenv, then build the OpenAI client from
# OPENAI_API_KEY so the key itself never appears in the notebook.
# os.getenv returns None when the variable is unset.
load_dotenv()
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

In [None]:
def get_sentiment(sentence, no_of_shots=0, random_state=None, verbose=True):
    """Classify one sentence as positive or negative with gpt-4o.

    The request goes through the module-level ``client``. With
    ``no_of_shots > 0`` the prompt is prefixed with that many positive and that
    many negative labelled examples drawn from the module-level ``df``
    (columns ``sentence`` and ``label``), never including ``sentence`` itself.

    Args:
        sentence: Text to classify.
        no_of_shots: Number of examples *per class* to prepend; 0 means
            zero-shot.
        random_state: Forwarded to ``Series.sample`` so the choice of examples
            can be reproduced. ``None`` keeps the draw random.
        verbose: When true, print the sentence and the parsed label.

    Returns:
        ``'positive'`` or ``'negative'`` when the reply mentions exactly one of
        the two labels (case, punctuation and prefixes such as "Sentiment:" are
        tolerated); otherwise ``'invalid'``.

    Raises:
        ValueError: If ``no_of_shots`` is negative, or if ``df`` holds fewer
            than ``no_of_shots`` candidate examples for a class.
    """
    if no_of_shots < 0:
        # A countdown loop on a negative count would never reach zero.
        raise ValueError("no_of_shots must be >= 0")

    prompt = f"""Classify the sentiment of the following tweet as positive, or negative.\nGive only the sentiment as single word output in lowercase. This is a strict requirement.\nTarget Sentence: "{sentence}"\nSentiment:"""

    if no_of_shots > 0:
        few_shot_examples = """Use the following tweets as example for few shot learning. Each example is followed by a sentiment label."""

        # Filter each class once, excluding the target sentence up front, and
        # draw all shots in one call so no example is repeated in a prompt.
        is_other_sentence = df['sentence'] != sentence
        positive_samples = df.loc[
            is_other_sentence & (df['label'] == 'positive'), 'sentence'
        ].sample(no_of_shots, random_state=random_state)
        negative_samples = df.loc[
            is_other_sentence & (df['label'] == 'negative'), 'sentence'
        ].sample(no_of_shots, random_state=random_state)

        # Alternate positive / negative examples.
        for positive_sample, negative_sample in zip(positive_samples, negative_samples):
            few_shot_examples += """\nExample: {}\nSentiment: {}""".format(positive_sample, 'positive')
            few_shot_examples += """\nExample: {}\nSentiment: {}""".format(negative_sample, 'negative')

        prompt = few_shot_examples + "\n" + prompt

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a sentiment analysis expert."},
                  {"role": "user", "content": prompt}],
        temperature=0
    )

    # The message content may be None (e.g. a refusal); treat that as no answer.
    raw_answer = (response.choices[0].message.content or "").strip().lower()

    # Collapse the reply to the closed label set so that answers such as
    # "Positive." or "Sentiment: positive" still match the gold labels, and
    # anything ambiguous or unrelated is counted as 'invalid'.
    has_positive = 'positive' in raw_answer
    has_negative = 'negative' in raw_answer
    if has_positive and not has_negative:
        sentiment = 'positive'
    elif has_negative and not has_positive:
        sentiment = 'negative'
    else:
        sentiment = 'invalid'

    if verbose:
        print(sentence, sentiment)
    return sentiment

In [None]:
# Evaluation sample; the cells below read its `sentence` and `label` columns.
df_sample = pd.read_csv('sst_sample.csv') # replace with your own batched sample

In [None]:
# zero shot gpt
zero_shot_predictions = [get_sentiment(sentence) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
zero_shot_accuracy = accuracy_score(y_true, zero_shot_predictions)
# The accuracy was computed but never shown; report it like the few-shot cells do.
print("\nAccuracy of zero-shot classification on sst2, gpt-4o", zero_shot_accuracy)
print("\nZero-Shot Classification Report:\n", classification_report(y_true, zero_shot_predictions, digits=4))
print(confusion_matrix(y_true, zero_shot_predictions))

In [None]:
# one shot gpt (one positive and one negative example per prompt)
one_shot_predictions = [get_sentiment(sentence, no_of_shots=1) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
one_shot_accuracy = accuracy_score(y_true, one_shot_predictions)
print("\nAccuracy of one-shot classification on sst2, gpt-4o", one_shot_accuracy)
# Title corrected: this report was previously labelled "Zero-Shot".
print("\nOne-Shot Classification Report:\n", classification_report(y_true, one_shot_predictions, digits=4))
print(confusion_matrix(y_true, one_shot_predictions))

In [None]:
# three shot gpt (three positive and three negative examples per prompt)
three_shot = [get_sentiment(sentence, no_of_shots=3) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
three_shot_accuracy = accuracy_score(y_true, three_shot)
print("\nAccuracy of three-shot classification on sst2, gpt-4o", three_shot_accuracy)
# Title corrected: this report was previously labelled "Zero-Shot".
print("\nThree-Shot Classification Report:\n", classification_report(y_true, three_shot, digits=4))
print(confusion_matrix(y_true, three_shot))

In [None]:
# Configure the Gemini SDK with the key read from the GEMINI_API_KEY environment
# variable (os.getenv returns None when it is unset).
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

In [None]:
def get_sentiment_gemini(sentence, no_of_shots=0, random_state=None, verbose=True):
    """Classify one sentence as positive or negative with gemini-1.5-pro.

    The request goes through the module-level ``genai`` SDK. With
    ``no_of_shots > 0`` the prompt is prefixed with that many positive and that
    many negative labelled examples drawn from the module-level ``df``
    (columns ``sentence`` and ``label``), never including ``sentence`` itself.

    Args:
        sentence: Text to classify.
        no_of_shots: Number of examples *per class* to prepend; 0 means
            zero-shot.
        random_state: Forwarded to ``Series.sample`` so the choice of examples
            can be reproduced. ``None`` keeps the draw random.
        verbose: When true, print the sentence and the parsed label.

    Returns:
        ``'positive'`` or ``'negative'`` when the reply mentions exactly one of
        the two labels (case, punctuation and prefixes such as "Sentiment:" are
        tolerated); otherwise ``'invalid'``, including when the response
        carries no usable text.

    Raises:
        ValueError: If ``no_of_shots`` is negative, or if ``df`` holds fewer
            than ``no_of_shots`` candidate examples for a class.
    """
    if no_of_shots < 0:
        # A countdown loop on a negative count would never reach zero.
        raise ValueError("no_of_shots must be >= 0")

    prompt = f"""Classify the sentiment of the following tweet as positive, or negative.\nGive only the sentiment as single word output in lowercase. This is a strict requirement.\nTarget Sentence: "{sentence}"\nSentiment:"""

    if no_of_shots > 0:
        few_shot_examples = """Use the following tweets as example for few shot learning. Each example is followed by a sentiment label."""

        # Filter each class once, excluding the target sentence up front, and
        # draw all shots in one call so no example is repeated in a prompt.
        is_other_sentence = df['sentence'] != sentence
        positive_samples = df.loc[
            is_other_sentence & (df['label'] == 'positive'), 'sentence'
        ].sample(no_of_shots, random_state=random_state)
        negative_samples = df.loc[
            is_other_sentence & (df['label'] == 'negative'), 'sentence'
        ].sample(no_of_shots, random_state=random_state)

        # Alternate positive / negative examples.
        for positive_sample, negative_sample in zip(positive_samples, negative_samples):
            few_shot_examples += """\nExample: {}\nSentiment: {}""".format(positive_sample, 'positive')
            few_shot_examples += """\nExample: {}\nSentiment: {}""".format(negative_sample, 'negative')

        prompt = few_shot_examples + "\n" + prompt

    # NOTE(review): unlike the GPT variant, no temperature is set here, so the
    # SDK default applies — confirm whether that asymmetry is intended.
    model = genai.GenerativeModel(model_name="models/gemini-1.5-pro")
    response = model.generate_content(prompt)

    try:
        raw_answer = response.text.strip().lower()
    except ValueError:
        # `response.text` raises ValueError when the response has no valid text
        # part (e.g. it was blocked). Count that row as invalid instead of
        # aborting the whole evaluation run.
        raw_answer = ""

    # Collapse the reply to the closed label set so that answers such as
    # "Positive." or "Sentiment: positive" still match the gold labels, and
    # anything ambiguous or unrelated is counted as 'invalid'.
    has_positive = 'positive' in raw_answer
    has_negative = 'negative' in raw_answer
    if has_positive and not has_negative:
        sentiment = 'positive'
    elif has_negative and not has_positive:
        sentiment = 'negative'
    else:
        sentiment = 'invalid'

    if verbose:
        print(sentence, sentiment)
    return sentiment

In [None]:
# zero shot on gemini
zero_shot_predictions = [get_sentiment_gemini(sentence) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
zero_shot_accuracy = accuracy_score(y_true, zero_shot_predictions)
# The accuracy was computed but never shown; report it like the few-shot cells do.
print("\nAccuracy of zero-shot classification on sst2, gemini-1.5-pro", zero_shot_accuracy)
print("\nZero-Shot Classification Report:\n", classification_report(y_true, zero_shot_predictions, digits=4))
print(confusion_matrix(y_true, zero_shot_predictions))

In [None]:
# one shot gemini (one positive and one negative example per prompt)
one_shot_predictions = [get_sentiment_gemini(sentence, no_of_shots=1) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
one_shot_accuracy = accuracy_score(y_true, one_shot_predictions)
# Labels corrected: these lines previously said "gpt-4o" and "Zero-Shot".
print("\nAccuracy of one-shot classification on sst2, gemini-1.5-pro", one_shot_accuracy)
print("\nOne-Shot Classification Report:\n", classification_report(y_true, one_shot_predictions, digits=4))
print(confusion_matrix(y_true, one_shot_predictions))

In [None]:
# three shot gemini (three positive and three negative examples per prompt)
three_shot = [get_sentiment_gemini(sentence, no_of_shots=3) for sentence in df_sample['sentence']]
y_true = df_sample['label'].str.lower()
three_shot_accuracy = accuracy_score(y_true, three_shot)
# Labels corrected: these lines previously said "gpt-4o" and "Zero-Shot".
print("\nAccuracy of three-shot classification on sst2, gemini-1.5-pro", three_shot_accuracy)
print("\nThree-Shot Classification Report:\n", classification_report(y_true, three_shot, digits=4))
print(confusion_matrix(y_true, three_shot))