In [None]:
# Install the OpenAI Python SDK into the notebook runtime (shell escape via `!`).
!pip install openai


In [None]:
from google.colab import userdata
import openai

# Read the OpenAI API key from Colab's sidebar secrets so the key itself never
# appears in the notebook source.
api_key = userdata.get("OPENAI_API_KEY")

# Register the key on the `openai` module; later cells read it back through
# `openai.api_key` when they construct a client.
openai.api_key = api_key


In [None]:
# Sanity check: report whether a non-empty API key was registered on the module.
if openai.api_key:
    print("Key loaded? ✅")
else:
    print("Key missing ❌")


In [None]:
from openai import OpenAI
from google.colab import userdata

# Read the API key from Colab's secret storage (same secret name as the first
# setup cell).
api_key = userdata.get("OPENAI_API_KEY")

# Create the client object that `ask_gpt` below uses for every request.
client = OpenAI(api_key=api_key)

def ask_gpt(prompt, temperature=0.7, top_p=0.9, max_tokens=200, model="gpt-3.5-turbo"):
    """Send `prompt` as a user message and return the first reply's content.

    A fixed system message framing the model as an aspect-based sentiment
    analysis expert is placed before the user message.

    Args:
        prompt: Text sent as the user message.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        max_tokens: Completion length cap forwarded to the API.
        model: Model identifier forwarded to the API.

    Returns:
        The `content` of the first choice's message, unmodified.
    """
    conversation = [
        {"role": "system", "content": "You are an expert in aspect-based sentiment analysis."},
        {"role": "user", "content": prompt},
    ]
    request_options = dict(
        model=model,
        messages=conversation,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
# Demo review that mentions two aspects with opposite sentiment.
review = "The battery life is terrible but the display quality is outstanding."

# Doubled braces ({{ }}) produce literal braces in the f-string, so the JSON
# example is sent verbatim while {review} is interpolated.
prompt = f"""
Extract all aspects from the following review, and classify each as Positive, Negative, or Neutral.
Return output in JSON format like this:
[
  {{"aspect": "battery", "sentiment": "negative"}},
  {{"aspect": "screen", "sentiment": "positive"}}
]

Review: "{review}"
"""

# Uses a lower temperature (0.3) than ask_gpt's default of 0.7.
print(ask_gpt(prompt, temperature=0.3, top_p=0.9))


In [None]:
# Install the Hugging Face `datasets` package into the notebook runtime.
!pip install datasets

from datasets import load_dataset
# Load the restaurant ABSA dataset by its Hub identifier and print its summary.
dataset = load_dataset("tomaarsen/setfit-absa-semeval-restaurants")
print(dataset)


In [None]:
# Convert the training split to a pandas DataFrame for sampling and inspection.
df = dataset["train"].to_pandas()
# Preview five random rows; no seed is passed here, so the preview differs between runs.
df[["text", "span", "label"]].sample(5)


In [None]:
import pandas as pd
# Draw a 30-row evaluation subset; the fixed seed keeps the draw repeatable.
sample_df = df.sample(30, random_state=42)  # use a small sample first


In [None]:
# Display the first rows of the sampled subset.
sample_df.head()

In [None]:
def get_prompt(method, sentence, aspect):
    """Build the prompt for one (review, aspect) pair using the chosen strategy.

    Args:
        method: Prompting strategy; one of "zero-shot", "few-shot" or "cot".
        sentence: Review text inserted into the prompt.
        aspect: Aspect term whose sentiment is being asked about.

    Returns:
        The prompt string for the selected strategy.

    Raises:
        ValueError: If `method` is not one of the supported strategies.
    """
    if method == "zero-shot":
        return f"""
Review: "{sentence}"

What is the sentiment (positive, negative, or neutral) about the aspect: "{aspect}"?

Only return the sentiment word.
"""

    if method == "few-shot":
        return f"""
Examples:
Review: "The service was poor." → Aspect: service → Sentiment: negative
Review: "Loved the food." → Aspect: food → Sentiment: positive

Now analyze this:
Review: "{sentence}"
Aspect: "{aspect}"
Sentiment:"""

    if method == "cot":
        return f"""
Let's analyze this step-by-step.

Review: "{sentence}"
Aspect: "{aspect}"

Step 1: Identify how the sentence describes the aspect.
Step 2: Determine whether the sentiment is positive, negative, or neutral.
Step 3: Return the sentiment.

Answer:"""

    # Fail loudly: falling off the end would return None, which would then be
    # sent to the API as the message content instead of a real prompt.
    raise ValueError(
        f"Unknown prompting method {method!r}; expected 'zero-shot', 'few-shot' or 'cot'."
    )


In [None]:
from openai import OpenAI

# Build the client from the key that an earlier cell stored on the `openai` module.
client = OpenAI(api_key=openai.api_key)  # assuming you've set api_key earlier

def ask_gpt(prompt, temperature=0.3, top_p=0.9):
    """Send `prompt` as a single user message and return the normalized reply.

    Args:
        prompt: Text sent as the user message.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The first choice's message content, stripped of surrounding whitespace
        and lower-cased. An empty string is returned when the message carries
        no content.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_tokens=100
    )
    content = response.choices[0].message.content
    # `content` may be None; calling .strip() on it would raise AttributeError
    # and abort the whole evaluation loop, so treat a missing reply as empty text.
    return (content or "").strip().lower()


In [None]:
import pandas as pd

# Sample 10 reviews from the labeled dataset (fixed seed keeps the draw repeatable).
# This rebinds `sample_df`, replacing the 30-row sample drawn in an earlier cell.
sample_df = df.sample(10, random_state=42)

# Zero-shot prompt builder
def get_prompt(method, sentence, aspect):
    """Return the zero-shot prompt for `aspect` within `sentence`.

    `method` is accepted so the call signature matches the other prompt
    builders in this notebook, but it does not affect the result.
    """
    prompt_lines = [
        "",
        f'Review: "{sentence}"',
        "",
        f'What is the sentiment (positive, negative, or neutral) about the aspect: "{aspect}"?',
        "",
        "Only return the sentiment word.",
        "",
    ]
    return "\n".join(prompt_lines)

# Create the OpenAI client used by `ask_gpt`, reading the key previously stored
# on the `openai` module.
from openai import OpenAI
client = OpenAI(api_key=openai.api_key)

def ask_gpt(prompt, temperature=0.3, top_p=0.9):
    """Send `prompt` as a single user message and return the normalized reply.

    Args:
        prompt: Text sent as the user message.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The first choice's message content, stripped of surrounding whitespace
        and lower-cased. An empty string is returned when the message carries
        no content.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_tokens=100
    )
    content = response.choices[0].message.content
    # `content` may be None; calling .strip() on it would raise AttributeError
    # and abort the whole evaluation loop, so treat a missing reply as empty text.
    return (content or "").strip().lower()

# Zero-shot evaluation: one API call per sampled row, scored by prefix match.
results = []
for _, row in sample_df.iterrows():
    review_text, aspect_term = row["text"], row["span"]
    pred = ask_gpt(
        get_prompt("zero-shot", review_text, aspect_term),
        temperature=0.3,
        top_p=0.9,
    )
    gold = row["label"].lower()

    # A prediction counts as correct when the reply begins with the gold label.
    record = dict(
        text=review_text,
        aspect=aspect_term,
        gold=gold,
        predicted=pred,
        match=pred.startswith(gold),
    )
    results.append(record)

# Collect the per-row records into a table for the accuracy cell below.
df_results = pd.DataFrame(results)


In [None]:
# The mean of the boolean `match` column is the fraction of correct predictions.
accuracy = df_results["match"].mean()
print(f"✅ Zero-shot Accuracy (10 samples): {accuracy:.2%}")
# Show the per-row results table beneath the summary line.
df_results


In [None]:
def get_prompt(method, sentence, aspect):
    """Return a few-shot prompt: three worked examples, then the query.

    `method` is accepted for signature compatibility and is not used.
    """
    demonstrations = (
        'Review: "The service was poor." → Aspect: service → Sentiment: negative',
        'Review: "Loved the food." → Aspect: food → Sentiment: positive',
        'Review: "The ambiance was okay." → Aspect: ambiance → Sentiment: neutral',
    )
    # The prompt deliberately ends at "Sentiment:" so the model completes the label.
    query = f'Review: "{sentence}"\nAspect: "{aspect}"\nSentiment:'
    return "\nExamples:\n" + "\n".join(demonstrations) + "\n\nNow analyze this:\n" + query


In [None]:
# Few-shot evaluation: one API call per sampled row, scored by prefix match.
results = []
for _, row in sample_df.iterrows():
    review_text, aspect_term = row["text"], row["span"]
    pred = ask_gpt(
        get_prompt("few-shot", review_text, aspect_term),
        temperature=0.3,
        top_p=0.9,
    )
    gold = row["label"].lower()

    # A prediction counts as correct when the reply begins with the gold label.
    record = dict(
        text=review_text,
        aspect=aspect_term,
        gold=gold,
        predicted=pred,
        match=pred.startswith(gold),
    )
    results.append(record)

# Collect the per-row records into a table for the accuracy cell below.
df_results = pd.DataFrame(results)


In [None]:
# The mean of the boolean `match` column is the fraction of correct predictions.
accuracy = df_results["match"].mean()
print(f"✅ Few-shot Accuracy (10 samples): {accuracy:.2%}")
# Show the per-row results table beneath the summary line.
df_results


In [None]:
def get_prompt(method, sentence, aspect):
    """Return the step-by-step prompt for `aspect` within `sentence`.

    The prompt lists three steps and asks for only the sentiment word as the
    answer. `method` is accepted for signature compatibility and is not used.
    """
    header = "\nLet's analyze this step-by-step.\n"
    subject = f'\nReview: "{sentence}"\nAspect: "{aspect}"\n'
    steps = (
        "\nStep 1: Identify how the sentence describes the aspect."
        "\nStep 2: Determine whether the sentiment is positive, negative, or neutral."
        "\nStep 3: Return only the sentiment word (no explanation)."
        "\n"
    )
    return header + subject + steps + "\nAnswer:"


In [None]:
# Evaluate the step-by-step prompt on every sampled row, logging each case.
results = []

# enumerate() supplies a 1-based running sample number. The first element
# yielded by iterrows() is the row's index label, not its position within the
# sample, so it is discarded rather than used for numbering.
for sample_no, (_, row) in enumerate(sample_df.iterrows(), start=1):
    prompt = get_prompt("cot", row["text"], row["span"])
    pred = ask_gpt(prompt, temperature=0.3, top_p=0.9)
    gold = row["label"].lower()
    # A prediction counts as correct when the reply begins with the gold label.
    match = pred.startswith(gold)

    results.append({
        "sample": sample_no,
        "review": row["text"],
        "aspect": row["span"],
        "gold": gold,
        "gpt_response": pred,
        "match": match
    })

    print(f"\n=== Sample {sample_no} ===")
    print(f"Review: {row['text']}")
    print(f"Aspect: {row['span']}")
    print(f"Gold: {gold}")
    print(f"--- GPT Response ---\n{pred}")
    print(f"✅ Match: {match}")


In [None]:
# Rebuild the results table from the `results` list filled by the preceding
# evaluation loop. Without this, `df_results` would still be the table built by
# the few-shot cell, and the number printed here would not be the CoT accuracy.
df_results = pd.DataFrame(results)
accuracy = df_results["match"].mean()
print(f"✅ CoT Accuracy (10 samples): {accuracy:.2%}")
# Show the per-row results table beneath the summary line.
df_results


In [None]:
def get_prompt(method, sentence, aspect):
    """Return the instruction-style prompt for `aspect` within `sentence`.

    `method` is accepted for signature compatibility and is not used.
    """
    prompt_lines = [
        "",
        "You are a sentiment analysis system. Your task is to classify the sentiment of a specific aspect mentioned in a customer review.",
        "",
        "Instructions:",
        "- Determine the sentiment polarity (positive, negative, or neutral) toward the given aspect.",
        "- Only return the sentiment word (e.g., positive).",
        "",
        f'Review: "{sentence}"',
        f'Aspect: "{aspect}"',
        "Sentiment:",
    ]
    return "\n".join(prompt_lines)


In [None]:
def get_prompt(method, sentence, aspect):
    """Return the instruction-style prompt for `aspect` within `sentence`.

    This definition produces the same prompt as the one in the preceding cell.
    `method` is accepted for signature compatibility and is not used.
    """
    template = (
        "\nYou are a sentiment analysis system. Your task is to classify the sentiment of a specific aspect mentioned in a customer review.\n"
        "\nInstructions:\n"
        "- Determine the sentiment polarity (positive, negative, or neutral) toward the given aspect.\n"
        "- Only return the sentiment word (e.g., positive).\n"
        '\nReview: "{sentence}"\n'
        'Aspect: "{aspect}"\n'
        "Sentiment:"
    )
    return template.format(sentence=sentence, aspect=aspect)


In [None]:
# Evaluate the instruction-style prompt on every sampled row, logging each case.
results = []
# enumerate() supplies a 1-based running sample number. The first element
# yielded by iterrows() is the row's index label, not its position within the
# sample, so it is discarded rather than used for numbering.
for sample_no, (_, row) in enumerate(sample_df.iterrows(), start=1):
    prompt = get_prompt("instruction", row["text"], row["span"])
    pred = ask_gpt(prompt, temperature=0.3, top_p=0.9)
    gold = row["label"].lower()
    # A prediction counts as correct when the reply begins with the gold label.
    match = pred.startswith(gold)

    results.append({
        "prompt_type": "instruction",
        "review": row["text"],
        "aspect": row["span"],
        "gold": gold,
        "predicted": pred,
        "match": match
    })

    print(f"\n=== Instruction Prompt Sample {sample_no} ===")
    print(f"Review: {row['text']}")
    print(f"Aspect: {row['span']}")
    print(f"Gold: {gold}")
    print(f"GPT: {pred}")
    print(f"✅ Match: {match}")


In [None]:
# Fraction of records in `results` whose `match` flag is true.
accuracy = sum(r["match"] for r in results) / len(results)
print(f"\n✅ Instruction Prompt Accuracy: {accuracy:.2%}")


In [None]:
def get_prompt(method, sentence, aspect):
    """Return the role-based prompt for `aspect` within `sentence`.

    The prompt opens with an expert-reviewer persona, states the task, and then
    gives the review and aspect. `method` is accepted for signature
    compatibility and is not used.
    """
    persona = "You are an expert aspect-based sentiment analysis reviewer working for a Fortune 500 AI company."
    task = "Your job is to determine whether the sentiment expressed about a given aspect in a customer review is positive, negative, or neutral."
    query = f'Review: "{sentence}"\nAspect: "{aspect}"\nAnswer only with the sentiment word:'
    # Paragraphs are separated by one blank line; the prompt starts with a newline.
    return "\n" + "\n\n".join([persona, task, query])


In [None]:
# Evaluate the role-based prompt defined in the preceding cell. Previously this
# cell only recomputed accuracy from the `results` left over from the
# instruction-prompt run, so the role prompt was never scored and the printed
# figure carried the wrong label.
results = []
for _, row in sample_df.iterrows():
    prompt = get_prompt("role", row["text"], row["span"])
    pred = ask_gpt(prompt, temperature=0.3, top_p=0.9)
    gold = row["label"].lower()

    results.append({
        "prompt_type": "role",
        "review": row["text"],
        "aspect": row["span"],
        "gold": gold,
        "predicted": pred,
        # A prediction counts as correct when the reply begins with the gold label.
        "match": pred.startswith(gold)
    })

accuracy = sum(r["match"] for r in results) / len(results)
print(f"\n✅ Role Prompt Accuracy: {accuracy:.2%}")


In [None]:
def get_prompt(method, sentence, aspect):
    """Return a few-shot prompt with three block-formatted examples.

    Each example is a Review / Aspect / Sentiment triple; the query uses the
    same layout and ends at "Sentiment:". `method` is accepted for signature
    compatibility and is not used.
    """
    demonstrations = [
        ("The food was great.", "food", "positive"),
        ("The staff was rude.", "staff", "negative"),
        ("The decor was fine.", "decor", "neutral"),
    ]
    sections = ["Here are some examples:"]
    for demo_review, demo_aspect, demo_label in demonstrations:
        sections.append(
            f'Review: "{demo_review}"\nAspect: {demo_aspect}\nSentiment: {demo_label}'
        )
    sections.append("Now analyze this review:")
    sections.append(f'Review: "{sentence}"\nAspect: {aspect}\nSentiment:')
    # Sections are separated by one blank line; the prompt starts with a newline.
    return "\n" + "\n\n".join(sections)


In [None]:
import itertools

# Grid of sampling settings to compare with the few-shot prompt.
temperatures = [0.0, 0.3, 0.7]
top_ps = [0.8, 0.9, 1.0]

results = []

# Nested loops visit the same (temperature, top_p) pairs, in the same order,
# as itertools.product(temperatures, top_ps).
for temp in temperatures:
    for top_p in top_ps:
        correct = 0
        total = 0

        print(f"\n🔍 Testing Few-shot | Temp={temp}, Top_p={top_p}")

        for _, row in sample_df.iterrows():
            reply = ask_gpt(
                get_prompt("few-shot", row["text"], row["span"]),
                temperature=temp,
                top_p=top_p,
            )
            # Correct when the reply begins with the gold label.
            if reply.strip().startswith(row["label"].lower()):
                correct += 1
            total += 1

        accuracy = correct / total
        print(f"✅ Accuracy: {accuracy:.2%}")

        results.append(dict(temperature=temp, top_p=top_p, accuracy=accuracy))


In [None]:
import pandas as pd
# Tabulate the sweep records and display them with the best accuracy first.
df_temp_results = pd.DataFrame(results)
df_temp_results.sort_values(by="accuracy", ascending=False)


In [None]:
# Sampling settings selected for the remaining experiments.
# NOTE(review): the next cell passes 0.7 and 1.0 as literals rather than reading
# these variables, so editing them here does not change that run.
temperature = 0.7
top_p = 1.0


In [None]:
# Re-run the few-shot prompt at temperature 0.7 / top_p 1.0 and report accuracy.
correct = 0
total = 0

for _, row in sample_df.iterrows():
    reply = ask_gpt(
        get_prompt("few-shot", row["text"], row["span"]),
        temperature=0.7,
        top_p=1.0,
    )
    # Correct when the reply begins with the gold label.
    if reply.strip().startswith(row["label"].lower()):
        correct += 1
    total += 1

print(f"\n✅ Accuracy with temp=0.7 and top_p=1.0: {correct/total:.2%}")


In [None]:
def reflect_and_correct(review, aspect, gpt_answer, gold_label):
    """Ask the model to judge an earlier prediction and correct it if wrong.

    Args:
        review: Review text.
        aspect: Aspect term the prediction refers to.
        gpt_answer: The model's initial sentiment prediction.
        gold_label: The reference sentiment label.

    Returns:
        Whatever `ask_gpt` returns for the reflection prompt.

    NOTE(review): the prompt includes `gold_label` verbatim, so the model is
    shown the reference answer before being asked to correct itself. Any
    accuracy measured on the corrected output is therefore not a blind
    prediction.
    """
    prompt_lines = [
        "",
        "You are an expert in sentiment analysis.",
        "",
        f'Review: "{review}"',
        f'Aspect: "{aspect}"',
        f'Initial sentiment prediction: "{gpt_answer}"',
        f'Actual correct sentiment: "{gold_label}"',
        "",
        "Was the initial sentiment correct? Answer Yes or No.",
        "If No, correct the sentiment and explain why.",
        "",
    ]
    reflection_prompt = "\n".join(prompt_lines)
    return ask_gpt(reflection_prompt, temperature=0.3, top_p=0.9)


In [None]:
# Few-shot prediction followed by a reflection pass on the rows that missed.
results_reflection = []

for _, row in sample_df.iterrows():
    review_text, aspect_term = row["text"], row["span"]
    prompt = get_prompt("few-shot", review_text, aspect_term)
    pred = ask_gpt(prompt, temperature=0.7, top_p=1.0).strip().lower()
    gold = row["label"].lower()
    match_before = pred.startswith(gold)

    if match_before:
        # Already correct: no reflection call, keep the original prediction.
        reflection_output = ""
        corrected_pred = pred
    else:
        reflection_output = reflect_and_correct(review_text, aspect_term, pred, gold)
        reflection_text = reflection_output.lower()
        # Take the first label, in the fixed order below, that occurs anywhere
        # in the reflection text; fall back to the original prediction.
        # NOTE(review): the order is by list position, not by where the word
        # appears in the text, so a reply mentioning several labels always
        # resolves to the earliest-listed one.
        corrected_pred = next(
            (label for label in ("positive", "negative", "neutral") if label in reflection_text),
            pred,
        )

    match_after = corrected_pred.startswith(gold)

    results_reflection.append(dict(
        text=review_text,
        aspect=aspect_term,
        gold=gold,
        original_pred=pred,
        corrected_pred=corrected_pred,
        reflection_output=reflection_output,
        match_before=match_before,
        match_after=match_after,
    ))


In [None]:
# Tabulate the per-row reflection records.
df_reflection = pd.DataFrame(results_reflection)

# Accuracy before and after reflection: the mean of each boolean match column
# is the fraction of rows that matched.
acc_before = df_reflection["match_before"].mean()
acc_after = df_reflection["match_after"].mean()

print(f"\n🎯 Accuracy BEFORE reflection: {acc_before:.2%}")
print(f"🪞 Accuracy AFTER reflection:  {acc_after:.2%}")


In [None]:
import matplotlib.pyplot as plt

# Bar chart comparing accuracy before and after the reflection pass, drawn
# through an explicit Axes object.
fig, ax = plt.subplots()
ax.bar(
    ["Before Reflection", "After Reflection"],
    [acc_before, acc_after],
    color=["gray", "green"],
)
ax.set_title("Accuracy Before vs After Reflection")
ax.set_ylim(0, 1)  # accuracy is a fraction, so fix the axis to [0, 1]
ax.set_ylabel("Accuracy")
ax.grid(axis='y')
plt.show()
