In [1]:
# pip install python-dotenv groq pandas tqdm

In [2]:
# --- Cell 1: Imports and setup ---
import os
import json
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from groq import Groq
import re

# Load environment variables from .env file
load_dotenv()

# Initialize Groq client
api_key = os.getenv("GROQ_API_KEY")

client = Groq(api_key=api_key)


In [3]:
df = pd.read_csv("yelp.csv").sample(50, random_state=42)
df.head()

Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
6252,QVR7dsvBeg8xFt9B-vd1BA,2010-07-22,hwYVJs8Ko4PMjI19QcR57g,4,We got here around midnight last Friday... the...,review,90a6z--_CUrl84aCzZyPsg,5,5,2
4684,24qSrF_XOrvaHDBy-gLIQg,2012-01-22,0mvthYPKb2ZmKhCADiKSmQ,5,Brought a friend from Louisiana here. She say...,review,9lJAj_2zCvP2jcEiRjF9oA,0,0,0
1731,j0Uc-GuOe-x9_N_IK1KPpA,2009-05-09,XJHknNIecha6h0wkBSZB4w,3,"Every friday, my dad and I eat here. We order ...",review,0VfJi9Au0rVFVnPKcJpt3Q,0,0,0
4742,RBiiGw8c7j-0a8nk35JO3w,2010-12-22,z6y3GRpYDqTznVe-0dn--Q,1,"My husband and I were really, really disappoin...",review,lwppVF0Yqkuwt-xaEuugqw,2,2,2
4521,U8VA-RW6LYOhxR-Ygi6eDw,2011-01-17,vhWHdemMvsqVNv5zi2OMiA,5,Love this place! Was in phoenix 3 weeks for w...,review,Y2R_tlSk4lTHiLXTDsn1rg,0,1,0


### Prompt 1

In [4]:
prompts = ["Classify this Yelp review into 1-5 stars. Provide only a JSON output with 'review_text' and 'star_rating' : '{}'"]

detailed_results = []

for p in prompts:
    correct = 0
    total = 0
    valid_json = 0
    
    print(f"\n Running prompt: {p[:50]}...\n")
    for _, row in tqdm(df.iterrows(), total=len(df)):
        review = row["text"]
        actual = row["stars"]
        
        messages = [
            {"role": "system", "content": "You are an expert at sentiment analysis"},
            {"role": "user", "content": p.format(review)},
        ]
        
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",  
            messages=messages,
            temperature=0.3,
            max_tokens=100,
        )
        
        raw_output = response.choices[0].message.content.strip()

        pred = None
        try:
            pred = json.loads(raw_output)
            predicted = pred.get("star_rating", None)
            valid_json += 1
        except json.JSONDecodeError:
            match = re.search(r"\b[1-5]\b", raw_output)
            predicted = int(match.group()) if match else None

        if predicted is not None and predicted == actual:
            correct += 1

        total += 1
        
        detailed_results.append({
            "Review": review[:150] + ("..." if len(review) > 150 else ""), 
            "Predicted Rating": predicted,
            "Actual Rating": actual,
            "Match": predicted == actual,
            "Valid JSON": "True" if isinstance(pred, dict) else "False",
            "Raw Model Output": raw_output
        })

accuracy = round(correct / total, 3) if total > 0 else 0
json_validity = round(valid_json / total, 3) if total > 0 else 0
results_df = pd.DataFrame(detailed_results)
print("\n Results:")
print(results_df)
print(f"\n Accuracy: {accuracy}")
print(f"\n JSON Validity: {json_validity}")


 Running prompt: Classify this Yelp review into 1-5 stars. Provide ...



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:56<00:00,  1.12s/it]


 Results:
                                               Review  Predicted Rating  \
0   We got here around midnight last Friday... the...               NaN   
1   Brought a friend from Louisiana here.  She say...               5.0   
2   Every friday, my dad and I eat here. We order ...               4.0   
3   My husband and I were really, really disappoin...               NaN   
4   Love this place!  Was in phoenix 3 weeks for w...               3.0   
5   This hotel is in a good location for getting t...               NaN   
6   I love that this place has top seafood plates ...               NaN   
7   Awesome if you like ramen...even awesomer if y...               5.0   
8   Great place for a "home office" morning. One o...               5.0   
9   1 star for service, but the food is not ok :( ...               1.0   
10  Good food and able to manage a large group eas...               5.0   
11  This place is great!\nIf you talk to anybody t...               NaN   
12            




### Prompt 2

In [5]:
prompts = ["Analyze this text carefully and rate it from 1–5 stars based on the reviewer’s satisfaction (1 = very dissatisfied, 5 = very satisfied). Provide JSON output {{'review_text': '...', 'star_rating': <1–5>}}: '{}'"]

detailed_results = []

for p in prompts:
    correct = 0
    total = 0
    valid_json = 0
    
    print(f"\n Running prompt: {p[:50]}...\n")
    for _, row in tqdm(df.iterrows(), total=len(df)):
        review = row["text"]
        actual = row["stars"]
        
        messages = [
            {"role": "system", "content": "You are an expert at sentiment analysis. Always return a JSON object with 'review_text' and 'star_rating'."},
            {"role": "user", "content": p.format(review)},
        ]
        
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",  
            messages=messages,
            temperature=0.3,
            max_tokens=100,
        )
        
        raw_output = response.choices[0].message.content.strip()

        pred = None
        try:
            pred = json.loads(raw_output)
            predicted = pred.get("star_rating", None)
            valid_json += 1
        except json.JSONDecodeError:
            match = re.search(r"\b[1-5]\b", raw_output)
            predicted = int(match.group()) if match else None

        if predicted is not None and predicted == actual:
            correct += 1

        total += 1
        
        detailed_results.append({
            "Review": review[:150] + ("..." if len(review) > 150 else ""), 
            "Predicted Rating": predicted,
            "Actual Rating": actual,
            "Match": predicted == actual,
            "Valid JSON": "True" if isinstance(pred, dict) else "False",
            "Raw Model Output": raw_output
        })

accuracy = round(correct / total, 3) if total > 0 else 0
json_validity = round(valid_json / total, 3) if total > 0 else 0
results_df = pd.DataFrame(detailed_results)
print("\n Results:")
print(results_df)
print(f"\n Accuracy: {accuracy}")
print(f"\n JSON Validity: {json_validity}")


 Running prompt: Analyze this text carefully and rate it from 1–5 s...



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [01:55<00:00,  2.32s/it]


 Results:
                                               Review  Predicted Rating  \
0   We got here around midnight last Friday... the...               NaN   
1   Brought a friend from Louisiana here.  She say...               5.0   
2   Every friday, my dad and I eat here. We order ...               4.0   
3   My husband and I were really, really disappoin...               NaN   
4   Love this place!  Was in phoenix 3 weeks for w...               3.0   
5   This hotel is in a good location for getting t...               NaN   
6   I love that this place has top seafood plates ...               NaN   
7   Awesome if you like ramen...even awesomer if y...               5.0   
8   Great place for a "home office" morning. One o...               5.0   
9   1 star for service, but the food is not ok :( ...               1.0   
10  Good food and able to manage a large group eas...               5.0   
11  This place is great!\nIf you talk to anybody t...               NaN   
12            




### Prompt 3

In [6]:
prompts = ["Consider both positive and negative aspects of this Yelp review, and decide the most accurate 1–5 star rating that reflects overall satisfaction. Strictly provide only a JSON output with 'review_text' and 'star_rating' : '{}'"]

detailed_results = []

for p in prompts:
    correct = 0
    total = 0
    valid_json = 0
    
    print(f"\n Running prompt: {p[:50]}...\n")
    for _, row in tqdm(df.iterrows(), total=len(df)):
        review = row["text"]
        actual = row["stars"]
        
        messages = [
            {"role": "system", "content": "You are an expert at sentiment analysis. Always return a JSON object with 'review_text' and 'star_rating'."},
            {"role": "user", "content": p.format(review)},
        ]
        
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",  
            messages=messages,
            temperature=0.3,
            max_tokens=100,
        )
        
        raw_output = response.choices[0].message.content.strip()

        pred = None
        try:
            pred = json.loads(raw_output)
            predicted = pred.get("star_rating", None)
            valid_json += 1
        except json.JSONDecodeError:
            match = re.search(r"\b[1-5]\b", raw_output)
            predicted = int(match.group()) if match else None

        if predicted is not None and predicted == actual:
            correct += 1

        total += 1
        
        detailed_results.append({
            "Review": review[:150] + ("..." if len(review) > 150 else ""), 
            "Predicted Rating": predicted,
            "Actual Rating": actual,
            "Match": predicted == actual,
            "Valid JSON": "True" if isinstance(pred, dict) else "False",
            "Raw Model Output": raw_output
        })

accuracy = round(correct / total, 3) if total > 0 else 0
json_validity = round(valid_json / total, 3) if total > 0 else 0
results_df = pd.DataFrame(detailed_results)
print("\n Results:")
print(results_df)
print(f"\n Accuracy: {accuracy}")
print(f"\n JSON Validity: {json_validity}")


 Running prompt: Consider both positive and negative aspects of thi...



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [01:55<00:00,  2.30s/it]


 Results:
                                               Review  Predicted Rating  \
0   We got here around midnight last Friday... the...               NaN   
1   Brought a friend from Louisiana here.  She say...               5.0   
2   Every friday, my dad and I eat here. We order ...               4.0   
3   My husband and I were really, really disappoin...               NaN   
4   Love this place!  Was in phoenix 3 weeks for w...               5.0   
5   This hotel is in a good location for getting t...               NaN   
6   I love that this place has top seafood plates ...               NaN   
7   Awesome if you like ramen...even awesomer if y...               5.0   
8   Great place for a "home office" morning. One o...               5.0   
9   1 star for service, but the food is not ok :( ...               1.0   
10  Good food and able to manage a large group eas...               5.0   
11  This place is great!\nIf you talk to anybody t...               NaN   
12            


