# Yelp Review Classification Task

## Objective
Classify Yelp reviews into 1–5 stars using 3 different prompting approaches.
Evaluate Accuracy, JSON Validity, and Reliability.

## Setup
Ensure `yelp.csv` is in the same directory as this notebook (the working directory it is run from).

In [None]:
import pandas as pd
import json
import re
import time
import os

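# LLM client setup: call_llm uses the real Gemini API when google-generativeai is
# installed and GEMINI_API_KEY is set; otherwise it returns a canned mock response.
# To use another provider (e.g., openai), replace the body of call_llm with that API call.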
# google-generativeai is an optional dependency: record whether it could be
# imported so later code can fall back to a mock response without it.
try:
    import google.generativeai as genai
except ImportError:
    HAS_GENAI = False
else:
    HAS_GENAI = True

# The API key comes from the environment (set GEMINI_API_KEY); None if unset.
API_KEY = os.environ.get("GEMINI_API_KEY")

def call_llm(prompt, model_name="gemini-2.5-pro"):
    """Send ``prompt`` to the LLM and return its raw text response.

    Args:
        prompt: The full prompt text to send.
        model_name: Name of the Gemini model to instantiate.

    Returns:
        The response text from the model. If the API call raises, the
        string ``"Error: <exception>"`` is returned instead of raising, so
        the caller's JSON parsing records the failure. When the
        google-generativeai package is unavailable or no API key is set, a
        fixed mock response (fenced JSON) is returned.
    """
    if not (HAS_GENAI and API_KEY):
        # Mock response for testing without API Key. The payload must be
        # clean fenced JSON so it exercises the same parsing path as a
        # real response.
        return '```json\n{"predicted_stars": 4, "explanation": "Simulated response because API_KEY is missing."}\n```'

    genai.configure(api_key=API_KEY)
    model = genai.GenerativeModel(model_name)
    try:
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberate best-effort: one failed request must not abort the
        # whole evaluation run; the error text is surfaced to the caller.
        return f"Error: {e}"


In [None]:
# 1. Load Data
df = pd.read_csv('yelp.csv')
print(f"Total rows: {len(df)}")

# Sample up to 200 rows for efficiency. The size is capped at the dataset
# length because DataFrame.sample raises ValueError when n exceeds the number
# of rows (without replacement). random_state keeps the sample reproducible.
sample_size = min(200, len(df))
sampled_df = df.sample(n=sample_size, random_state=42).copy()
print(f"Sampled rows: {len(sampled_df)}")
sampled_df.head()

In [None]:
# 2. Define Prompts

def clean_json_response(response_text):
    """Extract the JSON object from an LLM response.

    Responses may wrap the JSON in markdown code fences or precede it with
    free-form reasoning that itself contains braces. Every ``{`` is tried as
    the start of a JSON object, and the last object that decodes cleanly is
    returned, since the prompts ask for the final answer at the end.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The substring holding the last decodable top-level JSON object. If
        none decodes, the span from the first ``{`` to the last ``}`` is
        returned; if there are no braces, ``response_text`` is returned
        unchanged. The caller's ``json.loads`` reports any remaining error.
    """
    decoder = json.JSONDecoder()
    last_object = None

    start = response_text.find('{')
    while start != -1:
        try:
            _, end = decoder.raw_decode(response_text, start)
        except json.JSONDecodeError:
            # Not the start of a valid object; try the next brace.
            start = response_text.find('{', start + 1)
            continue
        last_object = response_text[start:end]
        # Resume after this object so nested objects are not re-reported.
        start = response_text.find('{', end)

    if last_object is not None:
        return last_object

    # Fallback: widest brace-delimited span, as a last resort.
    match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if match:
        return match.group(0)
    return response_text

# Approach 1: Zero-Shot (Direct Instruction)
def prompt_zero_shot(review_text):
    """Build the zero-shot prompt: a direct instruction with no examples.

    The prompt asks for a 1-5 star rating returned as a JSON object with
    "predicted_stars" and "explanation" keys, followed by the quoted review.
    """
    prompt_lines = [
        '',
        'You are a sentiment analysis assistant. Analyze the following Yelp review and assign a star rating from 1 to 5.',
        'Return the output STRICTLY as a valid JSON object with the keys "predicted_stars" (integer) and "explanation" (string).',
        '',
        'Review:',
        f'"{review_text}"',
        '',
        'JSON:',
        '',
    ]
    return "\n".join(prompt_lines)

# Approach 2: Few-Shot (Providing Examples)
def prompt_few_shot(review_text):
    """Build the few-shot prompt: three worked examples, then the task.

    The examples cover a 1-star, a 3-star and a 5-star review, each paired
    with the JSON output expected from the model.
    """
    worked_examples = (
        (
            "The service was terrible and the food was cold. Never coming back.",
            1,
            "The reviewer expresses strong dissatisfaction with both service and food quality, indicating a very negative experience.",
        ),
        (
            "It was okay, not great. The burger was good but the fries were soggy.",
            3,
            "The reviewer has mixed feelings, praising the burger but criticizing the fries, suggesting an average experience.",
        ),
        (
            "Absolutely amazing! Best pizza I've ever had. Staff was super friendly.",
            5,
            "The reviewer uses superlatives like 'amazing' and 'best' and praises the staff, indicating a perfect experience.",
        ),
    )

    sections = [
        '\nClassify the following Yelp review into 1-5 stars. Return JSON: {"predicted_stars": <int>, "explanation": <str>}.\n'
    ]
    for number, (sample_review, stars, rationale) in enumerate(worked_examples, start=1):
        sections.append(
            f'\nExample {number}:\n'
            f'Review: "{sample_review}"\n'
            f'Output: {{"predicted_stars": {stars}, "explanation": "{rationale}"}}\n'
        )
    sections.append(f'\nTask:\nReview: "{review_text}"\nOutput:\n')
    return "".join(sections)

# Approach 3: Chain-of-Thought (Reasoning first)
def prompt_cot(review_text):
    """Build the chain-of-thought prompt: reason step by step, then answer.

    The model is asked to work through four analysis steps before emitting
    the final JSON object with "predicted_stars" and "explanation".
    """
    reasoning_steps = (
        'Identify the key positive and negative sentiment markers in the text.',
        'Weigh the pros and cons mentions.',
        'Determine the overall sentiment score.',
        'Assign a star rating based on the overall sentiment.',
    )

    prompt_lines = [
        '',
        'Analyze the following Yelp review step-by-step to determine the appropriate star rating (1-5).',
    ]
    prompt_lines.extend(
        f'{number}. {step}' for number, step in enumerate(reasoning_steps, start=1)
    )
    prompt_lines.extend([
        '',
        'Finally, provide the result in JSON format: {"predicted_stars": <int>, "explanation": <string>}',
        '',
        'Review:',
        f'"{review_text}"',
        '',
        'Analysis and JSON:',
        '',
    ])
    return "\n".join(prompt_lines)

In [None]:
# 3. Execution Loop
# For every sampled review, run each prompting approach, parse the model's
# JSON answer and record prediction, validity, explanation and API latency.


def _coerce_stars(value):
    """Return ``value`` as an integer star rating in 1-5, or raise ValueError."""
    if value is None or isinstance(value, bool):
        raise ValueError(f"predicted_stars missing or not numeric: {value!r}")
    if isinstance(value, str):
        value = value.strip()
    try:
        number = float(value)
    except (TypeError, ValueError):
        raise ValueError(f"predicted_stars missing or not numeric: {value!r}") from None
    if not number.is_integer() or not 1 <= number <= 5:
        raise ValueError(f"predicted_stars is not an integer in 1-5: {value!r}")
    return int(number)


results = []

dataset = sampled_df.to_dict('records')

# Count from 1 so the progress message reports rows actually completed.
for i, row in enumerate(dataset, start=1):
    review = row['text']
    actual_stars = row['stars']

    # Run all 3 prompts
    prompts = {
        "Zero-Shot": prompt_zero_shot(review),
        "Few-Shot": prompt_few_shot(review),
        "CoT": prompt_cot(review)
    }

    row_result = {
        "review_id": row['review_id'],
        "actual_stars": actual_stars
    }

    for name, prompt_text in prompts.items():
        latency = None  # stays None if the call itself raises
        try:
            # Time only the model call, not the parsing.
            start_time = time.time()
            raw_response = call_llm(prompt_text)
            latency = time.time() - start_time

            # Parse JSON
            json_str = clean_json_response(raw_response)
            data = json.loads(json_str)

            # A response only counts as valid when it is a JSON object with
            # a usable 1-5 rating; otherwise a string or missing value would
            # be marked valid and break the accuracy/MAE computation later.
            if not isinstance(data, dict):
                raise ValueError("Response JSON is not an object")
            predicted = _coerce_stars(data.get('predicted_stars'))
            explanation = data.get('explanation')

            row_result[f"{name}_pred"] = predicted
            row_result[f"{name}_valid"] = True
            row_result[f"{name}_reason"] = explanation

        except json.JSONDecodeError:
            row_result[f"{name}_pred"] = None
            row_result[f"{name}_valid"] = False
            row_result[f"{name}_reason"] = "JSON Decode Error"
        except Exception as e:
            # Best-effort: record the failure for this prompt and continue.
            row_result[f"{name}_pred"] = None
            row_result[f"{name}_valid"] = False
            row_result[f"{name}_reason"] = str(e)

        row_result[f"{name}_latency"] = latency

    results.append(row_result)
    if i % 10 == 0:
        print(f"Processed {i}/{len(dataset)}")

results_df = pd.DataFrame(results)

In [None]:
# 4. Evaluation
# Calculate accuracy, valid rate, etc.

def calculate_metrics(df, method_name):
    """Summarise one prompting method's results.

    Args:
        df: Results frame with the columns ``actual_stars``,
            ``<method_name>_valid`` and ``<method_name>_pred``.
        method_name: Column prefix of the method to evaluate.

    Returns:
        A dict with the keys "Method", "Valid Rate", "Accuracy" and "MAE".
        "Valid Rate" is the share of all rows flagged valid (0.0 for an
        empty frame). "Accuracy" (exact match) and "MAE" are computed over
        the valid rows only. With no valid rows, "Accuracy" is 0 and "MAE"
        is NaN, because an error of 0 would read as a perfect score.
    """
    total_rows = len(df)
    valid_rows = df[df[f"{method_name}_valid"].eq(True)]
    # Guard the division so an empty results frame does not raise.
    validity_rate = len(valid_rows) / total_rows if total_rows else 0.0

    if valid_rows.empty:
        return {
            "Method": method_name,
            "Valid Rate": validity_rate,
            "Accuracy": 0,
            "MAE": float("nan"),
        }

    # Coerce predictions to numbers so a stray string such as "4" cannot
    # break the subtraction; anything unparseable becomes NaN.
    predicted = pd.to_numeric(valid_rows[f"{method_name}_pred"], errors="coerce")
    actual = valid_rows['actual_stars']

    # Accuracy (Exact match)
    accuracy = (predicted == actual).mean()

    # Mean Absolute Error
    mae = (predicted - actual).abs().mean()

    return {
        "Method": method_name,
        "Valid Rate": validity_rate,
        "Accuracy": accuracy,
        "MAE": mae
    }

# One summary row per prompting approach, collected into a table and printed.
metrics = [
    calculate_metrics(results_df, method)
    for method in ("Zero-Shot", "Few-Shot", "CoT")
]

metrics_df = pd.DataFrame(metrics)
print(metrics_df)

In [None]:
# Save Results
# Write the per-review results and the per-method summary next to the notebook,
# leaving out the DataFrame index column.
for frame, csv_path in (
    (results_df, 'evaluation_results.csv'),
    (metrics_df, 'metrics_summary.csv'),
):
    frame.to_csv(csv_path, index=False)