In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Lazily build the full path of every file found beneath the Kaggle input
# directory, then print them one per line in walk order.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/yelp-reviews-dataset/yelp.csv


In [2]:
import pandas as pd

# Tunable settings for the evaluation sample, named so the sample size and
# seed are visible and can be changed in one place.
DATA_PATH = "/kaggle/input/yelp-reviews-dataset/yelp.csv"
SAMPLE_SIZE = 15
RANDOM_SEED = 50

# Keep the raw frame under its own name; `df` holds only the review text and
# its star label, with rows missing either value dropped.
reviews_raw = pd.read_csv(DATA_PATH)
df = reviews_raw[["text", "stars"]].dropna()

# Fixed-seed sample so every prompt variant is scored on the same reviews.
sample_df = df.sample(n=SAMPLE_SIZE, random_state=RANDOM_SEED)
sample_df.head()


Unnamed: 0,text,stars
9102,"Every time I visit my parents, I pass this pla...",3
7868,We took our grandson (Sebastian) to the park f...,5
4176,"Kaley helped me get the best room in Mesa AZ, ...",5
4161,Completely shady. Towed our U-haul as we were...,1
8770,this place is growing on me. i wasn't a big fa...,4


In [3]:
def clean_text(text):
    """Normalise a review string.

    Lowercases ``text``, collapses every run of whitespace into a single
    space (dropping leading/trailing whitespace), and keeps at most the
    first 500 characters of the result.
    """
    tokens = text.lower().split()
    normalized = " ".join(tokens)
    return normalized[:500]


## Direct Classification Prompting

In [4]:
# Baseline prompt: asks directly for a 1-5 star rating with no rubric or examples.
# The template is filled with str.format(review=...), so the doubled braces
# ({{ and }}) render as literal JSON braces and {review} is the only placeholder.
PROMPT_V1 = """
You are given a Yelp customer review.
Classify the review into a star rating from 1 to 5.

Return your answer strictly in the following JSON format:
{{
  "predicted_stars": <integer from 1 to 5>,
  "explanation": "<brief reason>"
}}

Review:
"{review}"
"""


## Criteria-Based Prompting

In [5]:
# Criteria-based prompt: adds an analyst persona, a three-step procedure and an
# explicit per-star rubric before asking for the JSON answer.
# The template is filled with str.format(review=...), so the doubled braces
# ({{ and }}) render as literal JSON braces and {review} is the only placeholder.
PROMPT_V2 = """
You are an expert Yelp review analyst.

Step 1: Internally analyze sentiment, complaints, praise, and overall satisfaction.
Step 2: Decide the most accurate star rating from 1 to 5.
Step 3: Output ONLY the final answer in valid JSON.

Use the following rubric:
- 1 star: Very negative experience, strong complaints
- 2 stars: Mostly negative, some minor positives
- 3 stars: Mixed or neutral experience
- 4 stars: Mostly positive with minor issues
- 5 stars: Extremely positive, enthusiastic praise

Return ONLY valid JSON in this format:

{{
  "predicted_stars": <1-5>,
  "explanation": "<one sentence justification>"
}}

Review:
"{review}"
"""


## Reasoning-Constrained Prompting

In [6]:
# Reasoning-constrained prompt: same three-step procedure as PROMPT_V2 but with
# no rubric, plus an explicit instruction not to include the reasoning steps.
# The template is filled with str.format(review=...), so the doubled braces
# ({{ and }}) render as literal JSON braces and {review} is the only placeholder.
PROMPT_V3 = """
You are a Yelp rating classifier.

Step 1: Internally analyze sentiment, complaints, praise, and overall satisfaction.
Step 2: Decide the most accurate star rating from 1 to 5.
Step 3: Output ONLY the final answer in valid JSON.

Do NOT include your reasoning steps.

Output format:
{{
  "predicted_stars": <1-5>,
  "explanation": "<concise justification>"
}}

Review:
"{review}"
"""

## Few-Shot Learning Prompting

In [7]:
# Few-shot prompt: three worked examples (1, 3 and 5 stars) followed by explicit
# rules that push mixed reviews to 3 stars and discourage inflated ratings.
# The template is filled with str.format(review=...), so the doubled braces
# ({{ and }}) render as literal JSON braces and {review} is the only placeholder.
PROMPT_V4 = """
You are an expert Yelp review analyst.

Examples:
Review: "Terrible service and rude staff."
Stars: 1

Review: "Food was good but service was slow."
Stars: 3

Review: "Amazing food and excellent service!"
Stars: 5

Rules:
- If both positives and negatives are present, choose 3 stars.
- Use 5 stars only for strong enthusiasm with no complaints.
- Avoid inflating ratings.

Return ONLY valid JSON:
{{
  "predicted_stars": <1-5>,
  "explanation": "<brief reason>"
}}

Review:
"{review}"
"""

## Groq API setup

In [8]:
!pip install -q groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
# Read the Groq API key through kaggle_secrets rather than hardcoding it in the notebook.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# Look up the secret stored under the label "GROQ_API_KEY"; the value is later
# passed to the Groq client as its api_key.
secret_value_0 = user_secrets.get_secret("GROQ_API_KEY")


In [10]:
from groq import Groq
import os

# Shared API client used by call_llm, authenticated with the key held in secret_value_0.
client = Groq(api_key=secret_value_0)
# Model identifier sent as `model=` on every chat-completion request.
MODEL_NAME = "llama-3.1-8b-instant"



In [11]:
def call_llm(prompt, temperature=0, max_tokens=120):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: Fully formatted prompt string.
        temperature: Sampling temperature forwarded to the API. Defaults to 0,
            the value previously hard-coded here.
        max_tokens: Completion-length cap forwarded to the API. Defaults to 120,
            the value previously hard-coded here. Presumably a reply that hits
            this cap is cut off, which would leave its JSON incomplete.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or "" when the response carries no content.
    """
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    content = completion.choices[0].message.content
    # Guard against a missing (None) content so callers always get a string
    # instead of an AttributeError from None.strip().
    return (content or "").strip()


In [12]:
import json


def _coerce_stars(value):
    """Convert a model-supplied rating to an int in 1..5, or None if it is not one."""
    if isinstance(value, bool):
        # bool is an int subclass; true/false are not ratings.
        return None
    if isinstance(value, str):
        try:
            value = int(value.strip())
        except ValueError:
            return None
    elif isinstance(value, float):
        if not value.is_integer():
            return None
        value = int(value)
    if isinstance(value, int) and 1 <= value <= 5:
        return value
    return None


def run_experiment(prompt_template, data=None):
    """Score one prompt template against a set of labelled reviews.

    Each review is substituted into ``prompt_template`` (via ``str.format`` with
    a ``review`` field), sent through ``call_llm``, and the reply is parsed as
    strict JSON.

    Args:
        prompt_template: Format string containing a ``{review}`` placeholder.
        data: Optional DataFrame with ``text`` and ``stars`` columns. Defaults
            to the notebook-level ``sample_df``.

    Returns:
        A DataFrame with one row per review and the columns ``actual`` (the
        labelled stars), ``predicted`` (an int in 1..5, or None when the reply
        is unusable) and ``valid_json`` (True when the reply parsed as JSON and
        contained a ``predicted_stars`` key).
    """
    if data is None:
        data = sample_df

    results = []
    # NOTE(review): clean_text() is defined in this notebook but is not applied
    # here; the raw review text is sent to the model as-is.
    for review, actual in zip(data["text"], data["stars"]):
        response = call_llm(prompt_template.format(review=review))

        # Catch only what parsing can raise (malformed JSON, non-dict payload,
        # missing key) so KeyboardInterrupt and real bugs are not swallowed.
        try:
            parsed = json.loads(response)
            raw_stars = parsed["predicted_stars"]
        except (ValueError, TypeError, KeyError):
            results.append({"actual": actual, "predicted": None, "valid_json": False})
            continue

        # Coerce so that e.g. "4" or 4.0 compares equal to the integer label.
        results.append({
            "actual": actual,
            "predicted": _coerce_stars(raw_stars),
            "valid_json": True,
        })

    return pd.DataFrame(results)


In [13]:
# Evaluate each prompt variant in turn (V1 through V4) on the same sample.
res_v1, res_v2, res_v3, res_v4 = (
    run_experiment(template)
    for template in (PROMPT_V1, PROMPT_V2, PROMPT_V3, PROMPT_V4)
)

## Metrics

In [14]:
def evaluate(df):
    """Summarise one experiment's results.

    Returns a dict with ``Accuracy`` (fraction of rows where ``predicted``
    equals ``actual``) and ``JSON_Validity`` (mean of the ``valid_json``
    column).
    """
    is_correct = df["actual"].eq(df["predicted"])
    json_ok = df["valid_json"]
    return {
        "Accuracy": is_correct.mean(),
        "JSON_Validity": json_ok.mean(),
    }

# One row per prompt variant: its label plus the metrics from evaluate().
summary = pd.DataFrame([
    {"Prompt": label, **evaluate(result)}
    for label, result in (
        ("V1", res_v1),
        ("V2", res_v2),
        ("V3", res_v3),
        ("V4", res_v4),
    )
])

summary

Unnamed: 0,Prompt,Accuracy,JSON_Validity
0,V1,0.866667,1.0
1,V2,0.8,1.0
2,V3,0.8,1.0
3,V4,0.733333,1.0
