In [1]:
!pip install pandas google-generativeai tqdm scikit-learn
!pip install transformers accelerate torch
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.49.0-py3-none-manylinux_2_24_x86_64.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.49.0


In [46]:
import pandas as pd
import os
import numpy as np
import json
import re
import google.generativeai as genai
from tqdm import tqdm
from sklearn.metrics import accuracy_score, mean_absolute_error
import requests

In [65]:
MISTRAL_COT_PROMPT_TEMPLATE = """
[INST]
You are an expert sentiment analysis assistant.

Analyze the Yelp review step by step:
1. Identify the overall sentiment
2. Consider aspects like service, food, price, ambiance
3. Map the sentiment to a star rating from 1 to 5

JSON format (must match exactly):
{{"stars": <integer from 1 to 5>, "explanation": "<short explanation>"}}

Rules:
- predicted_stars must be an integer between 1 and 5 and not a decimal
- explanation should briefly summarize the reasoning (1–2 sentences)
- Do NOT include your step-by-step reasoning
- Do NOT include any text outside the JSON

Review:
"{review_text}"
Remember:
Output ONLY the JSON object.
"""


In [None]:
# GEMINI_API_KEY="AIzaSyD3J3AZXPCYrdNfOlvKnRxsTvBIDG1KBrk"
# GEMINI_API_KEY="AIzaSyBcrnxHE8UycWfmwpqufrS2T1F9MAMx-Gk"
# genai.configure(api_key=GEMINI_API_KEY)
# model = genai.GenerativeModel("gemini-3.0-flash-preview")

In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True
)

model.eval()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32768, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [55]:
def safe_parse_model_output(text):
    import json, re

    text = text.replace("```json", "").replace("```", "").strip()

    # 🔥 FIND ALL JSON OBJECTS
    matches = re.findall(r"\{[\s\S]*?\}", text)
    if not matches:
        return None

    # ✅ USE THE LAST ONE (MODEL ANSWER)
    raw_json = matches[-1]

    try:
        data = json.loads(raw_json)
    except Exception:
        return None

    # Flexible schema
    stars = data.get("stars") or data.get("predicted_stars")
    explanation = data.get("explanation", "")

    if stars is None:
        return None

    try:
        stars = int(stars)
    except Exception:
        return None

    if not 1 <= stars <= 5:
        return None

    return {
        "predicted_stars": stars,
        "explanation": explanation,
        "raw_json": raw_json
    }


# COT MODEL

In [56]:
def classify_review_mistral_cot(review_text: str):
    prompt = MISTRAL_COT_PROMPT_TEMPLATE.format(review_text=review_text)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.2,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # ✅ SAFE PARSE HERE
    parsed = safe_parse_model_output(decoded)

    if parsed is None:
        print("⚠️ Parsing failed")
        return None

    return parsed



In [69]:
df = pd.read_csv("/content/yelp.csv")
sampled_df = df.sample(n=200, random_state=42)
sampled_df.head(2)

Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
6252,QVR7dsvBeg8xFt9B-vd1BA,2010-07-22,hwYVJs8Ko4PMjI19QcR57g,4,We got here around midnight last Friday... the...,review,90a6z--_CUrl84aCzZyPsg,5,5,2
4684,24qSrF_XOrvaHDBy-gLIQg,2012-01-22,0mvthYPKb2ZmKhCADiKSmQ,5,Brought a friend from Louisiana here. She say...,review,9lJAj_2zCvP2jcEiRjF9oA,0,0,0


In [70]:
results = []
failed = 0

for _, row in tqdm(sampled_df.iterrows(), total=200):
    prediction1 = classify_review_mistral_cot(row["text"])
    if prediction1 is None:
        failed += 1
        results.append({
            "text": row["text"],
            "true_stars": row["stars"],
            "predicted_stars": None,
            "explanation": None,
            "raw_model_json": None
        })
        continue

    results.append({
        "text": row["text"],
        "true_stars": row["stars"],
        "predicted_stars": prediction1["predicted_stars"],
        "explanation": prediction1["explanation"],
        "raw_model_json": prediction1["raw_json"]
    })


  0%|          | 0/200 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/200 [00:12<42:52, 12.93s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 2/200 [00:17<26:16,  7.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  2%|▏         | 3/200 [00:24<24:23,  7.43s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  2%|▏         | 4/200 [00:30<23:02,  7.05s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  2%|▎         | 5/200 [00:36<21:09,  6.51s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  3%|▎         | 6/200 [00:45<24:31,  7.59s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  4%|▎         | 7/200 [00:51<22:01,  6.85s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  4%|▍         | 8/200 [00:55<19:18,  6.04s/it]Setting `pad_token_id` to `eos_token_id`:

In [71]:
results[:5]

[{'text': "We got here around midnight last Friday... the place was dead. However, they were still serving food and we enjoyed some well made pub grub. Service was friendly, quality cocktails were served, and the atmosphere is derived from an old Uno's, which certainly works for a sports bar. It being located in a somewhat commercial area, I can see why it's empty so late on a Friday. From what my friends tell me - this is a great spot for happy hour, and it stays relatively busy thru 10pm.\n\n*UPDATE - Great patio for day-drinking on the weekends!",
  'true_stars': 4,
  'predicted_stars': 4,
  'explanation': "The review mentions the place was serving food and well-made pub grub, friendly service, quality cocktails, and a good atmosphere. The reviewer also mentions that it's a great spot for happy hour and stays busy until 10pm, but it was empty late on a Friday. The update mentions a great patio for day-drinking on weekends.",
  'raw_model_json': '{"stars": 4, "explanation": "The revi

# ACCURACY

In [72]:
import pandas as pd

results_df = pd.DataFrame(results)
results_df.to_csv("mistral_cot_results.csv", index=False)

valid = results_df.dropna()

accuracy = (valid["true_stars"] == valid["predicted_stars"]).mean()
mae = (valid["true_stars"] - valid["predicted_stars"]).abs().mean()

print(f"Total samples: {len(results_df)}")
print(f"Valid predictions: {len(valid)}")
print(f"Failed predictions: {failed}")
print(f"Accuracy: {accuracy:.3f}")
print(f"MAE: {mae:.3f}")


Total samples: 200
Valid predictions: 200
Failed predictions: 0
Accuracy: 0.670
MAE: 0.340


# JSON VALIDATION

In [73]:
json_validity_rate = len(valid) / len(results_df)
print(f"JSON Validity Rate: {json_validity_rate:.3f}")

JSON Validity Rate: 1.000
