In [1]:
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from tqdm.auto import tqdm
from transformers import pipeline, logging
from IPython.display import display

In [2]:
# Load

In [3]:
#Load Your Dataset
ds = load_dataset("imdb", split="test[:100]")
df = pd.DataFrame({
    "text": ds["text"],
    "label": ["Positive" if l == 1 else "Negative" for l in ds["label"]]
})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
print(df.head())

                                                text     label
0  I love sci-fi and am willing to put up with a ...  Negative
1  Worth the entertainment value of a rental, esp...  Negative
2  its a totally average film with a few semi-alr...  Negative
3  STAR RATING: ***** Saturday Night **** Friday ...  Negative
4  First off let me say, If you haven't enjoyed a...  Negative


In [5]:
class_labels = ["Positive", "Negative"]

In [6]:
def normalize_prediction(text):
    text = text.strip().lower()
    for label in class_labels:
        if label.lower() in text:
            return label
    return None

In [7]:
## Prepare the Prompt
def build_prompt(text):
    # Keep the instruction simple and direct
    instruction = (
        f"Instruction: Classify the movie review sentiment as {', '.join(class_labels)}. "
        f"Respond with only the label."
    )

    few_shot_examples = [
        {"text": "The film had stunning visuals and a beautiful story.", "label": "Positive"},
        {"text": "I didn't like the pacing. It felt too slow and boring.", "label": "Negative"}
    ]

    prompt = instruction + "\n\n"
    for ex in few_shot_examples:
        # Use single newlines between Review and Sentiment
        prompt += f"Review: {ex['text']}\nLabel: {ex['label']}\n\n"

    # End exactly at "Label:" with a single space, no trailing newlines
    prompt += f"Review: {text}\nLabel:"
    return prompt

In [8]:
print(build_prompt("The film was full oflaughterss and madness. 10/10 recomenddd!!!!"))

Instruction: Classify the movie review sentiment as Positive, Negative. Respond with only the label.

Review: The film had stunning visuals and a beautiful story.
Label: Positive

Review: I didn't like the pacing. It felt too slow and boring.
Label: Negative

Review: The film was full oflaughterss and madness. 10/10 recomenddd!!!!
Label:


In [9]:
pip install bitsandbytes



In [10]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "microsoft/phi-2"

# 1. Setup 4-bit quantization config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

# 2. Load tokenizer and config (as we did before)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
config = AutoConfig.from_pretrained(model_name)
config.pad_token_id = tokenizer.pad_token_id

# 3. Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    quantization_config=quant_config, # This is the magic line
    device_map="auto",
    trust_remote_code=True
)

Loading weights:   0%|          | 0/453 [00:00<?, ?it/s]

In [11]:
from time import time


start_time = time() # Start the clock
predictions = []
model.eval() # Set model to evaluation mode

for text in tqdm(df['text'], desc="Classifying"):
    prompt = build_prompt(text)

    # 1. Tokenize (move to GPU)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )

    # 3. Decode and normalize
    raw = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(prompt):]
    prediction = normalize_prediction(raw)
    predictions.append(prediction)
end_time = time() # Stop the clock
# Print runtime
print(f"\n Inference time: {end_time - start_time:.2f} seconds")

Classifying:   0%|          | 0/100 [00:00<?, ?it/s]


 Inference time: 53.71 seconds


In [15]:
# Add preditions to DataFrame
df["few_shot_prediction"] = predictions

# Save to CSV
df.to_csv("few_shot_predictions.csv", index=False)

display(df.head(10))

Unnamed: 0,text,label,few_shot_prediction
0,I love sci-fi and am willing to put up with a ...,Negative,Negative
1,"Worth the entertainment value of a rental, esp...",Negative,
2,its a totally average film with a few semi-alr...,Negative,Negative
3,STAR RATING: ***** Saturday Night **** Friday ...,Negative,Negative
4,"First off let me say, If you haven't enjoyed a...",Negative,
...,...,...,...
95,"This film seems well made, and more efforts sh...",Negative,Negative
96,"It hurt to watch this movie, it really did... ...",Negative,Negative
97,"Rita Hayworth is just stunning at times and, f...",Negative,Negative
98,"Like 'Singin' in the Rain', 'Cover Girl' has a...",Negative,Negative


In [18]:
import random

def evaluate_predictions(df, label_col="label", pred_col="few_shot_prediction", num_examples=5):
  # Normalize both columns (optional cleanup)
  df[label_col] = df[label_col].str.lower().str.strip()
  df[pred_col] = df[pred_col].str.lower().str.strip()

  # Accuracy
  correct = (df[label_col] == df[pred_col]).sum()
  total = len(df)
  accuracy = correct / total

  print(f"\n Accuracy: {accuracy*100:.2f}% ({correct}/{total} correct)")

  # Show a few mismatches
  mismatches = df[df[label_col] != df[pred_col]]
  if mismatches.empty:
    print("No mismatches found!")
  else:
    print(f"\n Showing {min(num_examples, len(mismatches))} random mismatches:")
    for _, row in mismatches.sample(min(num_examples, len(mismatches)), random_state=42).iterrows():
      print(f"\n Review: {row['text']}")
      print(f" > True Label: {row[label_col]}")
      print(f" > Predicted: {row[pred_col]}")
# Example usage
evaluate_predictions(df, label_col="label", pred_col="few_shot_prediction")


 Accuracy: 86.00% (86/100 correct)

 Showing 5 random mismatches:

 Review: An obscure horror show filmed in the Everglades. Two couples stay overnight in a cabin after being made a little uneasy by the unfriendliness of the locals. Who, or what, are the Blood Stalkers? After awhile they find out. Watch for the character of the village idiot who clucks like a chicken, he certainly is weird.
 > True Label: negative
 > Predicted: None

 Review: This film features two of my favorite guilty pleasures. Sure, the effects are laughable, the story confused, but just watching Hasselhoff in his Knight Rider days is always fun. I especially like the old hotel they used to shoot this in, it added to what little suspense was mustered. Give it a 3.
 > True Label: negative
 > Predicted: None

 Review: Worth the entertainment value of a rental, especially if you like action movies. This one features the usual car chases, fights with the great Van Damme kick style, shooting battles with the 40 shell l