# Lab 1 — Zero-Shot vs. Few-Shot Classification

**Module reference:** [Module 3, §3.2–§3.3](https://github.com/kunalsuri/prompt-engineering-playbook/blob/main/learn/03-patterns.md)

This lab sends the same sentiment-classification task to an LLM twice:
1. **Zero-shot** — instruction only, no examples
2. **Few-shot** — instruction + 3 demonstration examples

It runs each variant multiple times and compares accuracy and consistency.

---

### Free API Options
| Provider | Free Tier | Sign Up |
|---|---|---|
| **Google Gemini** (recommended) | 15 RPM, 1M tokens/day | [aistudio.google.com/apikey](https://aistudio.google.com/apikey) |
| **Groq** | 30 RPM, 14.4K tokens/min | [console.groq.com](https://console.groq.com) |
| **OpenAI** (paid) | Pay-per-token | [platform.openai.com](https://platform.openai.com/api-keys) |

In [None]:
#@title 🔧 Setup — Run this cell first
!pip install -q openai

import getpass, os

# Interactive provider selection: show the menu, read the user's choice,
# prompt for the matching API key, and build the OpenAI-compatible client
# that the rest of the lab uses.
print("Choose your LLM provider (all work with this lab):")
print("  1. Google Gemini (FREE — recommended)")
print("  2. Groq (FREE)")
print("  3. OpenAI (paid)")
choice = input("\nEnter 1, 2, or 3: ").strip()

# Each branch stores the key in the provider's environment variable, then sets
# the endpoint (BASE_URL), credential (API_KEY) and model name (MODEL).
if choice == "1":
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")
    BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
    API_KEY = os.environ["GOOGLE_API_KEY"]
    MODEL = "gemini-2.0-flash"
elif choice == "2":
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")
    BASE_URL = "https://api.groq.com/openai/v1"
    API_KEY = os.environ["GROQ_API_KEY"]
    MODEL = "llama-3.1-8b-instant"
else:
    # NOTE: any input other than "1" or "2" (not just "3") lands here and
    # selects OpenAI.
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")
    BASE_URL = None  # None means: use the client library's default endpoint
    API_KEY = os.environ["OPENAI_API_KEY"]
    MODEL = "gpt-4o-mini"

from openai import OpenAI

# Only pass base_url when a non-default endpoint was selected.
client_kwargs = {"api_key": API_KEY}
if BASE_URL:
    client_kwargs["base_url"] = BASE_URL
client = OpenAI(**client_kwargs)

def complete(prompt, *, system="", temperature=0.2, max_tokens=1024):
    """Send a single-turn chat request and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system: Optional system message; omitted from the request when empty.
        temperature: Sampling temperature forwarded to the API call.
        max_tokens: Token limit forwarded to the API call.

    Returns:
        The content of the first choice's message, or "" when that content
        is empty or None.
    """
    user_turn = {"role": "user", "content": prompt}
    if system:
        conversation = [{"role": "system", "content": system}, user_turn]
    else:
        conversation = [user_turn]

    reply = client.chat.completions.create(
        model=MODEL,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    text = reply.choices[0].message.content
    return text or ""

# Report which model was selected (no request is sent to the provider here).
print(f"\n✅ Connected to {MODEL}")

## Test Data & Prompt Variants

In [None]:
# Test data: (review_text, expected_label)
TEST_CASES = [
    ("The battery lasts all day and the camera is incredible!", "Positive"),
    ("Delivery was fast but the product broke after two days.", "Negative"),
    ("It's okay for the price. Nothing special, nothing terrible.", "Neutral"),
    ("Absolutely love it — best purchase I've made this year.", "Positive"),
    ("Waste of money. Returned it immediately.", "Negative"),
]

# System message shared by both prompt variants.
SYSTEM = "You are a sentiment classifier. Respond with exactly one word: Positive, Negative, or Neutral."

# Zero-shot: instruction only. `{review}` is filled in with str.format().
ZERO_SHOT_TEMPLATE = (
    "Classify the sentiment of the following product review.\n"
    "Respond with exactly one word: Positive, Negative, or Neutral.\n\n"
    "Review: \"{review}\"\n"
    "Sentiment:"
)

# Few-shot: the same instruction plus three labelled demonstrations
# (one per class) placed before the review to classify.
FEW_SHOT_TEMPLATE = (
    "Classify the sentiment of product reviews.\n"
    "Respond with exactly one word: Positive, Negative, or Neutral.\n\n"
    "Review: \"Superb quality and fast shipping!\" → Positive\n"
    "Review: \"Stopped working after a week. Very disappointed.\" → Negative\n"
    "Review: \"It does what it says. Average product.\" → Neutral\n\n"
    "Review: \"{review}\"\n"
    "Sentiment:"
)

# Number of times each prompt is sent per test case.
RUNS_PER_VARIANT = 3

print("Test data and prompts loaded.")

## Run Experiment

In [None]:
def normalize_label(raw):
    """Map a raw model reply to one of the three sentiment labels.

    The reply is stripped of surrounding whitespace and periods, then searched
    case-insensitively for "Positive", "Negative" and "Neutral". When more than
    one label appears, the one that occurs EARLIEST in the reply wins, so a
    reply such as "Neutral. Neither positive nor negative." is read as
    "Neutral" rather than being decided by a fixed label order.

    Args:
        raw: The reply text (a string).

    Returns:
        The canonical label if one is found; otherwise the first 20 characters
        of the cleaned reply, so unexpected output stays visible in the results.
    """
    cleaned = raw.strip().strip(".").strip()
    lowered = cleaned.lower()

    best_label = None
    best_pos = len(lowered)  # any real match position is smaller than this
    for label in ("Positive", "Negative", "Neutral"):
        pos = lowered.find(label.lower())
        if pos != -1 and pos < best_pos:
            best_label, best_pos = label, pos

    if best_label is not None:
        return best_label
    return cleaned[:20]


from collections import Counter

# results[variant] collects one summary dict per test case.
results = {"zero_shot": [], "few_shot": []}

for variant_name, template in [("zero_shot", ZERO_SHOT_TEMPLATE), ("few_shot", FEW_SHOT_TEMPLATE)]:
    print(f"Running {variant_name.replace('_', '-')} variant ({RUNS_PER_VARIANT} runs × {len(TEST_CASES)} cases)...")
    for review, expected in TEST_CASES:
        prompt = template.format(review=review)

        # Send the same prompt RUNS_PER_VARIANT times and normalise each reply.
        run_results = [
            normalize_label(complete(prompt, system=SYSTEM, temperature=0.3))
            for _ in range(RUNS_PER_VARIANT)
        ]

        # Majority vote. Counter.most_common breaks ties by first occurrence,
        # so the winner is reproducible; max(set(...)) depended on set
        # iteration order, which varies between interpreter runs.
        majority = Counter(run_results).most_common(1)[0][0]
        correct = majority == expected
        consistent = len(set(run_results)) == 1  # every run gave the same label

        # Only add an ellipsis when the review was actually shortened.
        preview = review if len(review) <= 40 else review[:40] + "..."

        results[variant_name].append({
            "review": preview,
            "expected": expected,
            "predictions": run_results,
            "majority": majority,
            "correct": correct,
            "consistent": consistent,
        })

print("\n✅ Experiment complete!")

## Results

In [None]:
import pandas as pd

def _percent(rows, flag):
    """Return the percentage of `rows` whose `flag` entry is truthy (0.0 when `rows` is empty)."""
    if not rows:
        return 0.0
    return sum(1 for r in rows if r[flag]) / len(rows) * 100


# Per-variant detail tables: one row per test case.
for variant_name in ("zero_shot", "few_shot"):
    label = variant_name.replace("_", "-").title()
    print(f"\n--- {label} Results ---")
    rows = [
        {
            "Review": r["review"],
            "Expected": r["expected"],
            "Predictions": ", ".join(r["predictions"]),
            "Correct": "✓" if r["correct"] else "✗",
            "Consistent": "✓" if r["consistent"] else "✗",
        }
        for r in results[variant_name]
    ]
    display(pd.DataFrame(rows))

# Summary
print("\n" + "="*50)
print("  SUMMARY")
print("="*50)
zs = results["zero_shot"]
fs = results["few_shot"]
# Accuracy counts cases whose majority label matched the expected label;
# consistency counts cases where every run returned the same label.
zs_acc = _percent(zs, "correct")
fs_acc = _percent(fs, "correct")
zs_con = _percent(zs, "consistent")
fs_con = _percent(fs, "consistent")

summary = pd.DataFrame([
    {"Metric": "Accuracy (majority vote)", "Zero-Shot": f"{zs_acc:.0f}%", "Few-Shot": f"{fs_acc:.0f}%"},
    {"Metric": "Consistency (all runs agree)", "Zero-Shot": f"{zs_con:.0f}%", "Few-Shot": f"{fs_con:.0f}%"},
])
display(summary)

print("\n📝 Takeaway: Few-shot examples typically improve both accuracy and consistency")
print("on classification tasks, especially for ambiguous or edge-case inputs.")