In [None]:
import json
import os
import re

import google.generativeai as genai
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

In [None]:
# Read the Gemini API key from the environment so the notebook runs on a fresh
# kernel and the secret is never stored in the notebook itself.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

# Configure Gemini
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-pro')

In [None]:
# Labelled evaluation set. Each row is (ticket id, ticket text, gold tags) and is
# expanded into the dict shape the rest of the notebook reads.
tickets = [
    {"id": ticket_id, "text": body, "true_tags": gold_tags}
    for ticket_id, body, gold_tags in (
        (1, "I was charged twice for my order. Need refund.", ["billing", "refund", "payment_issue"]),
        (2, "App crashes when uploading files on iPhone.", ["technical", "mobile", "bug"]),
        (3, "Forgot password, reset email not arriving.", ["account", "login", "password"]),
        (4, "Order hasn't arrived, tracking shows stuck.", ["shipping", "delivery", "tracking"]),
        (5, "Want dark mode feature in mobile app.", ["feature_request", "mobile", "ui"]),
        (6, "Cannot update payment method in settings.", ["billing", "account", "technical"]),
        (7, "Getting error 500 when trying to login.", ["technical", "login", "bug"]),
        (8, "Need to delete my account permanently.", ["account", "privacy", "deletion"]),
        (9, "Received damaged product, need replacement urgently.", ["shipping", "delivery", "urgent"]),
        (10, "Suspicious login attempt from unknown location.", ["security", "account", "urgent"]),
    )
]

# Tag vocabulary offered to the model in every prompt (order is preserved in the prompt).
available_tags = [
    "billing", "technical", "account", "shipping", "feature_request",
    "inquiry", "urgent", "security", "bug", "payment_issue",
    "login", "refund", "delivery", "mobile", "password",
    "tracking", "ui", "privacy", "deletion",
]


In [None]:
def zero_shot_classify(text):
    """Ask the Gemini model to tag a support ticket without any worked examples.

    Args:
        text: Raw ticket text to classify.

    Returns:
        A list of at most three tag strings taken from the first JSON array
        found in the model's reply. When the reply has no readable text,
        contains no array, or the array is not valid JSON, the placeholder
        ``["general", "inquiry", "support"]`` is returned instead.
    """
    fallback = ["general", "inquiry", "support"]

    prompt = f"""Classify this support ticket into 3 relevant tags.

Ticket: {text}

Available tags: {', '.join(available_tags)}

Return ONLY a JSON array with exactly 3 tags, nothing else.
Example: ["billing", "refund", "urgent"]

Your response:"""

    response = model.generate_content(prompt)
    try:
        reply = response.text
    except ValueError:
        # The SDK's `.text` accessor raises ValueError when the reply carries no
        # text part (e.g. a blocked response); treat that like an unparseable reply.
        return fallback

    # DOTALL lets the array span several lines (pretty-printed or fenced JSON).
    match = re.search(r'\[.*?\]', reply, re.DOTALL)
    if match is None:
        return fallback

    try:
        parsed = json.loads(match.group(0))
    except json.JSONDecodeError:
        # e.g. single-quoted items or a truncated array
        return fallback

    # Keep only string tags so downstream set-based scoring never sees unhashable items.
    return [tag for tag in parsed if isinstance(tag, str)][:3]

# Classify every ticket with the zero-shot prompt, echoing each prediction as it arrives.
print("\n[Zero-Shot Classification]")
zero_results = []
for ticket in tickets:
    zero_results.append(zero_shot_classify(ticket['text']))
    print(f"Ticket {ticket['id']}: {zero_results[-1]}")

In [None]:
# Worked demonstrations prepended to every few-shot prompt. The notebook never
# defined this name, so the cell failed on a fresh kernel. These examples are
# illustrative, use only tags from `available_tags`, and deliberately do not
# reuse any ticket from the evaluation set. Replace them with the intended
# demonstrations if those are recovered.
few_shot_examples = """Here are examples of support tickets and their tags:

Ticket: My subscription renewed but the invoice shows the wrong amount.
Tags: ["billing", "payment_issue", "inquiry"]

Ticket: The Android app freezes whenever I open the notifications tab.
Tags: ["technical", "mobile", "bug"]

Ticket: Someone changed my account email without my permission.
Tags: ["security", "account", "urgent"]"""


def few_shot_classify(text):
    """Ask the Gemini model to tag a support ticket after showing worked examples.

    The prompt starts with ``few_shot_examples`` and then presents the new ticket.

    Args:
        text: Raw ticket text to classify.

    Returns:
        A list of at most three tag strings taken from the first JSON array
        found in the model's reply. When the reply has no readable text,
        contains no array, or the array is not valid JSON, the placeholder
        ``["general", "inquiry", "support"]`` is returned instead.
    """
    fallback = ["general", "inquiry", "support"]

    prompt = f"""{few_shot_examples}

Now classify this new ticket:
Ticket: {text}

Available tags: {', '.join(available_tags)}

Return ONLY a JSON array with exactly 3 tags.
Your response:"""

    response = model.generate_content(prompt)
    try:
        reply = response.text
    except ValueError:
        # The SDK's `.text` accessor raises ValueError when the reply carries no
        # text part (e.g. a blocked response); treat that like an unparseable reply.
        return fallback

    # DOTALL lets the array span several lines (pretty-printed or fenced JSON).
    match = re.search(r'\[.*?\]', reply, re.DOTALL)
    if match is None:
        return fallback

    try:
        parsed = json.loads(match.group(0))
    except json.JSONDecodeError:
        # e.g. single-quoted items or a truncated array
        return fallback

    # Keep only string tags so downstream set-based scoring never sees unhashable items.
    return [tag for tag in parsed if isinstance(tag, str)][:3]

# Classify every ticket with the few-shot prompt, echoing each prediction as it arrives.
print("\n[Few-Shot Classification]")
few_results = []
for ticket in tickets:
    few_results.append(few_shot_classify(ticket['text']))
    print(f"Ticket {ticket['id']}: {few_results[-1]}")

In [None]:
def calculate_metrics(true_tags_list, pred_tags_list):
    """Compute micro-averaged precision, recall and F1 over per-ticket tag sets.

    Each ticket's gold and predicted tags are compared as sets, so order and
    duplicates are ignored. True positives, false positives and false negatives
    are pooled across all tickets before the ratios are taken.

    Args:
        true_tags_list: One iterable of gold tags per ticket.
        pred_tags_list: One iterable of predicted tags per ticket, in the same
            ticket order as ``true_tags_list``.

    Returns:
        A dict with float values under "precision", "recall" and "f1". A ratio
        whose denominator is zero is reported as 0.0.

    Raises:
        ValueError: If the two inputs do not contain the same number of tickets
            (pairing them would otherwise silently drop the extras).
    """
    true_tags_list = list(true_tags_list)
    pred_tags_list = list(pred_tags_list)
    if len(true_tags_list) != len(pred_tags_list):
        raise ValueError(
            f"Expected one prediction per ticket: got {len(true_tags_list)} gold "
            f"entries and {len(pred_tags_list)} predictions."
        )

    tp = fp = fn = 0
    for true_tags, pred_tags in zip(true_tags_list, pred_tags_list):
        true_set = set(true_tags)
        pred_set = set(pred_tags)
        tp += len(true_set & pred_set)
        fp += len(pred_set - true_set)
        fn += len(true_set - pred_set)

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    return {"precision": precision, "recall": recall, "f1": f1}

# Score both prompting strategies against the gold tags and print a side-by-side summary.
true_tags = [t['true_tags'] for t in tickets]

zero_metrics = calculate_metrics(true_tags, zero_results)
few_metrics = calculate_metrics(true_tags, few_results)

print("RESULTS")
print(f"\nZero-Shot: Precision={zero_metrics['precision']:.3f}, Recall={zero_metrics['recall']:.3f}, F1={zero_metrics['f1']:.3f}")
print(f"Few-Shot:  Precision={few_metrics['precision']:.3f}, Recall={few_metrics['recall']:.3f}, F1={few_metrics['f1']:.3f}")

# Relative F1 improvement is undefined when the zero-shot baseline is 0, so
# report that explicitly instead of dividing by zero.
if zero_metrics['f1'] > 0:
    print(f"\nImprovement: {((few_metrics['f1'] - zero_metrics['f1']) / zero_metrics['f1'] * 100):.1f}%")
else:
    print("\nImprovement: n/a (zero-shot F1 is 0)")


In [None]:
# Persist the per-ticket predictions alongside the gold tags. Tag lists are
# stringified so each one occupies a single CSV cell.
df_results = pd.DataFrame({
    'ticket_id': [row['id'] for row in tickets],
    'text': [row['text'] for row in tickets],
    'true_tags': [str(row['true_tags']) for row in tickets],
    'zero_shot': list(map(str, zero_results)),
    'few_shot': list(map(str, few_results)),
})

df_results.to_csv('results.csv', index=False)

# Persist the aggregate scores for both strategies as pretty-printed JSON.
with open('metrics.json', 'w') as metrics_file:
    metrics_file.write(json.dumps(
        {'zero_shot': zero_metrics, 'few_shot': few_metrics},
        indent=2,
    ))


