In [9]:
!pip install groq scikit-learn pandas pyyaml

Collecting groq
  Downloading groq-1.0.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-1.0.0-py3-none-any.whl (138 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-1.0.0


In [12]:
import os
from getpass import getpass

# SECURITY: an API key used to be hardcoded in this cell. Treat that key as
# compromised and revoke/rotate it; never commit credentials to a notebook.
# The key is now taken from the environment, falling back to a hidden prompt
# so it never appears in the saved notebook source or output.
if not os.environ.get('GROQ_API_KEY'):
    os.environ['GROQ_API_KEY'] = getpass("Enter your Groq API key: ")

In [14]:
import os
import pandas as pd
from groq import Groq
from sklearn.metrics import accuracy_score, f1_score

# Pull the Groq credential from the environment and build the API client.
# A missing or empty value aborts immediately with a ValueError.
api_key = os.environ.get("GROQ_API_KEY")
if api_key:
    client = Groq(api_key=api_key)
else:
    raise ValueError("Set GROQ_API_KEY in environment or replace here.")

# Inline evaluation set: ten e-commerce customer reviews, each paired with
# its hand-assigned sentiment label (1 = positive, 0 = negative).
_labelled_reviews = [
    ("This product was amazing and delivered fast!", 1),
    ("Worst purchase ever, broken on arrival.", 0),
    ("Loved the quality and customer service.", 1),
    ("Overpriced and poor material.", 0),
    ("Excellent value, highly recommend!", 1),
    ("Very disappointing, doesn't work as advertised.", 0),
    ("Great shopping experience.", 1),
    ("Slow shipping and bad packaging.", 0),
    ("Best electronics buy this year.", 1),
    ("Returned it immediately, total waste.", 0),
]

# Column-oriented view of the same pairs, in the original row order.
data = {
    'text': [review for review, _ in _labelled_reviews],
    'label': [sentiment for _, sentiment in _labelled_reviews],
}
df = pd.DataFrame(data)

# Plain Python lists consumed by the evaluation loop.
texts = df['text'].tolist()
true_labels = df['label'].tolist()  # 1=positive, 0=negative

# Prompt templates keyed by strategy name. Each template carries a `{text}`
# placeholder that is filled with the review being classified.
templates = dict(
    zero_shot=(
        "As a business analyst reviewing e-commerce customer feedback, "
        "classify the sentiment of this review as positive or negative: {text}"
    ),
)

# Get LLM response
def get_llm_response(prompt, text, model="llama-3.3-70b-versatile",
                     temperature=0.2, max_tokens=50, llm_client=None):
    """Send one formatted prompt to the chat model and return its normalized reply.

    Args:
        prompt: Template string containing a ``{text}`` placeholder.
        text: The review substituted into the placeholder.
        model: Model identifier passed to the chat-completions endpoint.
        temperature: Sampling temperature; kept low for steadier classification.
        max_tokens: Upper bound on the length of the generated reply.
        llm_client: Client exposing ``chat.completions.create``. When omitted,
            the module-level ``client`` is used.

    Returns:
        The first choice's message content, stripped and lower-cased. An empty
        string is returned when the message carries no content.
    """
    active_client = client if llm_client is None else llm_client
    response = active_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt.format(text=text)}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The content field may be None; guard so .strip() does not raise.
    content = response.choices[0].message.content
    return (content or "").strip().lower()

# Evaluate prompt performance
def evaluate_prompt(prompt_template):
    """Score a prompt template against the labelled reviews.

    Every review in the module-level ``texts`` list is sent through
    ``get_llm_response`` and the reply is mapped to a binary label, which is
    then compared with the module-level ``true_labels``.

    Args:
        prompt_template: Template string containing a ``{text}`` placeholder.

    Returns:
        A ``(accuracy, f1)`` tuple computed by scikit-learn's
        ``accuracy_score`` and ``f1_score``.
    """
    pred_labels = []
    for text in texts:
        resp = get_llm_response(prompt_template, text)
        # A reply can mention both labels (e.g. reasoning that says "not
        # positive" before concluding "negative"). A plain substring test for
        # 'positive' would score such replies as positive, so the label that
        # appears last is taken as the verdict. If neither word is present
        # both lookups return -1 and the prediction falls back to 0.
        pred = 1 if resp.rfind('positive') > resp.rfind('negative') else 0
        pred_labels.append(pred)
    acc = accuracy_score(true_labels, pred_labels)
    f1 = f1_score(true_labels, pred_labels)
    return acc, f1

# Iterative optimization (adds role-playing + chain-of-thought)
def optimize_prompt(base_template, base_acc=None):
    """Greedily prepend instruction prefixes, keeping only those that raise accuracy.

    Each candidate prefix is placed in front of the best prompt found so far
    and scored with ``evaluate_prompt``; it is kept only when the accuracy is
    strictly higher than the current best.

    Args:
        base_template: Starting template containing a ``{text}`` placeholder.
        base_acc: Accuracy already measured for ``base_template``. When
            omitted, the base template is evaluated here to obtain it.

    Returns:
        A ``(best_prompt, best_acc)`` tuple. If no prefix beats the base
        template, the base template and its accuracy are returned.
    """
    optimizations = [
        "Act as an expert sentiment analyst with e-commerce experience. ",
        "Think step by step: 1. Identify emotional words and tone. 2. Consider overall customer satisfaction. 3. Classify as positive or negative. ",
        "Provide brief reasoning before your final answer. "
    ]
    best_prompt = base_template
    # Start from the base template's own score. Starting from 0 would accept
    # the first prefix whenever it scored above zero, even if it made the
    # prompt worse than the unmodified template.
    if base_acc is None:
        base_acc, _ = evaluate_prompt(base_template)
    best_acc = base_acc
    for opt in optimizations:
        new_prompt = opt + best_prompt
        acc, _ = evaluate_prompt(new_prompt)
        if acc > best_acc:
            best_acc = acc
            best_prompt = new_prompt
            print(f"Improved! New accuracy: {acc:.2f}")
    return best_prompt, best_acc

# Run the demo
# Step 1: measure the unmodified zero-shot prompt.
print("Evaluating base zero-shot prompt on e-commerce reviews...")
base_acc, base_f1 = evaluate_prompt(templates['zero_shot'])
print(f"Base Accuracy: {base_acc:.2f} | F1: {base_f1:.2f}")

# Step 2: search for a better prompt starting from the same template.
print("\nOptimizing prompt with advanced techniques...")
optimized_prompt, opt_acc = optimize_prompt(templates['zero_shot'])
print(f"\nOptimized Prompt:\n{optimized_prompt}")
print(f"Optimized Accuracy: {opt_acc:.2f}")

# Step 3: accuracy gain scaled by 100, reported as zero when there is no gain.
if opt_acc > base_acc:
    improvement = (opt_acc - base_acc) * 100
else:
    improvement = 0
print(f"\nImprovement through prompt engineering: {improvement:.0f}%")

# Step 4: write the headline numbers to a small plain-text report.
report_body = (
    f"Base Accuracy: {base_acc:.2f}\n"
    f"Optimized Accuracy: {opt_acc:.2f}\n"
    f"Improvement: {improvement:.0f}%\n"
    "Model: llama-3.3-70b-versatile (Groq)"
)
with open("report.txt", "w") as report_file:
    report_file.write(report_body)
print("\nReport saved to report.txt – Add this to your GitHub README!")

Evaluating base zero-shot prompt on e-commerce reviews...
Base Accuracy: 0.90 | F1: 0.91

Optimizing prompt with advanced techniques...
Improved! New accuracy: 1.00

Optimized Prompt:
Act as an expert sentiment analyst with e-commerce experience. As a business analyst reviewing e-commerce customer feedback, classify the sentiment of this review as positive or negative: {text}
Optimized Accuracy: 1.00

Improvement through prompt engineering: 10%

Report saved to report.txt – Add this to your GitHub README!
