In [43]:
import json
import os
from collections import Counter

import pandas as pd
import requests

# --- Config ---
# SECURITY: the API key must never be committed to source control. It is read
# from the MISTRAL_API_KEY environment variable instead. Any key that was
# previously hard-coded in this file should be treated as leaked and revoked.
API_KEY = os.environ.get("MISTRAL_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the module can still be imported/inspected;
    # requests sent without a key are expected to be rejected by the API.
    print("⚠️ MISTRAL_API_KEY is not set; API requests will not be authenticated.")

MISTRAL_API_URL = "https://api.mistral.ai/v1/chat/completions"

# Headers sent with every chat-completion request.
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

# --- Step 1: Load and merge datasets ---
# Each CSV split is read in full, reduced to its "original_prompt" column,
# and the three frames are stacked into one.
paths = [
    "/home/amin/techeurope/archive/seperated_train_data.csv",
    "/home/amin/techeurope/archive/seperated_test_data.csv",
    "/home/amin/techeurope/archive/seperated_validation_data.csv",
]

print("📥 Merging datasets...")
df_list = []
for path in paths:
    split_frame = pd.read_csv(path)
    df_list.append(split_frame[["original_prompt"]])

# Stack the splits, then drop missing prompts and exact duplicates.
df = pd.concat(df_list, ignore_index=True)
df = df.dropna()
df = df.drop_duplicates()

# Optional cap on the number of prompts (useful while testing).
df = df.head(3000)

# --- Step 2: Write merged JSONL input (optional) ---
# One JSON object per line, each of the form {"input": <prompt>}.
input_jsonl_path = "merged_prompts_input.jsonl"
with open(input_jsonl_path, "w") as sink:
    for prompt in df["original_prompt"]:
        record = {"input": prompt}
        sink.write(json.dumps(record) + "\n")
print(f"📄 Merged JSONL input saved to: {input_jsonl_path}")

# --- Step 3: Classification function ---
def classify_prompt_mistral(prompt: str, timeout: float = 60.0) -> str:
    """Ask the Mistral chat-completions API which model category suits *prompt*.

    The request uses the ``mistral-large-latest`` model at temperature 0.0 with
    a system message instructing it to answer with exactly one category name
    ("Small language model" or "Large language model").

    Args:
        prompt: The user prompt to be routed.
        timeout: Seconds to wait for the HTTP request before giving up. Without
            a timeout a single stalled connection would block the whole run.

    Returns:
        The model's reply with surrounding whitespace stripped, or the string
        ``"Unknown"`` if the request fails or the response does not have the
        expected structure. The reply is not validated against the two
        category names.
    """
    system_msg = """
You are a routing expert that decides which type of language model is most suitable for a given user prompt.

You must choose *only one* of the following model categories:
- Small language model
- Large language model

⚠️ Large language models are expensive. Only use them when a small language model cannot handle the task reliably.

Model Category Definitions and Examples:

➡ Small language model  
Use for short tasks, rewriting, summarization, or simple factual/informational answers.  
Models: Mistral-7B-Instruct, Phi, Gemma, Qwen  
Examples:  
- "Summarize this tweet in 10 words."  
- "Translate 'hello world' into German."  
- "List 3 things I should know as a CEO working with developers."  
- "What should a non-tech founder learn to better talk to dev teams?"

➡ Large language model  
Use for development, difficult reasoning tasks, or multi-step planning.  
Models: GPT-4, Claude 3  
Examples:  
- "Write a Python function for classification."  
- "What questions would you ask in a job interview for a molecular biologist?"

Final instruction:  
Only output the category name. Do not explain your reasoning. Be strict and consistent.
"""

    payload = {
        "model": "mistral-large-latest",
        "messages": [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": f"Prompt: {prompt}"}
        ],
        "temperature": 0.0
    }

    try:
        response = requests.post(
            MISTRAL_API_URL, headers=HEADERS, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except (
        requests.RequestException,  # connection errors, timeouts, HTTP 4xx/5xx
        ValueError,                 # response body is not valid JSON
        LookupError,                # missing "choices"/"message"/"content"
        TypeError,                  # response JSON has an unexpected shape
        AttributeError,             # "content" is not a string (e.g. null)
    ) as e:
        # Best effort: log and label as "Unknown" so a long batch keeps going.
        # str() guards the slice in case a non-string prompt slipped through.
        print(f"❌ Error for prompt: {str(prompt)[:50]} → {e}")
        return "Unknown"

# --- Step 4: Predict and save ---
# Classify every prompt in order, streaming each result to a JSONL file as it
# arrives, then attach the labels to the DataFrame and write it out as CSV.
output_csv = "mistral_predictions_final.csv"
output_jsonl = "mistral_predictions_final.jsonl"
label_counter = Counter()
categories = []

print("🤖 Starting classification...")

total_prompts = len(df)
with open(output_jsonl, "w") as jsonl_file:
    for position, prompt in enumerate(df["original_prompt"], 1):
        print(f"[{position}/{total_prompts}] Classifying prompt...")
        category = classify_prompt_mistral(prompt)

        # Track the label both per-row and in the running tally.
        categories.append(category)
        label_counter.update([category])

        # One labelled record per line.
        labelled = {"input": prompt, "label": category}
        jsonl_file.write(json.dumps(labelled) + "\n")
        print(f"→ {category}\n")

# Labels were collected in row order, so they align with the DataFrame rows.
df["model_category"] = categories
df.to_csv(output_csv, index=False)

# --- Final Output ---
print("\n📊 Final model category counts:")
for label in label_counter:
    print(f"- {label}: {label_counter[label]}")

print("\n✅ Classification complete.")
print(f"📄 CSV saved to: {output_csv}")
print(f"📄 JSONL saved to: {output_jsonl}")


📥 Merging datasets...
📄 Merged JSONL input saved to: merged_prompts_input.jsonl
🤖 Starting classification...
[1/2166] Classifying prompt...
→ Large language model

[2/2166] Classifying prompt...
→ Small language model

[3/2166] Classifying prompt...
→ Small language model

[4/2166] Classifying prompt...
→ Large language model

[5/2166] Classifying prompt...
→ Small language model

[6/2166] Classifying prompt...
→ Small language model

[7/2166] Classifying prompt...
→ Large language model

[8/2166] Classifying prompt...
→ Small language model

[9/2166] Classifying prompt...
→ Large language model

[10/2166] Classifying prompt...
→ Small language model

[11/2166] Classifying prompt...
→ Large language model

[12/2166] Classifying prompt...
→ Large language model

[13/2166] Classifying prompt...
→ Large language model

[14/2166] Classifying prompt...
→ Small language model

[15/2166] Classifying prompt...
→ Small language model

[16/2166] Classifying prompt...
→ Small language model

[17/