# LLMClassifier Test

Test the LLMClassifier on quark/gluon jet data using the OpenRouter API.


In [1]:
import sys
import os
import numpy as np
from pathlib import Path

# Put the project's `src` directory (a sibling of the current working
# directory) at the front of the import path so `vibe_jet_tagging` can be
# imported below. NOTE(review): this assumes the kernel's working directory is
# the folder containing this notebook — confirm if run from elsewhere.
sys.path.insert(0, str(Path.cwd().parent / 'src'))

from vibe_jet_tagging import LLMClassifier
from sklearn.metrics import roc_auc_score, accuracy_score


## Load Data

Load the quark/gluon jet dataset.


In [2]:
# Load the quark/gluon jet dataset from the project's data directory
# (resolved relative to the kernel's working directory).
data_path = Path.cwd().parent / 'data' / 'qg_jets.npz'

# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open. Read the two arrays inside a context manager so the archive is
# closed as soon as they are in memory; X and y stay valid afterwards.
with np.load(data_path) as data:
    X = data['X']
    y = data['y']

# Summary of what was loaded; labels are counted as 1 = quark, 0 = gluon.
print(f"Loaded {len(X)} jets")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"Quark jets: {(y == 1).sum()}")
print(f"Gluon jets: {(y == 0).sum()}")


Loaded 10000 jets
X shape: (10000, 139, 4)
y shape: (10000,)
Quark jets: 5074
Gluon jets: 4926


## Initialize LLMClassifier

Set up the classifier with the OpenRouter API.

**Note:** Set your OpenRouter API key as an environment variable in the shell before starting Jupyter, so that the kernel inherits it:
```bash
export OPENROUTER_API_KEY="your-key-here"
```


In [4]:
# Check that a usable API key is available in the kernel's environment.
# A variable that is exported but empty (or only whitespace) is treated the
# same as a missing one, so the check does not report a key that cannot work.
if not os.environ.get('OPENROUTER_API_KEY', '').strip():
    print("WARNING: OPENROUTER_API_KEY not set. Please set it before running predictions.")
    print("export OPENROUTER_API_KEY='your-key-here'")
else:
    print("✓ API key found")


export OPENROUTER_API_KEY='your-key-here'


In [None]:
# Build the classifier, pointing it at the project's prompt templates
# (a `templates` directory next to the current working directory).
_templates_dir = Path.cwd().parent / 'templates'
clf = LLMClassifier(
    model_name="anthropic/claude-3.5-sonnet",
    template_name="simple_list",
    format_type="list",
    templates_dir=str(_templates_dir),
)

# Called with empty inputs: fitting is a no-op for zero-shot use.
clf.fit([], [])

# Echo the configuration actually stored on the classifier.
print("Classifier initialized")
for _label, _attr in (
    ("Model", "model_name"),
    ("Template", "template_name"),
    ("Format", "format_type"),
):
    print(f"{_label}: {getattr(clf, _attr)}")


## Test Single Jet Prediction


In [None]:
# Smoke test: classify the first jet in the dataset and compare the result
# with its true label (1 is reported as quark, anything else as gluon).
test_jet = X[0]
true_label = y[0]
true_name = 'quark' if true_label == 1 else 'gluon'

print(f"True label: {true_label} ({true_name})")
print(f"\nJet shape: {test_jet.shape}")
# Rows whose first column (pt) is positive are counted as particles.
n_particles = (test_jet[:, 0] > 0).sum()
print(f"Number of particles (pt > 0): {n_particles}")

# predict() is given a one-element list; take its single result.
prediction = clf.predict([test_jet])[0]
predicted_name = 'quark' if prediction == 1 else 'gluon'
print(f"\nPredicted label: {prediction} ({predicted_name})")
print(f"Correct: {prediction == true_label}")


## Test on 100 Jets

Run the classifier on 100 jets and compute metrics.


In [None]:
# Evaluation subset: the first n_test jets together with their labels.
n_test = 100
X_test, y_test = X[:n_test], y[:n_test]

# Class balance of the subset (1 = quark, 0 = gluon).
n_quark_true = (y_test == 1).sum()
n_gluon_true = (y_test == 0).sum()

print(f"Testing on {n_test} jets...")
print(f"True distribution: {n_quark_true} quark, {n_gluon_true} gluon")


In [None]:
# Classify the test jets one at a time (this will take a while), showing a
# progress bar and a running accuracy.
from tqdm.auto import tqdm

predictions = []
for n_done, jet in enumerate(tqdm(X_test), start=1):
    # One predict() call per jet; keep its single result.
    predictions.append(clf.predict([jet])[0])

    # Report accuracy over the jets seen so far, every 10th jet only.
    if n_done % 10 != 0:
        continue
    acc = accuracy_score(y_test[:n_done], predictions)
    print(f"After {n_done} jets: Accuracy = {acc:.3f}")

# Convert to an array so later cells can use elementwise comparisons.
predictions = np.array(predictions)


## Evaluate Performance


In [None]:
# Headline metrics over the evaluated subset.
accuracy = accuracy_score(y_test, predictions)
# NOTE(review): `predictions` appear to be hard class labels rather than
# scores/probabilities, so this AUC is computed from a single operating
# point — confirm whether the classifier can expose a score instead.
auc = roc_auc_score(y_test, predictions)

banner = "=" * 50
print("\n" + banner)
print("RESULTS")
print(banner)
print(f"Accuracy: {accuracy:.3f}")
print(f"AUC Score: {auc:.3f}")

# Predicted vs. true class balance (1 = quark, 0 = gluon).
n_quark_pred = (predictions == 1).sum()
n_gluon_pred = (predictions == 0).sum()
print(f"\nPredicted distribution: {n_quark_pred} quark, {n_gluon_pred} gluon")
n_quark_true = (y_test == 1).sum()
n_gluon_true = (y_test == 0).sum()
print(f"True distribution: {n_quark_true} quark, {n_gluon_true} gluon")


In [None]:
# Confusion matrix of true labels (rows) against predicted labels (columns).
from sklearn.metrics import confusion_matrix

# Pin the label order to [gluon=0, quark=1]. Without `labels`, the matrix is
# built from the sorted set of values found in y_test and predictions, so any
# unexpected predicted value would add rows/columns and shift the indices,
# and the table below would be labelled incorrectly. With the labels pinned,
# the matrix is always 2x2 in the printed order; samples whose true or
# predicted value is outside {0, 1} are left out of the counts.
cm = confusion_matrix(y_test, predictions, labels=[0, 1])
print("\nConfusion Matrix:")
print("                Predicted")
print("                Gluon  Quark")
print(f"True  Gluon     {cm[0,0]:5d}  {cm[0,1]:5d}")
print(f"      Quark     {cm[1,0]:5d}  {cm[1,1]:5d}")
