# LLMClassifier Test

Test the LLMClassifier on quark/gluon jet data using OpenRouter API.


In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os
import numpy as np
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

from vibe_jet_tagging import LLMClassifier
from sklearn.metrics import roc_auc_score, accuracy_score


## Load Data

Load the quark/gluon jet dataset.


In [13]:
# Load data
data_path = Path.cwd().parent / 'data' / 'qg_jets.npz'
data = np.load(data_path)

X = data['X']
y = data['y']

print(f"Loaded {len(X)} jets")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"Quark jets: {(y == 1).sum()}")
print(f"Gluon jets: {(y == 0).sum()}")


Loaded 10000 jets
X shape: (10000, 139, 4)
y shape: (10000,)
Quark jets: 5074
Gluon jets: 4926


## Initialize LLMClassifier

Set up the classifier with OpenRouter API.

**Note:** Set your OpenRouter API key as an environment variable:
```bash
export OPENROUTER_API_KEY="your-key-here"
```


In [22]:
# Load API key from .env file
from dotenv import load_dotenv
load_dotenv()

# Check if API key is set
if 'OPENROUTER_API_KEY' not in os.environ:
    print("WARNING: OPENROUTER_API_KEY not set. Please set it in .env file.")
    print("Create a .env file with: OPENROUTER_API_KEY='your-key-here'")
else:
    print("✓ API key found")


ModuleNotFoundError: No module named 'dotenv'

In [15]:
# Initialize classifier
clf = LLMClassifier(
    model_name="google/gemini-2.5-flash-lite-preview-09-2025",
    template_name="simple_list",
    format_type="list",
    templates_dir=str(Path.cwd().parent / 'templates')
)

# Fit (no-op for zero-shot)
clf.fit([], [])

print("Classifier initialized")
print(f"Model: {clf.model_name}")
print(f"Template: {clf.template_name}")
print(f"Format: {clf.format_type}")


Classifier initialized
Model: google/gemini-2.5-flash-lite-preview-09-2025
Template: simple_list
Format: list


## Test Single Jet Prediction


In [16]:
# Test on a single jet
test_jet = X[0]
true_label = y[0]

print(f"True label: {true_label} ({'quark' if true_label == 1 else 'gluon'})")
print(f"\nJet shape: {test_jet.shape}")
print(f"Number of particles (pt > 0): {(test_jet[:, 0] > 0).sum()}")

# Make prediction
prediction = clf.predict([test_jet])[0]
print(f"\nPredicted label: {prediction} ({'quark' if prediction == 1 else 'gluon'})")
print(f"Correct: {prediction == true_label}")


True label: 1.0 (quark)

Jet shape: (139, 4)
Number of particles (pt > 0): 18

Predicted label: 0 (gluon)
Correct: False


In [17]:
clf.preview_prompt(test_jet)

PROMPT PREVIEW
Model: google/gemini-2.5-flash-lite-preview-09-2025
Template: simple_list
Format: list
Temperature: 0.0
Max tokens: 100

--------------------------------------------------------------------------------
SYSTEM MESSAGE:
--------------------------------------------------------------------------------
Reasoning: disabled

--------------------------------------------------------------------------------
USER MESSAGE:
--------------------------------------------------------------------------------
You are a particle physics expert. Your task is to classify whether a jet is initiated by a quark (label: 1) or a gluon (label: 0).

A jet consists of particles, each with the following properties:
- pt: transverse momentum (GeV)
- y: rapidity
- phi: azimuthal angle (radians)
- pid: particle ID

Here is the jet data:
Particle 1: pt=0.269 GeV, y=0.357, phi=4.741, pid=22
Particle 2: pt=0.160 GeV, y=-0.256, phi=4.550, pid=22
Particle 3: pt=1.149 GeV, y=-0.062, phi=4.504, pid=-211
Particl

## Test on 100 Jets

Run the classifier on 100 jets and compute metrics.


In [18]:
# Select 100 jets
n_test = 100
X_test = X[:n_test]
y_test = y[:n_test]

print(f"Testing on {n_test} jets...")
print(f"True distribution: {(y_test == 1).sum()} quark, {(y_test == 0).sum()} gluon")


Testing on 100 jets...
True distribution: 55 quark, 45 gluon


In [19]:
# Make predictions (this will take a while)
from tqdm.auto import tqdm

predictions = []
for i, jet in enumerate(tqdm(X_test)):
    pred = clf.predict([jet])[0]
    predictions.append(pred)
    
    # Print progress every 10 jets
    if (i + 1) % 10 == 0:
        acc = accuracy_score(y_test[:i+1], predictions)
        print(f"After {i+1} jets: Accuracy = {acc:.3f}")

predictions = np.array(predictions)


  0%|          | 0/100 [00:00<?, ?it/s]

 10%|█         | 10/100 [00:05<00:48,  1.86it/s]

After 10 jets: Accuracy = 0.300


 20%|██        | 20/100 [00:10<00:43,  1.83it/s]

After 20 jets: Accuracy = 0.350


 30%|███       | 30/100 [00:16<00:45,  1.54it/s]

After 30 jets: Accuracy = 0.400


 40%|████      | 40/100 [00:22<00:36,  1.64it/s]

After 40 jets: Accuracy = 0.450


 50%|█████     | 50/100 [00:28<00:30,  1.66it/s]

After 50 jets: Accuracy = 0.440


 60%|██████    | 60/100 [00:34<00:21,  1.88it/s]

After 60 jets: Accuracy = 0.467


 70%|███████   | 70/100 [00:40<00:17,  1.67it/s]

After 70 jets: Accuracy = 0.486


 80%|████████  | 80/100 [00:45<00:11,  1.67it/s]

After 80 jets: Accuracy = 0.512


 90%|█████████ | 90/100 [00:51<00:05,  1.79it/s]

After 90 jets: Accuracy = 0.522


100%|██████████| 100/100 [00:58<00:00,  1.71it/s]

After 100 jets: Accuracy = 0.520





## Evaluate Performance


In [20]:
# Calculate metrics
accuracy = accuracy_score(y_test, predictions)
auc = roc_auc_score(y_test, predictions)

print("\n" + "="*50)
print("RESULTS")
print("="*50)
print(f"Accuracy: {accuracy:.3f}")
print(f"AUC Score: {auc:.3f}")
print(f"\nPredicted distribution: {(predictions == 1).sum()} quark, {(predictions == 0).sum()} gluon")
print(f"True distribution: {(y_test == 1).sum()} quark, {(y_test == 0).sum()} gluon")



RESULTS
Accuracy: 0.520
AUC Score: 0.549

Predicted distribution: 21 quark, 79 gluon
True distribution: 55 quark, 45 gluon


In [21]:
# Confusion matrix
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, predictions)
print("\nConfusion Matrix:")
print("                Predicted")
print("                Gluon  Quark")
print(f"True  Gluon     {cm[0,0]:5d}  {cm[0,1]:5d}")
print(f"      Quark     {cm[1,0]:5d}  {cm[1,1]:5d}")



Confusion Matrix:
                Predicted
                Gluon  Quark
True  Gluon        38      7
      Quark        41     14
