# FairLearnAI - Prompt Classifier Training (GPU Accelerated)

This notebook trains a text classification model to categorize student prompts into: `ALLOWED`, `HINT_ONLY`, `DISALLOWED`, or `OFF_TOPIC`.

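**Hardware**: Detects an NVIDIA GPU (e.g., RTX 4060) via CUDA if one is available. Note that the current scikit-learn model trains on the CPU; the GPU check is groundwork for a future neural-network upgrade.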

In [None]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# When running on CUDA, also report the model name of GPU index 0.
if device_name == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

In [None]:
# Load Mock Dataset
# Stop the run if the CSV is missing: carrying on would leave `df` undefined and
# the following cells would fail with a confusing NameError instead.
try:
    df = pd.read_csv('dataset/mock_data.csv')
except FileNotFoundError:
    print("Error: dataset/mock_data.csv not found. Please ensure the file exists.")
    raise

# Fail early if the columns read by the vectorization step are absent.
missing_columns = {'text', 'label'} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"dataset/mock_data.csv is missing required columns: {sorted(missing_columns)}"
    )

print(f"Loaded {len(df)} examples")
display(df.head())

In [None]:
# Preprocessing & Vectorization
# Split the raw text BEFORE fitting TF-IDF so the held-out examples never
# influence the vocabulary or IDF weights. Fitting on the full dataset leaks
# test-set information into training and inflates the evaluation scores.
y = df['label']
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42
)

# Learn unigram + bigram features (capped at 5000 terms) from the training split
# only, then project the test split onto that same fitted vocabulary.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-dataset matrix under the train-fitted vocabulary, kept so that any code
# still referring to `X` keeps working.
X = vectorizer.transform(df['text'])

# The feature matrices stay sparse; nothing is converted to dense or moved to the
# GPU. scikit-learn runs on the CPU, and the GPU check earlier in the notebook only
# prepares for a future Neural Net upgrade.
# For now, we use Logistic Regression because the dataset is small and it's efficient.
# If you want to use a Transformer (BERT), we would use the 'transformers' library here.

print(f"Training set shape: {X_train.shape}")

In [None]:
# Train Model
# Fit a logistic-regression classifier on the training features; `fit` returns
# the estimator itself, so construction and training are chained.
print("Training classifier...")
clf = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# Evaluate
# Predict labels for the held-out split and print the per-class metrics report.
y_pred = clf.predict(X_test)
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Export Model
# Pickle the fitted classifier and the fitted TF-IDF vectorizer into the current
# working directory. Both files are needed together at inference time: the
# vectorizer turns raw text into the features the classifier was trained on.
# These .pkl files could be served by a Python API (FastAPI); for Node.js usage
# the coefficients would instead need converting to JSON.

print("Saving model artifacts...")
artifacts = (
    ('classifier_model.pkl', clf),
    ('tfidf_vectorizer.pkl', vectorizer),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)

print("Model saved to disk.")