# CookieGuard - Model Training

This notebook trains the Random Forest classifier used for cookie categorization, evaluates it, saves the trained model, and converts it to ONNX.

In [None]:
import sys
sys.path.append('../src')

from model_trainer import ModelTrainer
from onnx_converter import ONNXConverter
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## 1. Generate Synthetic Labels (if needed)

In [None]:
# Create the trainer object that the later cells reuse through `trainer`.
trainer = ModelTrainer()

# Paths handed to the labeling step; judging by their names, the first is the
# raw cookie dump and the second is where the labeled data goes (confirm in
# ModelTrainer). The call runs every time this cell executes; the section
# heading marks the step as optional.
raw_cookies_path = '../data/raw/cookies.json'
labeled_cookies_path = '../data/processed/labeled_cookies.json'

trainer.generate_synthetic_labels(raw_cookies_path, labeled_cookies_path)

## 2. Load and Prepare Data

In [None]:
# Ask the trainer for the prepared dataset and unpack its two return values.
# No path is passed, so the data source is whatever ModelTrainer defaults to.
# NOTE(review): X and y are presumably the feature matrix and the labels — confirm.
X, y = trainer.load_and_prepare_data()

## 3. Train Random Forest Model

In [None]:
# Hyperparameters forwarded to the trainer, grouped so they are easy to adjust.
rf_params = {'n_estimators': 100, 'max_depth': 20}

# The returned estimator is kept in `model`; later cells read its feature
# importances and call predict / predict_proba on it.
model = trainer.train_model(**rf_params)

## 4. Evaluate Model Performance

In [None]:
# Run the trainer's evaluation step. It is called with no arguments, so the
# model and evaluation data presumably come from the trainer's own state
# (e.g. trainer.X_test) — confirm in ModelTrainer.
trainer.evaluate_model()

## 5. Feature Importance Visualization

In [None]:
# Collect the feature names and the fitted model's importances, then order the
# feature indices from most to least important.
feature_names = trainer.feature_extractor.get_feature_names()
importances = model.feature_importances_
indices = np.flip(np.argsort(importances))

# One bar per feature, placed left to right in ranked order.
bar_positions = np.arange(len(importances))
ranked_names = [feature_names[idx] for idx in indices]

fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title('Feature Importances')
ax.bar(bar_positions, importances[indices])

# Rotate and right-align the names so long labels stay readable.
ax.set_xticks(bar_positions)
ax.set_xticklabels(ranked_names, rotation=45, ha='right')
ax.set_ylabel('Importance')

fig.tight_layout()
plt.show()

## 6. Save Model

In [None]:
# Destination for the saved classifier; the ONNX conversion cell is given this
# same path.
model_path = '../models/cookie_classifier.pkl'
trainer.save_model(model_path)

## 7. Convert to ONNX

In [None]:
# Paths used by the conversion: the pickled classifier written by the save
# step, and the ONNX file to produce.
pickled_model_path = '../models/cookie_classifier.pkl'
onnx_model_path = '../../extension/models/cookie-classifier.onnx'

converter = ONNXConverter(pickled_model_path)
converter.load_model()

# Convert first, then verify the file at the very same path.
converter.convert_to_onnx(onnx_model_path)
converter.verify_onnx_model(onnx_model_path)

## 8. Test Sample Predictions

In [None]:
# Take up to the first ten rows of trainer.X_test and get both the predicted
# class and the per-class probabilities for each of them.
test_samples = trainer.X_test[:10]
predictions = model.predict(test_samples)
probabilities = model.predict_proba(test_samples)

print("Sample Predictions:")
for sample_no, (label, class_probs) in enumerate(zip(predictions, probabilities), start=1):
    # The largest class probability is reported as the confidence.
    confidence = max(class_probs)
    print(f"Sample {sample_no}: {label} (confidence: {confidence:.2f})")