# Model Evaluation
Evaluate the trained model against the held-out test set.


This notebook produces standard classification metrics to assess the model trained in the previous step.


In [None]:
# Notebook parameters: input/output locations consumed by the evaluation
# cell below. All values are plain path strings.
params = dict(
    test_path='data/test_features.csv',                  # CSV read as the test set
    feature_metadata_path='data/feature_metadata.json',  # JSON naming target/feature columns
    model_path='models/random_forest.pkl',               # serialized model loaded via joblib
    metrics_output_path='models/metrics.json',           # where the metrics JSON is written
)


In [None]:
from pathlib import Path
import json

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# --- Resolve every configured location into a Path object -------------------
test_path, metadata_path, model_path, metrics_output_path = (
    Path(params[key])
    for key in (
        'test_path',
        'feature_metadata_path',
        'model_path',
        'metrics_output_path',
    )
)

# Ensure the directory that will receive the metrics file exists up front.
metrics_output_path.parent.mkdir(parents=True, exist_ok=True)

# --- Load the column layout recorded in the feature metadata ----------------
metadata = json.loads(metadata_path.read_text())
target_column, feature_columns = (
    metadata['target_column'],
    metadata['feature_columns'],
)

# --- Load the test data and split it into features / labels -----------------
test_df = pd.read_csv(test_path)
X_test = test_df[feature_columns]
y_test = test_df[target_column]

# --- Load the model and score the test set ----------------------------------
model = joblib.load(model_path)
y_pred = model.predict(X_test)

if hasattr(model, 'predict_proba'):
    # Second column of the probability output; presumably the positive-class
    # probability of a binary classifier -- TODO confirm against training.
    proba = model.predict_proba(X_test)[:, 1]
else:
    proba = None

# --- Label-based metrics, computed in a fixed order -------------------------
metrics = {
    label: float(scorer(y_test, y_pred))
    for label, scorer in (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
}

# --- Probability-based metric ------------------------------------------------
# 'roc_auc' stays None when the model exposes no predict_proba or when the
# scorer rejects the inputs with a ValueError.
metrics['roc_auc'] = None
if proba is not None:
    try:
        metrics['roc_auc'] = float(roc_auc_score(y_test, proba))
    except ValueError:
        pass  # deliberate best-effort: keep the None recorded above

# --- Persist the metrics as indented JSON ------------------------------------
with metrics_output_path.open('w') as fp:
    fp.write(json.dumps(metrics, indent=2))

# Bare expression so the notebook renders the metrics dict as the cell output.
metrics
