# Model Evaluation Notebook
## ML Model Serving API - Project 5

In [None]:
import pandas as pd
import numpy as np
import joblib
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, classification_report, 
                             confusion_matrix, roc_curve, auc)
from sklearn.preprocessing import label_binarize

# Read the processed iris table and split it into the label vector and the
# feature matrix (everything except the two label columns).
# NOTE(review): the path points at the whole processed dataset, not an
# explicit hold-out split -- confirm before treating the metrics below as
# test-set performance.
df = pd.read_csv('../data/processed/iris_processed.csv')
y = df['target']
X = df.drop(columns=['target', 'species'])

# Restore the v1 model artifact and the JSON metadata stored next to it.
# NOTE(review): joblib artifacts are pickle-based; only load trusted files.
model = joblib.load('../models/v1/model.joblib')
with open('../models/v1/metadata.json', 'r') as metadata_file:
    metadata = json.loads(metadata_file.read())

# Echo which model and version the rest of the notebook evaluates.
for caption, key in (("Model loaded:", 'model_type'), ("Version:", 'version')):
    print(caption, metadata[key])

In [None]:
# Score every row with the loaded model: hard labels feed the accuracy and
# the classification report, per-class probabilities feed the ROC cell.
y_pred = model.predict(X)
y_pred_proba = model.predict_proba(X)

# Calculate metrics
accuracy = accuracy_score(y, y_pred)
print(f"Overall Accuracy: {accuracy:.4f}")

# Build the display names as a real list ordered by numeric class index.
# The ROC cell looks names up via metadata['target_classes'][str(i)], so the
# keys are stringified class indices; relying on dict.values() would tie the
# report rows to whatever key order the JSON file happens to have.
class_names = [
    name
    for _, name in sorted(metadata['target_classes'].items(),
                          key=lambda item: int(item[0]))
]

print("\nClassification Report:")
print(classification_report(y, y_pred, target_names=class_names))

In [None]:
# Confusion Matrix
# Rows are true classes, columns are predicted classes (see axis labels).
plt.figure(figsize=(8, 6))
cm = confusion_matrix(y, y_pred)

# Tick labels as an explicit list ordered by numeric class index. The ROC
# cell looks names up via metadata['target_classes'][str(i)], so the keys are
# stringified class indices; dict.values() would instead follow the JSON
# file's key order, which could mislabel rows/columns of the matrix.
class_names = [
    name
    for _, name in sorted(metadata['target_classes'].items(),
                          key=lambda item: int(item[0]))
]

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

In [None]:
# Feature Importance
# Pair each feature name from the metadata with the model's importance score
# and rank them from most to least important.
# NOTE(review): requires a model exposing `feature_importances_` and assumes
# metadata['features'] is in the model's training column order -- confirm.
importance_table = pd.DataFrame(
    {
        'feature': metadata['features'],
        'importance': model.feature_importances_,
    }
)
feature_importance = importance_table.sort_values(by='importance', ascending=False)

# Horizontal bar chart, one bar per feature, in ranked order.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_importance, x='importance', y='feature', ax=ax)
ax.set_title('Feature Importance')
fig.tight_layout()
plt.show()

In [None]:
# ROC Curve (for multiclass)
# One-vs-rest analysis: binarize the labels for classes 0, 1 and 2, then
# compare each indicator column with the matching predicted-probability column.
y_bin = label_binarize(y, classes=[0, 1, 2])
n_classes = y_bin.shape[1]

# Per-class curve coordinates and area under the curve, keyed by class index.
fpr, tpr, roc_auc = {}, {}, {}
for class_idx in range(n_classes):
    curve = roc_curve(y_bin[:, class_idx], y_pred_proba[:, class_idx])
    fpr[class_idx], tpr[class_idx] = curve[0], curve[1]
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

plt.figure(figsize=(8, 6))
for class_idx, area in roc_auc.items():
    # Metadata stores class names under the stringified class index.
    class_name = metadata['target_classes'][str(class_idx)]
    plt.plot(fpr[class_idx], tpr[class_idx],
             label='ROC curve (area = %0.2f) for %s' % (area, class_name))

# Dashed diagonal as the reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for Multiclass')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Save evaluation report
# `datetime` is not imported by the import cell at the top of the notebook,
# so bring it into scope here; without this the cell fails with NameError.
from datetime import datetime

# Collect the metrics computed in the earlier cells into one JSON-friendly
# dict. Numeric values are coerced to plain Python types so json.dump never
# has to deal with numpy scalars.
evaluation_report = {
    "accuracy": float(accuracy),
    "confusion_matrix": cm.tolist(),
    "feature_importance": feature_importance.to_dict('records'),
    "roc_auc": {str(k): float(v) for k, v in roc_auc.items()},
    # Local (naive) timestamp of when this cell ran, in ISO 8601 form.
    "evaluation_date": datetime.now().isoformat()
}

# Persist the report alongside the v1 model artifacts.
with open('../models/v1/evaluation_report.json', 'w') as f:
    json.dump(evaluation_report, f, indent=2)

print("Evaluation report saved successfully!")