# Model Training Notebook
## ML Model Serving API - Project 5

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import json
from datetime import datetime

# Read the processed CSV into a DataFrame, report its dimensions, and
# preview the first rows (the trailing expression is what the notebook shows).
processed_csv = '../data/processed/iris_processed.csv'
df = pd.read_csv(processed_csv)
print(f"Data shape: {df.shape}")
df.head()

In [None]:
# Split the frame into the feature matrix X (everything except the two
# label columns) and the label vector y (the `target` column).
label_columns = ['target', 'species']
X = df.drop(columns=label_columns)
y = df['target']

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

In [None]:
# Hold out 20% of the rows for testing; the split is stratified on y and
# seeded so it is reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"Training set: {X_train.shape} {y_train.shape}")
print(f"Test set: {X_test.shape} {y_test.shape}")

In [None]:
# Build a seeded 100-tree random forest and fit it on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params).fit(X_train, y_train)

print("Model trained successfully!")

In [None]:
# Evaluate model on the held-out test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# classification_report pairs `target_names` with the label values in sorted
# order. df['species'].unique() instead follows first-appearance order in the
# CSV, which mislabels the rows whenever the file is not ordered by target.
# Build the names in ascending `target` order and pass `labels` explicitly.
# Assumes each target value maps to exactly one species -- TODO confirm.
class_names = (
    df.drop_duplicates(subset='target')
    .set_index('target')['species']
    .sort_index()
)

print(f"Test Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_names.index.tolist(),
        target_names=class_names.astype(str).tolist(),
    )
)

In [None]:
# Prepare metadata
# The model type and hyperparameters are read back from the fitted estimator
# rather than retyped, so the saved metadata cannot drift from the values
# actually used in the training cell.
metadata = {
    "version": "v1",
    "model_type": type(model).__name__,
    # NOTE: datetime.now() without a tz argument yields a naive local-time
    # timestamp; the serialized string carries no UTC offset.
    "training_date": datetime.now().isoformat(),
    "features": X.columns.tolist(),
    # NOTE: JSON object keys are always strings, so json.dump(s) writes these
    # integer keys as "0", "1", "2"; readers of the file must look them up
    # by string.
    "target_classes": {0: "setosa", 1: "versicolor", 2: "virginica"},
    "performance": {
        "accuracy": float(accuracy),
        "n_estimators": model.n_estimators,
        "random_state": model.random_state,
    },
    "preprocessing": {
        "normalization": "none",
        "imputation": "none",
    },
}

print("Model metadata prepared:")
print(json.dumps(metadata, indent=2))

In [None]:
# Save model and metadata
import os

# Derive the output directory from the version recorded in the metadata so
# the on-disk location and the metadata's "version" field cannot disagree.
model_dir = f"../models/{metadata['version']}"
os.makedirs(model_dir, exist_ok=True)

# Persist the fitted estimator next to its JSON description.
joblib.dump(model, f"{model_dir}/model.joblib")
# Explicit encoding keeps the file independent of the platform's locale default.
with open(f"{model_dir}/metadata.json", 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2)

print("Model and metadata saved successfully!")