In [None]:
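# Optional environment setup: uncomment and run once if the pinned packages below are not installed yet.
# !pip install --upgrade pip setuptools wheel
# !pip install pyarrow==10.0.1
# !pip install mlflow==2.12.1 scikit-learn pandas numpy matplotlib seaborn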

In [None]:
import os
import tempfile

import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Plot styling: matplotlib's default style sheet plus seaborn's "husl" palette.
plt.style.use('default')
sns.set_palette("husl")

# Confirm the imports worked and report which MLflow version is in use.
print(
    "📦 All packages imported successfully!",
    f"MLflow version: {mlflow.__version__}",
    sep="\n",
)

In [None]:
# Load Iris once for exploration: features as a labelled DataFrame, target as an array.
iris = load_iris()
y = iris.target
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Per-class sample counts, in class-index order.
class_counts = np.bincount(y)

print(
    "📊 Dataset Information:",
    f"Features: {list(X.columns)}",
    f"Target classes: {iris.target_names}",
    f"Dataset shape: {X.shape}",
    f"Target distribution: {class_counts}",
    sep="\n",
)

print("\n🔍 First 5 rows:")
display(X.head())

In [None]:
def _build_model(model_type, model_params, seed):
    """Instantiate the scikit-learn estimator selected by ``model_type``.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """
    if model_type == "random_forest":
        return RandomForestClassifier(**model_params, random_state=seed)
    if model_type == "logistic_regression":
        # 1000 iterations is only a default; a caller-supplied max_iter wins.
        return LogisticRegression(**{"max_iter": 1000, **model_params}, random_state=seed)
    if model_type == "svm":
        # predict_proba is called on the fitted model, so probability
        # estimates must always be enabled regardless of what was passed in.
        return SVC(**{**model_params, "probability": True}, random_state=seed)
    raise ValueError(f"Unknown model type: {model_type}")


def _save_and_log_figure(fig, path):
    """Write ``fig`` to ``path``, attach it to the active MLflow run, show it, and close it."""
    fig.tight_layout()
    fig.savefig(path)
    mlflow.log_artifact(path)
    plt.show()
    # Release the figure explicitly so repeated runs do not accumulate open figures.
    plt.close(fig)


def train_and_log_model(model_type="random_forest", **model_params):
    """Train a classifier on the Iris dataset and log the run to MLflow.

    The run records the dataset/split parameters, the caller-supplied model
    hyperparameters, weighted precision/recall/F1 and accuracy on the held-out
    split, a confusion-matrix plot, a feature-importance plot (only for models
    exposing ``feature_importances_``), the fitted model (registered as
    ``iris_<model_type>``) and a CSV of test-set predictions.

    Artifact files are staged in a temporary directory that is removed when the
    run finishes, so nothing is left behind in the working directory.

    Args:
        model_type: One of ``"random_forest"``, ``"logistic_regression"`` or
            ``"svm"``.
        **model_params: Keyword arguments forwarded to the estimator
            constructor. ``random_state`` (default 42) is used for both the
            train/test split and the estimator. ``max_iter`` overrides the
            logistic-regression default of 1000. ``probability`` is always
            forced to ``True`` for the SVM.

    Returns:
        A ``(model, accuracy)`` tuple: the fitted estimator and its accuracy
        on the 20% test split.

    Raises:
        ValueError: if ``model_type`` is unknown.
        TypeError: if ``model_params`` contains a keyword the estimator's
            constructor does not accept.
    """
    # One seed drives both the split and the estimator, and it is logged once
    # as "random_state". model_params is this call's own kwargs dict, so
    # popping from it does not affect the caller.
    seed = model_params.pop("random_state", 42)
    test_size = 0.2

    # Build the estimator before opening a run, so an unknown model type or an
    # unsupported keyword fails fast instead of leaving a failed run behind.
    model = _build_model(model_type, model_params, seed)

    iris = load_iris()
    X = pd.DataFrame(iris.data, columns=iris.feature_names)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=y
    )

    with mlflow.start_run(run_name=f"{model_type}_experiment") as run:
        with tempfile.TemporaryDirectory() as artifact_dir:
            run_params = {
                "dataset": "iris",
                "n_samples": len(X),
                "n_features": X.shape[1],
                "test_size": test_size,
                "random_state": seed,
                "model_type": model_type,
            }
            # Log the values the estimator actually holds for the keys the
            # caller supplied (e.g. a forced probability=True for the SVM).
            effective_params = model.get_params(deep=False)
            run_params.update({name: effective_params[name] for name in model_params})
            mlflow.log_params(run_params)

            model.fit(X_train, y_train)

            y_pred = model.predict(X_test)
            y_pred_proba = model.predict_proba(X_test)

            accuracy = accuracy_score(y_test, y_pred)
            mlflow.log_metrics({
                "accuracy": accuracy,
                "precision": precision_score(y_test, y_pred, average='weighted'),
                "recall": recall_score(y_test, y_pred, average='weighted'),
                "f1_score": f1_score(y_test, y_pred, average='weighted'),
            })

            # Pin the label order to the fitted classes so rows/columns always
            # line up with the tick labels.
            cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
            cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                        xticklabels=iris.target_names,
                        yticklabels=iris.target_names,
                        ax=cm_ax)
            cm_ax.set_title(f'Confusion Matrix - {model_type}')
            cm_ax.set_ylabel('True Label')
            cm_ax.set_xlabel('Predicted Label')
            _save_and_log_figure(cm_fig, os.path.join(artifact_dir, "confusion_matrix.png"))

            # Only tree-based models expose feature importances.
            if hasattr(model, 'feature_importances_'):
                feature_importance = pd.DataFrame({
                    'feature': iris.feature_names,
                    'importance': model.feature_importances_
                }).sort_values('importance', ascending=False)

                fi_fig, fi_ax = plt.subplots(figsize=(10, 6))
                sns.barplot(data=feature_importance, x='importance', y='feature', ax=fi_ax)
                fi_ax.set_title(f'Feature Importance - {model_type}')
                _save_and_log_figure(fi_fig, os.path.join(artifact_dir, "feature_importance.png"))

            mlflow.sklearn.log_model(
                model,
                "model",
                registered_model_name=f"iris_{model_type}"
            )

            # Per-sample predictions with the probability of the predicted class.
            predictions_df = pd.DataFrame({
                'true_label': y_test,
                'predicted_label': y_pred,
                'prediction_probability': np.max(y_pred_proba, axis=1)
            })
            predictions_path = os.path.join(artifact_dir, "predictions.csv")
            predictions_df.to_csv(predictions_path, index=False)
            mlflow.log_artifact(predictions_path)

        print(f"✅ {model_type} experiment logged successfully!")
        print(f"   Accuracy: {accuracy:.4f}")
        print(f"   Run ID: {run.info.run_id}")

        return model, accuracy

In [None]:
# Group every run from this notebook under a single named MLflow experiment.
experiment_name = "iris_model_comparison_notebook"
mlflow.set_experiment(experiment_name)

# Show where the runs will be recorded.
tracking_uri = mlflow.get_tracking_uri()
print(
    f"🧪 Experiment set: {experiment_name}",
    f"📍 MLflow tracking URI: {tracking_uri}",
    sep="\n",
)

In [None]:
print("🌲 Training a Random Forest model...")

# Hyperparameters for the single baseline run.
rf_params = {"n_estimators": 100, "max_depth": 5}
model, accuracy = train_and_log_model("random_forest", **rf_params)

print(f"\n📊 Model trained with accuracy: {accuracy:.4f}")

In [None]:
def run_comparison_experiments(experiments=None):
    """Train and log several model configurations, then print a comparison table.

    Args:
        experiments: Optional iterable of ``(model_type, params)`` pairs, where
            ``params`` is a dict of keyword arguments forwarded to
            ``train_and_log_model``. When omitted, a built-in sweep of two
            random forests, two logistic regressions and two SVMs is used.

    Returns:
        A list of ``(model_type, params, accuracy)`` tuples, one per
        experiment, in execution order. Empty if no experiments were given.
    """
    print("🚀 Starting MLflow experiment comparison...")

    if experiments is None:
        experiments = [
            ("random_forest", {"n_estimators": 100, "max_depth": 5}),
            ("random_forest", {"n_estimators": 200, "max_depth": 10}),
            ("logistic_regression", {"C": 1.0, "solver": "lbfgs"}),
            ("logistic_regression", {"C": 0.1, "solver": "lbfgs"}),
            ("svm", {"C": 1.0, "kernel": "rbf"}),
            ("svm", {"C": 1.0, "kernel": "linear"}),
        ]
    else:
        # Materialize so len() works for the progress message with any iterable.
        experiments = list(experiments)

    results = []
    for position, (model_type, params) in enumerate(experiments, start=1):
        print(f"\n📊 Running experiment {position}/{len(experiments)}: {model_type} with params: {params}")
        # Only the accuracy is needed here; the fitted model is already logged to MLflow.
        _, accuracy = train_and_log_model(model_type, **params)
        results.append((model_type, params, accuracy))

    param_width = 35
    print("\n📈 Experiment Results Summary:")
    print("-" * 80)
    print(f"{'Model Type':<20} | {'Parameters':<{param_width}} | {'Accuracy':<10}")
    print("-" * 80)
    for model_type, params, accuracy in results:
        param_str = str(params)
        if len(param_str) > param_width:
            # Reserve room for the ellipsis so the cell never exceeds the
            # column width and the table stays aligned.
            param_str = param_str[:param_width - 3] + "..."
        print(f"{model_type:<20} | {param_str:<{param_width}} | {accuracy:.4f}")

    # max() raises on an empty sequence, so only report a winner when there is one.
    if results:
        best_model = max(results, key=lambda entry: entry[2])
        print(f"\n🎯 Best model: {best_model[0]} with accuracy {best_model[2]:.4f}")

    return results

In [None]:
# Run the full comparison sweep; `results` holds one (model_type, params, accuracy)
# tuple per experiment and feeds the summary plots below.
results = run_comparison_experiments()

In [None]:
# Flatten the (model_type, params, accuracy) tuples into one row per run.
result_rows = []
for run_idx, (kind, hyperparams, acc) in enumerate(results):
    result_rows.append({
        'Model': f"{kind}_{run_idx}",
        'Model_Type': kind,
        'Accuracy': acc,
        'Parameters': str(hyperparams),
    })
results_df = pd.DataFrame(result_rows)

# Two stacked panels: per-run accuracy on top, per-model-type mean below.
fig, (ax_runs, ax_avg) = plt.subplots(2, 1, figsize=(12, 8))

sns.barplot(data=results_df, x='Model', y='Accuracy', hue='Model_Type', ax=ax_runs)
ax_runs.set_title('Model Performance Comparison')
ax_runs.tick_params(axis='x', labelrotation=45)
ax_runs.set_ylabel('Accuracy')

model_type_avg = results_df.groupby('Model_Type')['Accuracy'].mean().reset_index()
sns.barplot(data=model_type_avg, x='Model_Type', y='Accuracy', ax=ax_avg)
ax_avg.set_title('Average Accuracy by Model Type')
ax_avg.set_ylabel('Average Accuracy')

fig.tight_layout()
plt.show()

print("\n📊 Detailed Results:")
display(results_df[['Model_Type', 'Accuracy', 'Parameters']])

In [None]:
# Scratch artifact files that may have been left in the working directory.
temp_files = ["confusion_matrix.png", "feature_importance.png", "predictions.csv"]

# Only files that are actually present get removed and reported.
cleaned_files = [path for path in temp_files if os.path.exists(path)]
for path in cleaned_files:
    os.remove(path)

print(
    f"🧹 Cleaned up files: {cleaned_files}"
    if cleaned_files
    else "✅ No temporary files to clean up"
)