# 03. MLflow Experiment Tracking

**MLOps Assignment - BITS Pilani (S1-25_AIMLCZG523)**

---

## Objectives (5 marks)
1. **Integrate MLflow** for experiment tracking
2. **Log Parameters** - Hyperparameters for each model
3. **Log Metrics** - Accuracy, Precision, Recall, F1, ROC-AUC
4. **Log Artifacts** - Confusion matrix, ROC curves, models
5. **Compare Experiments** - Analyze multiple runs

---

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import sys

# MLflow
import mlflow
import mlflow.sklearn

# Scikit-learn
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, roc_curve
)

# Silence library warnings so the notebook output stays readable.
# NOTE: this hides *all* warnings for the rest of the session, not just a
# specific category.
warnings.filterwarnings('ignore')

# Confirm the imports succeeded and record the MLflow version in the output.
# (The check-mark glyph was previously stored as mis-decoded UTF-8 bytes.)
print("✅ Libraries loaded successfully!")
print(f"MLflow version: {mlflow.__version__}")

ModuleNotFoundError: No module named 'pandas'

## 1. Setup MLflow Tracking

In [None]:
# Setup MLflow tracking URI (local file-based tracking)
from pathlib import Path  # local import: only this cell needs pathlib

# Get the project root directory (parent of the notebooks folder).
# Compare the final path component instead of using str.endswith so that a
# folder such as "old_notebooks" is not mistaken for the notebooks folder.
cwd = os.path.abspath(os.getcwd())
PROJECT_ROOT = os.path.dirname(cwd) if os.path.basename(cwd) == 'notebooks' else cwd

# Change to project root for correct relative paths
os.chdir(PROJECT_ROOT)
print(f"Working directory: {os.getcwd()}")

# Store runs in <project root>/mlruns.
mlflow_dir = os.path.join(PROJECT_ROOT, 'mlruns')
os.makedirs(mlflow_dir, exist_ok=True)

# Build the file URI with Path.as_uri() rather than string concatenation:
# it produces a well-formed URI on every OS (e.g. file:///C:/... on Windows)
# and percent-encodes characters such as spaces. mlflow_dir is absolute, which
# as_uri() requires.
tracking_uri = Path(mlflow_dir).as_uri()
mlflow.set_tracking_uri(tracking_uri)

# Create experiment (or select it if it already exists)
EXPERIMENT_NAME = "heart_disease_classification"
mlflow.set_experiment(EXPERIMENT_NAME)

# Disable autologging to avoid conflicts in notebook
mlflow.sklearn.autolog(disable=True)

print("=" * 60)
print("MLFLOW TRACKING SETUP")
print("=" * 60)
print(f"Tracking URI: {tracking_uri}")
print(f"Experiment: {EXPERIMENT_NAME}")
print("=" * 60)

## 2. Load and Prepare Data

In [None]:
# Read the cleaned dataset; the path is anchored at PROJECT_ROOT so the cell
# works regardless of where the kernel was started.
data_path = os.path.join(PROJECT_ROOT, 'data/processed/heart_disease_clean.csv')
print(f"Loading data from: {data_path}")
df = pd.read_csv(data_path)

# Column groups consumed by the preprocessing step below
NUMERICAL_FEATURES = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
CATEGORICAL_FEATURES = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Separate the label column from the predictors
y = df['target']
X = df.drop(columns='target')

# Stratified 80/20 hold-out split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Preprocessing: scale the numerical columns, one-hot encode the categorical ones
numeric_step = ('num', StandardScaler(), NUMERICAL_FEATURES)
categorical_step = (
    'cat',
    OneHotEncoder(drop='first', handle_unknown='ignore'),
    CATEGORICAL_FEATURES,
)
preprocessor = ColumnTransformer([numeric_step, categorical_step])

# Folder where the experiment cells save their plots before logging them
SCREENSHOTS_DIR = os.path.join(PROJECT_ROOT, 'screenshots')
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

## 3. Experiment 1: Logistic Regression with MLflow Tracking


In [None]:
# EXPERIMENT 1: Logistic Regression
# Fits a preprocessing + LogisticRegression pipeline on the training split,
# evaluates it on the test split, and records the result in a single MLflow
# run: tags, hyperparameters, metrics, two diagnostic plots and the fitted
# pipeline.
from sklearn.base import clone  # local import keeps this cell self-contained

with mlflow.start_run(run_name="LogisticRegression_Experiment") as run:

    # Log tags
    mlflow.set_tags({
        "model_type": "LogisticRegression",
        "dataset": "heart_disease_uci",
        "phase": "experiment"
    })

    # Define hyperparameters
    params = {
        'C': 1.0,
        'penalty': 'l2',
        'solver': 'lbfgs',
        'max_iter': 1000
    }

    # Log parameters
    mlflow.log_params(params)
    print("✅ Parameters logged:")
    for k, v in params.items():
        print(f"   {k}: {v}")

    # Create pipeline.
    # Pipeline does not copy its steps, so embedding the module-level
    # `preprocessor` directly would let any later fit on another pipeline
    # silently refit this model's transformer. Use an unfitted clone instead.
    lr_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('classifier', LogisticRegression(**params, random_state=42))
    ])

    # Train model
    lr_pipeline.fit(X_train, y_train)

    # Predictions: hard labels for the threshold metrics, and the second
    # predict_proba column as the score for ROC-AUC and the ROC curve
    y_pred = lr_pipeline.predict(X_test)
    y_prob = lr_pipeline.predict_proba(X_test)[:, 1]

    # Calculate metrics
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1_score': f1_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_prob)
    }

    # Log metrics
    mlflow.log_metrics(metrics)
    print("\n✅ Metrics logged:")
    for k, v in metrics.items():
        print(f"   {k}: {v:.4f}")

    # Create confusion matrix plot (save locally then log)
    fig, ax = plt.subplots(figsize=(6, 5))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix - Logistic Regression')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    lr_cm_path = os.path.join(SCREENSHOTS_DIR, 'lr_confusion_matrix.png')
    fig.savefig(lr_cm_path, dpi=150)
    mlflow.log_artifact(lr_cm_path)
    plt.close(fig)  # close this figure explicitly rather than "the current one"
    print("\n✅ Artifact logged: lr_confusion_matrix.png")

    # Create ROC curve (save locally then log)
    fig, ax = plt.subplots(figsize=(6, 5))
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    ax.plot(fpr, tpr, 'b-', linewidth=2, label=f'ROC (AUC = {metrics["roc_auc"]:.3f})')
    ax.plot([0, 1], [0, 1], 'k--', linewidth=1)  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve - Logistic Regression')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    lr_roc_path = os.path.join(SCREENSHOTS_DIR, 'lr_roc_curve.png')
    fig.savefig(lr_roc_path, dpi=150)
    mlflow.log_artifact(lr_roc_path)
    plt.close(fig)
    print("✅ Artifact logged: lr_roc_curve.png")

    # Log model (the whole pipeline, preprocessing included)
    mlflow.sklearn.log_model(lr_pipeline, "model")
    print("✅ Model logged")

    # Keep the run id so the run can be referenced after the context closes
    lr_run_id = run.info.run_id
    print(f"\n📝 Run ID: {lr_run_id}")


## 4. Experiment 2: Random Forest with MLflow Tracking


In [None]:
# EXPERIMENT 2: Random Forest
# Fits a preprocessing + RandomForestClassifier pipeline on the training
# split, evaluates it on the test split, and records the result in a single
# MLflow run: tags, hyperparameters, metrics, two diagnostic plots and the
# fitted pipeline.
from sklearn.base import clone  # local import keeps this cell self-contained

with mlflow.start_run(run_name="RandomForest_Experiment") as run:

    # Log tags
    mlflow.set_tags({
        "model_type": "RandomForest",
        "dataset": "heart_disease_uci",
        "phase": "experiment"
    })

    # Define hyperparameters
    params = {
        'n_estimators': 200,
        'max_depth': 10,
        'min_samples_split': 5,
        'min_samples_leaf': 1
    }

    # Log parameters
    mlflow.log_params(params)
    print("✅ Parameters logged:")
    for k, v in params.items():
        print(f"   {k}: {v}")

    # Create pipeline.
    # Pipeline does not copy its steps, so embedding the module-level
    # `preprocessor` directly would make this fit also refit the transformer
    # held by any other pipeline built from the same object. Use an unfitted
    # clone instead.
    rf_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('classifier', RandomForestClassifier(**params, random_state=42))
    ])

    # Train model
    rf_pipeline.fit(X_train, y_train)

    # Predictions: hard labels for the threshold metrics, and the second
    # predict_proba column as the score for ROC-AUC and the ROC curve
    y_pred = rf_pipeline.predict(X_test)
    y_prob = rf_pipeline.predict_proba(X_test)[:, 1]

    # Calculate metrics
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1_score': f1_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_prob)
    }

    # Log metrics
    mlflow.log_metrics(metrics)
    print("\n✅ Metrics logged:")
    for k, v in metrics.items():
        print(f"   {k}: {v:.4f}")

    # Create confusion matrix (save locally then log)
    fig, ax = plt.subplots(figsize=(6, 5))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', ax=ax)
    ax.set_title('Confusion Matrix - Random Forest')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    rf_cm_path = os.path.join(SCREENSHOTS_DIR, 'rf_confusion_matrix.png')
    fig.savefig(rf_cm_path, dpi=150)
    mlflow.log_artifact(rf_cm_path)
    plt.close(fig)  # close this figure explicitly rather than "the current one"
    print("\n✅ Artifact logged: rf_confusion_matrix.png")

    # Create ROC curve (save locally then log)
    fig, ax = plt.subplots(figsize=(6, 5))
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    ax.plot(fpr, tpr, 'g-', linewidth=2, label=f'ROC (AUC = {metrics["roc_auc"]:.3f})')
    ax.plot([0, 1], [0, 1], 'k--', linewidth=1)  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve - Random Forest')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    rf_roc_path = os.path.join(SCREENSHOTS_DIR, 'rf_roc_curve.png')
    fig.savefig(rf_roc_path, dpi=150)
    mlflow.log_artifact(rf_roc_path)
    plt.close(fig)
    print("✅ Artifact logged: rf_roc_curve.png")

    # Log model (the whole pipeline, preprocessing included)
    mlflow.sklearn.log_model(rf_pipeline, "model")
    print("✅ Model logged")

    # Keep the run id so the run can be referenced after the context closes
    rf_run_id = run.info.run_id
    print(f"\n📝 Run ID: {rf_run_id}")


## 5. MLflow Tracking Summary


In [None]:
# SUMMARY
# Prints a static recap of what the experiment cells logged to MLflow.
# The panel is rendered by a helper instead of a hand-padded string so that
# every row lines up, including rows that contain double-width emoji. The
# glyphs themselves were previously stored as mis-decoded UTF-8 bytes.
import unicodedata  # local import: only needed to measure glyph widths here

BOX_WIDTH = 70  # interior width of the summary panel, in terminal columns


def display_width(text):
    """Return the number of terminal columns *text* is expected to occupy.

    Characters whose Unicode East Asian Width is Wide or Fullwidth (which
    includes the emoji used in this summary) count as two columns; every
    other character counts as one.
    """
    return sum(
        2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
        for ch in text
    )


def render_box(title, rows, width=BOX_WIDTH):
    """Return *title* and *rows* framed in a box-drawing panel.

    Args:
        title: Text for the header row, used as given (no centering applied).
        rows: Iterable of body lines, one per panel row.
        width: Interior width of the panel in terminal columns.

    Returns:
        The panel as a single newline-joined string. Rows wider than *width*
        are not truncated; they simply receive no padding.
    """
    horizontal = "─" * width

    def framed(text):
        # Pad by display width, not len(), so emoji rows stay aligned.
        padding = " " * max(width - display_width(text), 0)
        return "│" + text + padding + "│"

    lines = ["┌" + horizontal + "┐", framed(title), "├" + horizontal + "┤"]
    lines.extend(framed(row) for row in rows)
    lines.append("└" + horizontal + "┘")
    return "\n".join(lines)


summary_rows = [
    "  ✅ PARAMETERS:",
    "     - Model hyperparameters (C, penalty, n_estimators, etc.)",
    "     - Training configuration",
    "",
    "  ✅ METRICS:",
    "     - Accuracy, Precision, Recall, F1-Score, ROC-AUC",
    "     - Logged for each experiment run",
    "",
    "  ✅ ARTIFACTS:",
    "     - Confusion matrix plots (PNG)",
    "     - ROC curve plots (PNG)",
    "     - Trained model files (pickle)",
    "",
    "  ✅ TAGS:",
    "     - model_type, dataset, phase",
    "",
    "  📁 MLflow Tracking Directory: mlruns/",
    "  🌐 To view UI: mlflow ui --backend-store-uri ./mlruns",
]

print("=" * 70)
print("               MLFLOW EXPERIMENT TRACKING SUMMARY")
print("=" * 70)
# Blank line before and after the panel, as in the original layout.
print("\n" + render_box(" " * 20 + "WHAT WE LOGGED TO MLFLOW", summary_rows) + "\n")
print("✅ Experiment Tracking with MLflow COMPLETE!")
print("=" * 70)


In [None]:
# Compare experiments: fetch the runs recorded under EXPERIMENT_NAME, best
# ROC-AUC first, and print the top ten as a table.
client = mlflow.tracking.MlflowClient()
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

if not experiment:
    print("No experiment found!")
else:
    runs = client.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["metrics.roc_auc DESC"],
    )

    rule = "=" * 70
    print(rule)
    print("                    MLFLOW EXPERIMENT RUNS")
    print(rule)

    # Table column -> key in the run's tags / metrics. Dict order fixes the
    # column order of the printed table.
    tag_columns = {
        'Run Name': 'mlflow.runName',
        'Model Type': 'model_type',
    }
    metric_columns = {
        'Accuracy': 'accuracy',
        'Precision': 'precision',
        'Recall': 'recall',
        'F1-Score': 'f1_score',
        'ROC-AUC': 'roc_auc',
    }

    # One row per run; missing tags show as 'N/A', missing metrics as 0.
    comparison_data = [
        {
            **{col: entry.data.tags.get(key, 'N/A') for col, key in tag_columns.items()},
            **{col: entry.data.metrics.get(key, 0) for col, key in metric_columns.items()},
        }
        for entry in runs[:10]  # Show top 10 runs
    ]

    comparison_df = pd.DataFrame(comparison_data)
    print(comparison_df.to_string(index=False))
    print(rule)
