# SMS Spam Classification with MLflow

This notebook demonstrates experiment tracking and model version control using MLflow.
We'll build 3 benchmark models and track their performance using AUCPR (Area Under Precision-Recall Curve) as the primary metric.

## 1. Import Libraries

In [5]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    precision_recall_curve,
    auc
)
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

Libraries imported successfully!


## 2. MLflow Setup

In [6]:
# Set experiment name
EXPERIMENT_NAME = "SMS_Spam_Classification"
mlflow.set_experiment(EXPERIMENT_NAME)

# Get experiment info
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
print(f"Experiment Name: {experiment.name}")
print(f"Experiment ID: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
print(f"\nMLflow Tracking URI: {mlflow.get_tracking_uri()}")

2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/16 02:38:15 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/16 02:38:18 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/16 02:38:18 INFO alembic.runtime.migration: Will assume non-transactional DDL.


Experiment Name: SMS_Spam_Classification
Experiment ID: 1
Artifact Location: file:///C:/Users/Raja/AML/AML_DVC/mlruns/1

MLflow Tracking URI: sqlite:///mlflow.db


## 3. Load Data

In [7]:
# Load datasets
train_df = pd.read_csv('train.csv')
val_df = pd.read_csv('validation.csv')
test_df = pd.read_csv('test.csv')

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Test samples: {len(test_df)}")
print(f"\nClass distribution in training set:")
print(train_df['label'].value_counts())
print(f"\nSpam ratio: {(train_df['label'] == 'spam').sum() / len(train_df):.2%}")

Training samples: 4457
Validation samples: 557
Test samples: 558

Class distribution in training set:
label
ham     3857
spam     600
Name: count, dtype: int64

Spam ratio: 13.46%


## 4. Prepare Data for Training

In [8]:
# Extract features and labels
X_train = train_df['text'].values
y_train = (train_df['label'] == 'spam').astype(int).values

X_val = val_df['text'].values
y_val = (val_df['label'] == 'spam').astype(int).values

X_test = test_df['text'].values
y_test = (test_df['label'] == 'spam').astype(int).values

print("Data prepared successfully!")
print(f"Training labels - Ham: {(y_train == 0).sum()}, Spam: {(y_train == 1).sum()}")
print(f"Validation labels - Ham: {(y_val == 0).sum()}, Spam: {(y_val == 1).sum()}")
print(f"Test labels - Ham: {(y_test == 0).sum()}, Spam: {(y_test == 1).sum()}")

Data prepared successfully!
Training labels - Ham: 3857, Spam: 600
Validation labels - Ham: 490, Spam: 67
Test labels - Ham: 478, Spam: 80


## 5. Helper Functions for Evaluation

In [9]:
def calculate_metrics(y_true, y_pred, y_pred_proba):
    """Calculate comprehensive metrics including AUCPR"""
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1_score': f1_score(y_true, y_pred),
        'roc_auc': roc_auc_score(y_true, y_pred_proba),
        'aucpr': average_precision_score(y_true, y_pred_proba)  # Area Under PR Curve
    }
    return metrics

def print_metrics(metrics, dataset_name=""):
    """Pretty print metrics"""
    print(f"\n{'='*50}")
    print(f"{dataset_name} Metrics")
    print(f"{'='*50}")
    print(f"Accuracy:  {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall:    {metrics['recall']:.4f}")
    print(f"F1 Score:  {metrics['f1_score']:.4f}")
    print(f"ROC AUC:   {metrics['roc_auc']:.4f}")
    print(f"AUCPR:     {metrics['aucpr']:.4f}")
    print(f"{'='*50}")

print("Helper functions defined!")

Helper functions defined!


## 6. Model 1: Logistic Regression

In [10]:
# Start MLflow run
with mlflow.start_run(run_name="Logistic_Regression") as run:
    print(f"MLflow Run ID: {run.info.run_id}")
    
    # Model parameters
    params = {
        'model_type': 'Logistic Regression',
        'max_features': 5000,
        'ngram_range': '(1, 2)',
        'C': 1.0,
        'max_iter': 1000,
        'solver': 'liblinear'
    }
    
    # Log parameters
    mlflow.log_params(params)
    
    # Create pipeline
    model = Pipeline([
        ('tfidf', TfidfVectorizer(max_features=5000, ngram_range=(1, 2))),
        ('classifier', LogisticRegression(C=1.0, max_iter=1000, solver='liblinear', random_state=42))
    ])
    
    # Train model
    print("\nTraining Logistic Regression...")
    model.fit(X_train, y_train)
    
    # Predictions
    y_train_pred = model.predict(X_train)
    y_train_proba = model.predict_proba(X_train)[:, 1]
    
    y_val_pred = model.predict(X_val)
    y_val_proba = model.predict_proba(X_val)[:, 1]
    
    y_test_pred = model.predict(X_test)
    y_test_proba = model.predict_proba(X_test)[:, 1]
    
    # Calculate metrics
    train_metrics = calculate_metrics(y_train, y_train_pred, y_train_proba)
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)
    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)
    
    # Log metrics to MLflow
    for metric_name, value in train_metrics.items():
        mlflow.log_metric(f"train_{metric_name}", value)
    for metric_name, value in val_metrics.items():
        mlflow.log_metric(f"val_{metric_name}", value)
    for metric_name, value in test_metrics.items():
        mlflow.log_metric(f"test_{metric_name}", value)
    
    # Print results
    print_metrics(train_metrics, "Training")
    print_metrics(val_metrics, "Validation")
    print_metrics(test_metrics, "Test")
    
    # Log model to MLflow
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="SMS_Spam_LogisticRegression"
    )
    
    print(f"\n‚úì Model registered as 'SMS_Spam_LogisticRegression'")
    print(f"‚úì Test AUCPR: {test_metrics['aucpr']:.4f}")
    
    lr_run_id = run.info.run_id
    lr_aucpr = test_metrics['aucpr']

MLflow Run ID: ef234570f6fb4d01b833cb27c7aa0a06

Training Logistic Regression...





Training Metrics
Accuracy:  0.9809
Precision: 1.0000
Recall:    0.8583
F1 Score:  0.9238
ROC AUC:   0.9982
AUCPR:     0.9923

Validation Metrics
Accuracy:  0.9785
Precision: 1.0000
Recall:    0.8209
F1 Score:  0.9016
ROC AUC:   0.9980
AUCPR:     0.9887

Test Metrics
Accuracy:  0.9624
Precision: 1.0000
Recall:    0.7375
F1 Score:  0.8489
ROC AUC:   0.9852
AUCPR:     0.9766


Registered model 'SMS_Spam_LogisticRegression' already exists. Creating a new version of this model...
Created version '2' of model 'SMS_Spam_LogisticRegression'.



‚úì Model registered as 'SMS_Spam_LogisticRegression'
‚úì Test AUCPR: 0.9766


## 7. Model 2: Multinomial Naive Bayes

In [11]:
# Start MLflow run
with mlflow.start_run(run_name="Naive_Bayes") as run:
    print(f"MLflow Run ID: {run.info.run_id}")
    
    # Model parameters
    params = {
        'model_type': 'Multinomial Naive Bayes',
        'max_features': 5000,
        'ngram_range': '(1, 2)',
        'alpha': 1.0
    }
    
    # Log parameters
    mlflow.log_params(params)
    
    # Create pipeline
    model = Pipeline([
        ('tfidf', TfidfVectorizer(max_features=5000, ngram_range=(1, 2))),
        ('classifier', MultinomialNB(alpha=1.0))
    ])
    
    # Train model
    print("\nTraining Multinomial Naive Bayes...")
    model.fit(X_train, y_train)
    
    # Predictions
    y_train_pred = model.predict(X_train)
    y_train_proba = model.predict_proba(X_train)[:, 1]
    
    y_val_pred = model.predict(X_val)
    y_val_proba = model.predict_proba(X_val)[:, 1]
    
    y_test_pred = model.predict(X_test)
    y_test_proba = model.predict_proba(X_test)[:, 1]
    
    # Calculate metrics
    train_metrics = calculate_metrics(y_train, y_train_pred, y_train_proba)
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)
    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)
    
    # Log metrics to MLflow
    for metric_name, value in train_metrics.items():
        mlflow.log_metric(f"train_{metric_name}", value)
    for metric_name, value in val_metrics.items():
        mlflow.log_metric(f"val_{metric_name}", value)
    for metric_name, value in test_metrics.items():
        mlflow.log_metric(f"test_{metric_name}", value)
    
    # Print results
    print_metrics(train_metrics, "Training")
    print_metrics(val_metrics, "Validation")
    print_metrics(test_metrics, "Test")
    
    # Log model to MLflow
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="SMS_Spam_NaiveBayes"
    )
    
    print(f"\n‚úì Model registered as 'SMS_Spam_NaiveBayes'")
    print(f"‚úì Test AUCPR: {test_metrics['aucpr']:.4f}")
    
    nb_run_id = run.info.run_id
    nb_aucpr = test_metrics['aucpr']

MLflow Run ID: b6b588f3dad340c3b088188158f0dd45

Training Multinomial Naive Bayes...





Training Metrics
Accuracy:  0.9818
Precision: 1.0000
Recall:    0.8650
F1 Score:  0.9276
ROC AUC:   0.9933
AUCPR:     0.9825

Validation Metrics
Accuracy:  0.9820
Precision: 1.0000
Recall:    0.8507
F1 Score:  0.9194
ROC AUC:   0.9938
AUCPR:     0.9784

Test Metrics
Accuracy:  0.9677
Precision: 1.0000
Recall:    0.7750
F1 Score:  0.8732
ROC AUC:   0.9840
AUCPR:     0.9675


Registered model 'SMS_Spam_NaiveBayes' already exists. Creating a new version of this model...
Created version '2' of model 'SMS_Spam_NaiveBayes'.



‚úì Model registered as 'SMS_Spam_NaiveBayes'
‚úì Test AUCPR: 0.9675


## 8. Model 3: Random Forest

In [12]:
# Start MLflow run
with mlflow.start_run(run_name="Random_Forest") as run:
    print(f"MLflow Run ID: {run.info.run_id}")
    
    # Model parameters
    params = {
        'model_type': 'Random Forest',
        'max_features': 5000,
        'ngram_range': '(1, 2)',
        'n_estimators': 100,
        'max_depth': 20,
        'min_samples_split': 5,
        'min_samples_leaf': 2
    }
    
    # Log parameters
    mlflow.log_params(params)
    
    # Create pipeline
    model = Pipeline([
        ('tfidf', TfidfVectorizer(max_features=5000, ngram_range=(1, 2))),
        ('classifier', RandomForestClassifier(
            n_estimators=100,
            max_depth=20,
            min_samples_split=5,
            min_samples_leaf=2,
            random_state=42,
            n_jobs=-1
        ))
    ])
    
    # Train model
    print("\nTraining Random Forest...")
    model.fit(X_train, y_train)
    
    # Predictions
    y_train_pred = model.predict(X_train)
    y_train_proba = model.predict_proba(X_train)[:, 1]
    
    y_val_pred = model.predict(X_val)
    y_val_proba = model.predict_proba(X_val)[:, 1]
    
    y_test_pred = model.predict(X_test)
    y_test_proba = model.predict_proba(X_test)[:, 1]
    
    # Calculate metrics
    train_metrics = calculate_metrics(y_train, y_train_pred, y_train_proba)
    val_metrics = calculate_metrics(y_val, y_val_pred, y_val_proba)
    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_proba)
    
    # Log metrics to MLflow
    for metric_name, value in train_metrics.items():
        mlflow.log_metric(f"train_{metric_name}", value)
    for metric_name, value in val_metrics.items():
        mlflow.log_metric(f"val_{metric_name}", value)
    for metric_name, value in test_metrics.items():
        mlflow.log_metric(f"test_{metric_name}", value)
    
    # Print results
    print_metrics(train_metrics, "Training")
    print_metrics(val_metrics, "Validation")
    print_metrics(test_metrics, "Test")
    
    # Log model to MLflow
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="SMS_Spam_RandomForest"
    )
    
    print(f"\n‚úì Model registered as 'SMS_Spam_RandomForest'")
    print(f"‚úì Test AUCPR: {test_metrics['aucpr']:.4f}")
    
    rf_run_id = run.info.run_id
    rf_aucpr = test_metrics['aucpr']

MLflow Run ID: d4e5467716544f4d848533d2ee1563ca

Training Random Forest...





Training Metrics
Accuracy:  0.9659
Precision: 1.0000
Recall:    0.7467
F1 Score:  0.8550
ROC AUC:   0.9938
AUCPR:     0.9832

Validation Metrics
Accuracy:  0.9695
Precision: 1.0000
Recall:    0.7463
F1 Score:  0.8547
ROC AUC:   0.9943
AUCPR:     0.9765

Test Metrics
Accuracy:  0.9444
Precision: 1.0000
Recall:    0.6125
F1 Score:  0.7597
ROC AUC:   0.9841
AUCPR:     0.9636


Registered model 'SMS_Spam_RandomForest' already exists. Creating a new version of this model...
Created version '2' of model 'SMS_Spam_RandomForest'.



‚úì Model registered as 'SMS_Spam_RandomForest'
‚úì Test AUCPR: 0.9636


## 9. Model Comparison - AUCPR Summary

In [13]:
# Retrieve all runs from the experiment
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])

# Filter and display AUCPR metrics
print("\n" + "="*70)
print("MODEL COMPARISON - AUCPR (Area Under Precision-Recall Curve)")
print("="*70)

# Sort by test AUCPR (descending)
runs_sorted = runs.sort_values('metrics.test_aucpr', ascending=False)

print(f"\n{'Model':<30} {'Test AUCPR':<15} {'Val AUCPR':<15} {'Train AUCPR':<15}")
print("-"*70)

for idx, row in runs_sorted.iterrows():
    model_name = row['tags.mlflow.runName']
    test_aucpr = row['metrics.test_aucpr']
    val_aucpr = row['metrics.val_aucpr']
    train_aucpr = row['metrics.train_aucpr']
    
    print(f"{model_name:<30} {test_aucpr:<15.4f} {val_aucpr:<15.4f} {train_aucpr:<15.4f}")

print("\n" + "="*70)

# Find best model
best_model_row = runs_sorted.iloc[0]
best_model_name = best_model_row['tags.mlflow.runName']
best_aucpr = best_model_row['metrics.test_aucpr']

print(f"\nüèÜ BEST MODEL: {best_model_name}")
print(f"   Test AUCPR: {best_aucpr:.4f}")
print(f"   Run ID: {best_model_row['run_id']}")
print("="*70)


MODEL COMPARISON - AUCPR (Area Under Precision-Recall Curve)

Model                          Test AUCPR      Val AUCPR       Train AUCPR    
----------------------------------------------------------------------
Logistic_Regression            0.9766          0.9887          0.9923         
Logistic_Regression            0.9766          0.9887          0.9923         
Naive_Bayes                    0.9675          0.9784          0.9825         
Naive_Bayes                    0.9675          0.9784          0.9825         
Random_Forest                  0.9636          0.9765          0.9832         
Random_Forest                  0.9636          0.9765          0.9832         


üèÜ BEST MODEL: Logistic_Regression
   Test AUCPR: 0.9766
   Run ID: 0313136c4a0b42db97fe3c3120198423


## 10. Retrieve and Print Individual Model AUCPR

In [14]:
# Retrieve AUCPR for each model from MLflow
print("\n" + "="*70)
print("INDIVIDUAL MODEL AUCPR RETRIEVAL FROM MLFLOW")
print("="*70)

model_names = [
    "Logistic_Regression",
    "Naive_Bayes",
    "Random_Forest"
]

for model_name in model_names:
    # Search for runs with this name
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        filter_string=f"tags.mlflow.runName = '{model_name}'",
        order_by=["start_time DESC"],
        max_results=1
    )
    
    if len(runs) > 0:
        run = runs.iloc[0]
        run_id = run['run_id']
        test_aucpr = run['metrics.test_aucpr']
        val_aucpr = run['metrics.val_aucpr']
        train_aucpr = run['metrics.train_aucpr']
        
        print(f"\nüìä {model_name}")
        print(f"   Run ID: {run_id}")
        print(f"   Test AUCPR:       {test_aucpr:.4f}")
        print(f"   Validation AUCPR: {val_aucpr:.4f}")
        print(f"   Training AUCPR:   {train_aucpr:.4f}")
        print(f"   {'-'*50}")

print("\n" + "="*70)


INDIVIDUAL MODEL AUCPR RETRIEVAL FROM MLFLOW

üìä Logistic_Regression
   Run ID: ef234570f6fb4d01b833cb27c7aa0a06
   Test AUCPR:       0.9766
   Validation AUCPR: 0.9887
   Training AUCPR:   0.9923
   --------------------------------------------------

üìä Naive_Bayes
   Run ID: b6b588f3dad340c3b088188158f0dd45
   Test AUCPR:       0.9675
   Validation AUCPR: 0.9784
   Training AUCPR:   0.9825
   --------------------------------------------------

üìä Random_Forest
   Run ID: d4e5467716544f4d848533d2ee1563ca
   Test AUCPR:       0.9636
   Validation AUCPR: 0.9765
   Training AUCPR:   0.9832
   --------------------------------------------------



## 11. View Registered Models

In [15]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

print("\n" + "="*70)
print("REGISTERED MODELS IN MLFLOW MODEL REGISTRY")
print("="*70)

# List all registered models
registered_models = client.search_registered_models()

for rm in registered_models:
    if rm.name.startswith("SMS_Spam_"):
        print(f"\nModel Name: {rm.name}")
        print(f"Description: {rm.description if rm.description else 'N/A'}")
        print(f"Latest Version: {rm.latest_versions[0].version if rm.latest_versions else 'N/A'}")
        print(f"Last Updated: {rm.last_updated_timestamp}")
        print("-"*70)

print("\n‚úì All models successfully registered and tracked with MLflow!")


REGISTERED MODELS IN MLFLOW MODEL REGISTRY

Model Name: SMS_Spam_LogisticRegression
Description: N/A
Latest Version: 2
Last Updated: 1771189735767
----------------------------------------------------------------------

Model Name: SMS_Spam_NaiveBayes
Description: N/A
Latest Version: 2
Last Updated: 1771189758129
----------------------------------------------------------------------

Model Name: SMS_Spam_RandomForest
Description: N/A
Latest Version: 2
Last Updated: 1771189784981
----------------------------------------------------------------------

‚úì All models successfully registered and tracked with MLflow!
