# MLflow Integration for Model Lifecycle Management

This notebook demonstrates MLflow integration for py-tidymodels model persistence and deployment.

**Topics Covered:**
1. Basic save/load workflow
2. Saving workflows with recipes
3. Saving grouped/nested models
4. Version compatibility checking
5. Model signatures and metadata
6. Loading and deploying models
7. Integration with experiment tracking

**Use Case:** Model lifecycle management for production deployment

**Why MLflow:**
- Standardized model serialization
- Version tracking and metadata
- Model registry for production deployment
- Integration with experiment tracking
- Cross-team collaboration

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import shutil

from py_parsnip import linear_reg, rand_forest
from py_workflows import workflow
from py_recipes import recipe
from py_mlflow import save_model, load_model, get_model_info, validate_model_exists
from py_yardstick import rmse, mae, r_squared

# Plot styling and reproducibility: switch seaborn to the white-grid theme
# and seed NumPy's global RNG, which the data-generation cells draw from.
sns.set_style('whitegrid')
np.random.seed(42)

# Start every run from an empty output directory so artifacts left over from
# a previous run cannot be mistaken for freshly saved models. The path is
# relative, so it resolves against the current working directory.
models_dir = Path('mlflow_demo_models')
if models_dir.exists():
    shutil.rmtree(models_dir)
models_dir.mkdir()

print(
    "Imports successful!",
    f"Models will be saved to: {models_dir.absolute()}",
    sep="\n",
)

## 1. Generate Sample Sales Data

Create realistic sales dataset for demonstration.

In [None]:
# Build a synthetic sales dataset whose true generating process is known.
n = 200

# Predictors. The draw order (advertising, price, competitor_price, then the
# noise term) is significant: all draws come from the seeded global RNG.
advertising = np.random.uniform(0, 100, n)
price = np.random.uniform(10, 50, n)
competitor_price = np.random.uniform(10, 50, n)
# Deterministic seasonal signal: two full sine periods over the n rows.
seasonality = np.sin(np.linspace(0, 4*np.pi, n))

# True sales relationship: linear in every predictor plus Gaussian noise
# with standard deviation 5.
noise = np.random.randn(n) * 5
sales = (
    100 +
    1.5 * advertising +
    -2.0 * price +
    1.0 * competitor_price +
    30 * seasonality +
    noise
)

# Assemble the frame; the response comes first, then the predictors.
data = pd.DataFrame(dict(
    sales=sales,
    advertising=advertising,
    price=price,
    competitor_price=competitor_price,
    seasonality=seasonality,
))

# Positional hold-out split: first 160 rows train, remaining 40 rows test.
split_at = 160
train_data, test_data = data.iloc[:split_at], data.iloc[split_at:]

print(f"Training data: {len(train_data)} observations")
print(f"Test data: {len(test_data)} observations")
print("\nData summary:")
print(data.describe())

## 2. Basic ModelFit Save/Load

Simplest use case: Save and load a fitted model.

In [None]:
# Specify and fit a regularised linear regression (penalty=0.1, mixture=0.5)
# on the three price/advertising predictors; seasonality is left out.
spec = linear_reg(penalty=0.1, mixture=0.5)
fit = spec.fit(train_data, "sales ~ advertising + price + competitor_price")

# Score the fitted model against the held-out rows.
fit_eval = fit.evaluate(test_data)

print("Model fitted and evaluated!")
print(f"\nModel type: {fit.spec.model_type}")
print(f"Engine: {fit.spec.engine}")

# extract_outputs() yields three objects; `stats` carries one row of metrics
# per data split, selected below through its 'split' column.
outputs, coeffs, stats = fit_eval.extract_outputs()
test_stats = stats[stats['split'] == 'test']

print(f"\nTest RMSE: {test_stats['rmse'].values[0]:.4f}")
print(f"Test R²: {test_stats['r_squared'].values[0]:.4f}")

In [None]:
# Persist the evaluated linear model under the demo output directory.
model_path = models_dir / 'basic_linear_model'

# Free-form descriptive metadata stored alongside the model.
basic_metadata = {
    'dataset': 'synthetic_sales',
    'version': '1.0',
    'description': 'Linear regression with elastic net regularization'
}

# signature='auto' asks for the input/output schema to be inferred, and the
# first five training rows are stored as the example input.
# NOTE(review): these example rows still include the 'sales' target column —
# confirm save_mlflow() excludes it when inferring the input schema.
fit_eval.save_mlflow(
    path=str(model_path),
    signature='auto',
    input_example=train_data.head(5),
    metadata=basic_metadata,
)

print(f"\nModel saved to: {model_path}")
print("✓ Model successfully serialized with MLflow format")

In [None]:
# Confirm the saved artifact is present before reading anything from it.
exists = validate_model_exists(str(model_path))
print(f"Model exists: {exists}")

# Read the stored metadata; the model object itself is not loaded here.
info = get_model_info(str(model_path))

print("\nModel Metadata:")
info_fields = (
    ("Model Type", 'model_type'),
    ("Engine", 'engine'),
    ("Mode", 'mode'),
    ("py-tidymodels Version", 'py_tidymodels_version'),
    ("Fit Timestamp", 'fit_timestamp'),
    ("Custom Metadata", 'metadata'),
)
for field_label, field_key in info_fields:
    print(f"  {field_label}: {info[field_key]}")

In [None]:
# Round-trip check: reload the saved model and confirm it predicts the same
# values as the in-memory model it was saved from.
loaded_model = load_model(str(model_path))
loaded_spec = loaded_model.spec

print("\nModel loaded successfully!")
print(f"Model type: {loaded_spec.model_type}")
print(f"Engine: {loaded_spec.engine}")

# Predict on the same held-out rows with both objects.
preds_original = fit_eval.predict(test_data)
preds_loaded = loaded_model.predict(test_data)

original_pred_col = preds_original['.pred']
loaded_pred_col = preds_loaded['.pred']

# np.allclose compares within floating-point tolerance rather than exactly.
predictions_match = np.allclose(original_pred_col.values, loaded_pred_col.values)

print(f"\n✓ Predictions match: {predictions_match}")
print(f"Original prediction (first 5): {original_pred_col.head().values}")
print(f"Loaded prediction (first 5):   {loaded_pred_col.head().values}")

## 3. Save WorkflowFit with Recipe

Demonstrate saving workflows with preprocessing pipelines.

In [None]:
# Preprocessing recipe: normalise the predictors, then project them onto
# three principal components.
rec = recipe().step_normalize().step_pca(num_comp=3)

# Model: a 50-tree random forest (min_n=5) in regression mode.
rf_spec = rand_forest(trees=50, min_n=5).set_mode('regression')

# Bundle recipe and model so they are fitted (and later saved) as one unit.
wf = workflow().add_recipe(rec).add_model(rf_spec)

# Fit workflow
wf_fit = wf.fit(train_data)

print("Workflow fitted!")
print(f"\nModel: {wf_fit.spec.model_type}")
print(f"Recipe steps: {len(rec.steps)}")
print("  - Normalization")
print("  - PCA (3 components)")

In [None]:
# Score the fitted workflow on the held-out rows and pull out its metrics.
wf_eval = wf_fit.evaluate(test_data)
outputs_wf, coeffs_wf, stats_wf = wf_eval.extract_outputs()

# Keep only the metric row(s) belonging to the test split.
test_stats_wf = stats_wf[stats_wf['split'] == 'test']

print("\nWorkflow performance:")
print(f"Test RMSE: {test_stats_wf['rmse'].values[0]:.4f}")
print(f"Test R²: {test_stats_wf['r_squared'].values[0]:.4f}")

In [None]:
# Persist the evaluated workflow; the recipe and the model go into a single
# artifact.
workflow_path = models_dir / 'workflow_with_recipe'

# Record how the pipeline was configured next to the artifact.
workflow_metadata = {
    'recipe_steps': len(rec.steps),
    'pca_components': 3,
    'model_type': 'random_forest'
}

wf_eval.save_mlflow(
    path=str(workflow_path),
    signature='auto',
    input_example=train_data.head(5),
    metadata=workflow_metadata,
)

print(f"Workflow saved to: {workflow_path}")
print("✓ Both recipe and model serialized")

In [None]:
# Reload the saved workflow from disk.
loaded_workflow = load_model(str(workflow_path))

print("\nWorkflow loaded successfully!")

# predict() receives the raw test frame; no manual preprocessing happens in
# this cell, so the recipe steps must run inside the loaded workflow.
test_preds = loaded_workflow.predict(test_data)
loaded_wf_pred = test_preds['.pred']

print(f"\nPredictions generated: {len(test_preds)} rows")
print("✓ Recipe preprocessing applied automatically")
print(f"\nFirst 5 predictions: {loaded_wf_pred.head().values}")

# Round-trip check against the in-memory workflow the artifact came from.
wf_preds_original = wf_eval.predict(test_data)
workflow_match = np.allclose(wf_preds_original['.pred'].values, loaded_wf_pred.values)
print(f"\n✓ Workflow predictions match: {workflow_match}")

## 4. Save Grouped/Nested Models

Demonstrate saving per-group models.

In [None]:
# Create grouped sales data: 60 observations for each of three stores.
# Every store has its own (advertising, price) coefficients, which is what
# makes fitting one model per store worthwhile.
store_coefficients = {
    'Store_A': (2.0, -3.0),
    'Store_B': (1.0, -1.5),
    'Store_C': (1.5, -2.0),
}
n_store = 60            # rows simulated per store
n_train_per_store = 50  # rows per store used for training; the rest are test

groups = []
for store, (coef_adv, coef_price) in store_coefficients.items():
    adv = np.random.uniform(0, 100, n_store)
    pr = np.random.uniform(10, 50, n_store)
    comp_pr = np.random.uniform(10, 50, n_store)

    # Same structure as the ungrouped data (without seasonality), but with
    # the store-specific coefficients; noise has standard deviation 5.
    sales_store = (
        100 + coef_adv * adv + coef_price * pr + comp_pr +
        np.random.randn(n_store) * 5
    )

    groups.append(pd.DataFrame({
        'store': store,
        'sales': sales_store,
        'advertising': adv,
        'price': pr,
        'competitor_price': comp_pr
    }))

grouped_data = pd.concat(groups, ignore_index=True)

# Split inside each store. The rows are stacked store by store, so a plain
# positional cut (iloc[:150] / iloc[150:]) would leave only Store_C rows in
# the test set and would train Store_C on half as many rows as the others.
# cumcount() numbers the rows 0..59 within each store, giving 50 training and
# 10 test rows per store (150 / 30 overall, the same totals as before).
row_in_store = grouped_data.groupby('store').cumcount()
grouped_train = grouped_data[row_in_store < n_train_per_store]
grouped_test = grouped_data[row_in_store >= n_train_per_store]

print(f"Grouped data created: {grouped_data['store'].nunique()} stores")
print("\nStore counts:")
print(grouped_data['store'].value_counts())

In [None]:
# Fit one linear regression per value of the 'store' column.
nested_formula = "sales ~ advertising + price + competitor_price"
spec_nested = linear_reg()
nested_fit = spec_nested.fit_nested(
    grouped_train,
    formula=nested_formula,
    group_col="store",
)

print(f"\nFitted {len(nested_fit.group_fits)} models (one per store)")

# Evaluate on the held-out grouped rows and unpack the three result objects.
nested_eval = nested_fit.evaluate(grouped_test)
outputs_nested, coeffs_nested, stats_nested = nested_eval.extract_outputs()

# Show the test-split metrics only, one row per group.
is_test_split = stats_nested['split'] == 'test'
per_store_columns = ['group', 'rmse', 'r_squared']

print("\nPer-store performance:")
print(stats_nested[is_test_split][per_store_columns])

In [None]:
# Persist every per-store model under a single path.
grouped_path = models_dir / 'store_models'

# Record which groups were fitted and which column defines them.
grouped_metadata = {
    'stores': list(nested_fit.group_fits.keys()),
    'group_col': 'store'
}

nested_eval.save_mlflow(path=str(grouped_path), metadata=grouped_metadata)

print(f"\nGrouped models saved to: {grouped_path}")
print("✓ All 3 store models serialized")

In [None]:
# Read the metadata stored with the grouped artifact.
grouped_info = get_model_info(str(grouped_path))

print("\nGrouped Model Metadata:")
grouped_fields = (
    ("Is Grouped", 'is_grouped'),
    ("Group Column", 'group_col'),
    ("Groups", 'groups'),
)
for field_label, field_key in grouped_fields:
    print(f"  {field_label}: {grouped_info[field_key]}")

In [None]:
# Reload the per-store models from disk.
loaded_nested = load_model(str(grouped_path))
loaded_group_fits = loaded_nested.group_fits

print("\nGrouped models loaded successfully!")
print(f"Number of group models: {len(loaded_group_fits)}")
print(f"Groups: {list(loaded_group_fits.keys())}")

# A single predict() call over the mixed-store frame; no per-store dispatch
# is written in this cell, so routing happens inside the loaded object.
nested_preds = loaded_nested.predict(grouped_test)

print(f"\nPredictions generated: {len(nested_preds)} rows")
print("✓ Automatic routing to correct store model")
print("\nFirst 5 predictions:")
print(nested_preds[['.pred', 'store']].head())

# Round-trip check against the in-memory nested fit.
nested_preds_original = nested_eval.predict(grouped_test)
nested_match = np.allclose(
    nested_preds_original['.pred'].values,
    nested_preds['.pred'].values,
)
print(f"\n✓ Nested predictions match: {nested_match}")

## 5. Version Compatibility

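Each saved model records the py-tidymodels version it was trained with, and `load_model()` checks that version when the model is loaded, raising a warning if it is incompatible.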

In [None]:
# Save another copy of the basic model, tagged for the version demo.
version_path = models_dir / 'version_test'
fit_eval.save_mlflow(
    path=str(version_path),
    metadata={'purpose': 'version_compatibility_test'},
)

# The stored info includes the library version and fit timestamp.
version_info = get_model_info(str(version_path))

print("Model Version Information:")
print(f"  Trained with py-tidymodels: {version_info['py_tidymodels_version']}")
print(f"  Fit timestamp: {version_info['fit_timestamp']}")
print(f"  Model type: {version_info['model_type']}")

# Loading is where the recorded version is expected to be checked (see the
# note printed below); nothing in this cell performs the check explicitly.
loaded_version = load_model(str(version_path))

print("\n✓ Model loaded successfully (version compatible)")
print("\nNote: If versions are incompatible, load_model() would raise a warning")

## 6. Model Signatures

MLflow signatures define input/output schemas for validation.

In [None]:
# Save the basic model again, this time to inspect its stored signature.
signature_path = models_dir / 'model_with_signature'

# signature='auto' asks for the schema to be inferred; three training rows
# are supplied as the example input.
fit_eval.save_mlflow(
    path=str(signature_path),
    signature='auto',
    input_example=train_data.head(3),
)

print("Model saved with signature")

# The signature is reported only when the stored info actually has one.
sig_info = get_model_info(str(signature_path))

if 'signature' not in sig_info:
    print("\nSignature not available in model info")
else:
    stored_signature = sig_info['signature']
    print("\nModel Signature:")
    print(f"  Inputs: {stored_signature['inputs']}")
    print(f"  Outputs: {stored_signature['outputs']}")

In [None]:
# Reload the model saved with a signature and probe it with good and bad input.
loaded_sig = load_model(str(signature_path))

# Case 1: the full test frame. Any failure is reported rather than raised so
# the notebook keeps running.
try:
    valid_preds = loaded_sig.predict(test_data)
    print("✓ Valid prediction succeeded")
    print(f"  Shape: {valid_preds.shape}")
except Exception as valid_err:
    print(f"Valid prediction failed: {valid_err}")

# Case 2: the same frame with the 'advertising' column removed. Here the
# exception IS the demonstration, so it is caught broadly and only its type
# name is shown.
try:
    invalid_data = test_data.drop(columns=['advertising'])
    invalid_preds = loaded_sig.predict(invalid_data)
    print("Invalid prediction succeeded (unexpected!)")
except Exception as invalid_err:
    print("\n✓ Invalid prediction failed as expected:")
    print(f"  Error: {type(invalid_err).__name__}")

## 7. Model Comparison

Load multiple models and compare performance.

In [None]:
# Reload the two ungrouped artifacts saved earlier, keyed by display name.
saved_model_folders = [
    ('Linear (Basic)', 'basic_linear_model'),
    ('Random Forest (Recipe)', 'workflow_with_recipe'),
]
models_to_compare = {
    display_name: load_model(str(models_dir / folder_name))
    for display_name, folder_name in saved_model_folders
}

# Metric functions in the column order wanted for the comparison table. Each
# is called as metric(truth, estimate) and its value is read from the first
# row's 'value' field.
metric_functions = {'RMSE': rmse, 'MAE': mae, 'R²': r_squared}

# Score every model on the same held-out rows.
comparison_results = []
for model_name, model in models_to_compare.items():
    preds = model.predict(test_data)

    result_row = {'Model': model_name}
    for metric_label, metric_fn in metric_functions.items():
        metric_frame = metric_fn(test_data['sales'], preds['.pred'])
        result_row[metric_label] = metric_frame.iloc[0]['value']

    comparison_results.append(result_row)

comparison_df = pd.DataFrame(comparison_results)

print("Model Comparison on Test Data:")
print(comparison_df)

In [None]:
# One bar chart per metric, side by side.
metrics = ['RMSE', 'MAE', 'R²']
fig, axes = plt.subplots(1, len(metrics), figsize=(15, 5))

for ax, metric in zip(axes, metrics):
    ax.bar(comparison_df['Model'], comparison_df[metric], alpha=0.7)
    ax.set_ylabel(metric)
    ax.set_title(f'{metric} Comparison')
    # Model names are long, so tilt the x tick labels.
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# The winner is the row with the smallest RMSE.
best_row_label = comparison_df['RMSE'].idxmin()
best_model = comparison_df.loc[best_row_label, 'Model']
print(f"\n✓ Best model (lowest RMSE): {best_model}")

## 8. Production Deployment Workflow

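Demonstrate a full production workflow: train candidate models, select the best one, save it with metadata, and load it to make predictions. Models are saved locally here; integration with a model registry is sketched in Section 9.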

In [None]:
# 1. Train multiple candidate models
print("Training candidate models...")

# Every candidate goes through the same fit -> evaluate -> read-test-RMSE
# steps, so they are described as data and handled by one loop instead of
# three copy-pasted stanzas.
base_formula = "sales ~ advertising + price + competitor_price"
candidate_definitions = [
    # (name, model specification, formula)
    ('linear_regression', linear_reg(), base_formula),
    ('linear_with_interactions', linear_reg(),
     base_formula + " + I(advertising*price)"),
    ('random_forest', rand_forest(trees=50).set_mode('regression'), base_formula),
]

# Each entry keeps the evaluated fit object (needed later for save_mlflow)
# together with its test-split RMSE. Entries stay in definition order.
# Note: ranking candidates on the test split, as this demo does, makes the
# winner's reported RMSE optimistic; use a separate validation split for
# selection in real projects.
candidates = []
for candidate_name, candidate_spec, candidate_formula in candidate_definitions:
    candidate_eval = candidate_spec.fit(train_data, candidate_formula).evaluate(test_data)
    _, _, candidate_stats = candidate_eval.extract_outputs()
    candidate_rmse = candidate_stats[candidate_stats['split'] == 'test']['rmse'].values[0]

    candidates.append({
        'name': candidate_name,
        'fit': candidate_eval,
        'rmse': candidate_rmse
    })

print("\nCandidate Models:")
for c in candidates:
    print(f"  {c['name']:<30} RMSE: {c['rmse']:.4f}")

In [None]:
# 2. Select best model: the candidate with the smallest test RMSE
#    (on ties, min() keeps the earliest entry in the list).
best_candidate = min(candidates, key=lambda candidate: candidate['rmse'])
best_name = best_candidate['name']
best_rmse = best_candidate['rmse']

print(f"\n✓ Best model: {best_name}")
print(f"  RMSE: {best_rmse:.4f}")

# 3. Save production model, tagged with what it is and how it scored.
production_path = models_dir / 'production_model'

production_metadata = {
    'model_name': best_name,
    # float() turns the value read from the stats frame into a plain Python float.
    'test_rmse': float(best_rmse),
    'training_date': pd.Timestamp.now().isoformat(),
    'status': 'production',
    'champion': True
}

best_candidate['fit'].save_mlflow(
    path=str(production_path),
    signature='auto',
    input_example=train_data.head(5),
    metadata=production_metadata,
)

print(f"\n✓ Production model saved to: {production_path}")

In [None]:
# 4. Load production model for deployment
production_model = load_model(str(production_path))

print("✓ Production model loaded")

# Read back the custom metadata written when the model was saved.
prod_info = get_model_info(str(production_path))
prod_metadata = prod_info['metadata']

print("\nProduction Model Metadata:")
print(f"  Model: {prod_metadata['model_name']}")
print(f"  Test RMSE: {prod_metadata['test_rmse']:.4f}")
print(f"  Training Date: {prod_metadata['training_date']}")
print(f"  Status: {prod_metadata['status']}")

# 5. Make production predictions on three new, unlabeled rows. There is no
#    'sales' column here; 'seasonality' is supplied although none of the
#    candidate formulas reference it.
new_data = pd.DataFrame(dict(
    advertising=[50, 75, 30],
    price=[25, 30, 20],
    competitor_price=[28, 32, 22],
    seasonality=[0.5, 0.7, -0.3],
))

production_preds = production_model.predict(new_data)

print("\nProduction Predictions:")
print(production_preds[['.pred']])

## 9. Model Registry Workflow (Conceptual)

While this demo saves models locally, in production you would integrate with an MLflow tracking server and the MLflow model registry.

**Production Setup:**
```python
import mlflow
from mlflow.tracking import MlflowClient

# Set tracking URI
mlflow.set_tracking_uri("http://mlflow-server:5000")

# Start experiment
mlflow.set_experiment("sales_forecasting")

with mlflow.start_run():
    # Log parameters
    mlflow.log_param("model_type", "linear_reg")
    mlflow.log_param("penalty", 0.1)

    # Train model
    fit = spec.fit(train_data, formula)

    # Log metrics
    mlflow.log_metric("test_rmse", rmse_val)

    # Save model to registry
    fit.save_mlflow(
        "models/sales_model",
        registered_model_name="SalesForecaster"
    )

# Transition to production
client = MlflowClient()
client.transition_model_version_stage(
    name="SalesForecaster",
    version=1,
    stage="Production"
)

# Load from registry
prod_model = mlflow.pyfunc.load_model("models:/SalesForecaster/Production")
```

## 10. Summary and Best Practices

**Key Takeaways:**

1. **MLflow Integration Benefits:**
   - Standardized model serialization
   - Version tracking and metadata
   - Model signatures for validation
   - Production deployment support
   - Cross-team collaboration

2. **Save/Load Capabilities:**
   - Basic ModelFit objects
   - WorkflowFit with recipes
   - Grouped/nested models
   - Automatic version tracking

3. **Model Signatures:**
   - Auto-inference from input examples
   - Input/output schema validation
   - Prevents prediction errors

4. **Version Compatibility:**
   - Tracks py-tidymodels version
   - Warns on incompatible loads
   - Supports reproducibility (the training version is recorded with each model)

5. **Production Workflow:**
   - Train multiple candidates
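   - Compare on held-out data (this demo uses the test set; prefer a separate validation set so the final test estimate stays unbiased)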
   - Save best model with metadata
   - Load for deployment
   - Track performance over time

6. **Best Practices:**
   - Always save metadata (training date, metrics, etc.)
   - Use model signatures for validation
   - Version models explicitly
   - Document model lineage
   - Monitor production performance
   - Retrain when drift detected

7. **Integration Points:**
   - Local file storage (demo)
   - MLflow tracking server (production)
   - Model registry (deployment)
   - CI/CD pipelines (automation)
   - Monitoring systems (observability)

In [None]:
# Final summary: list every model directory saved during this run.
banner = "=" * 70

print("\n" + banner)
print("SAVED MODELS SUMMARY")
print(banner)

# Path.iterdir() yields entries in arbitrary order, so sort them to keep the
# listing identical from run to run and across platforms.
saved_model_dirs = sorted(entry for entry in models_dir.iterdir() if entry.is_dir())

for model_dir in saved_model_dirs:
    # A dedicated name avoids overwriting the `info` variable that an earlier
    # cell filled for the basic model.
    model_info = get_model_info(str(model_dir))

    print(f"\nModel: {model_dir.name}")
    print(f"  Type: {model_info['model_type']}")
    print(f"  Engine: {model_info['engine']}")
    print(f"  Mode: {model_info['mode']}")
    # 'is_grouped' is treated as optional and defaults to False when absent.
    print(f"  Grouped: {model_info.get('is_grouped', False)}")
    print(f"  Timestamp: {model_info['fit_timestamp']}")
    # Custom metadata is shown only when present and non-empty.
    if 'metadata' in model_info and model_info['metadata']:
        print(f"  Metadata: {model_info['metadata']}")

print("\n" + banner)
print("DEMO COMPLETE")
print(banner)
print("MLflow provides robust model lifecycle management for py-tidymodels!")