# MLflow Quickstart

Welcome! In this notebook, you'll get hands-on experience with MLflow, a tool for tracking machine learning experiments.

**Objective:** Run and track a simple experiment using MLflow in under 10 minutes.

## 1. Run a Simple Experiment

First, configure MLflow to use file-based storage, then train a simple model and log parameters and metrics.

In [None]:
# Configure MLflow to store runs in a local, file-based tracking directory.
import mlflow
import os
from pathlib import Path

# Single source of truth for the experiment name used below.
EXPERIMENT_NAME = "mlops_lecture"

# Absolute path of the mlruns directory inside the current working directory.
tracking_uri = os.path.join(os.getcwd(), "mlruns")

# Path.as_uri() builds a well-formed file URI on every platform. Gluing
# "file:///" onto the raw path yields four slashes on POSIX
# (file:////home/...) and leaves backslashes in the URI on Windows.
mlflow.set_tracking_uri(Path(tracking_uri).as_uri())

# Make mlops_lecture the active experiment for subsequent runs.
mlflow.set_experiment(EXPERIMENT_NAME)

print(f"MLflow tracking URI set to: {mlflow.get_tracking_uri()}")
# One lookup by name is enough to confirm the experiment exists in the store.
print(f"Using experiment: {mlflow.get_experiment_by_name(EXPERIMENT_NAME).name}")

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import numpy as np

# Data-generation settings, defined once so the values logged to MLflow can
# never drift from the values actually passed to make_regression.
N_SAMPLES = 100
NOISE = 10
RANDOM_STATE = 42  # fixed seed so re-running the cell regenerates the same data

# Generate dummy data: one feature, linear signal plus Gaussian noise
X, y = make_regression(
    n_samples=N_SAMPLES,
    n_features=1,
    noise=NOISE,
    random_state=RANDOM_STATE,
)

with mlflow.start_run():
    model = LinearRegression()
    model.fit(X, y)

    # Score on the training data once; reused for the metric and the printout.
    r2 = model.score(X, y)

    # Log parameters and metrics
    mlflow.log_param("fit_intercept", model.fit_intercept)
    mlflow.log_param("n_samples", N_SAMPLES)
    mlflow.log_param("noise", NOISE)
    mlflow.log_param("random_state", RANDOM_STATE)
    mlflow.log_metric("score", r2)

    # Fitted line parameters (single feature, so coef_ has one entry)
    slope = model.coef_[0]
    intercept = model.intercept_
    equation_text = f'y = {slope:.2f}x + {intercept:.2f}'

    # Create the regression plot
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Plot the actual data points
        ax.scatter(X, y, color='blue', alpha=0.6, label='Data Points')

        # Evaluate the model on 100 evenly spaced x-values across the data
        # range; reshape to a column because predict() expects 2-D input.
        X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
        y_pred = model.predict(X_line)

        # Plot the regression line
        ax.plot(X_line, y_pred, color='red', linewidth=2, label='Regression Line')

        # Add equation to the plot (axes-relative coordinates: top-left corner)
        ax.text(0.05, 0.95, equation_text, transform=ax.transAxes,
                fontsize=12, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        ax.set_xlabel('Feature')
        ax.set_ylabel('Target')
        ax.set_title('Linear Regression: Data Points and Fitted Line')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Log the figure to MLflow as a run artifact
        mlflow.log_figure(fig, "regression_plot.png")
    finally:
        # Always release the figure, even if plotting or logging fails.
        plt.close(fig)

    # Log the model
    mlflow.sklearn.log_model(model, "model")

    print("Experiment logged!")
    print(f"Model R² Score: {r2:.4f}")
    print(f"Equation: {equation_text}")

## 2. Visualize the Data and Model

Let's plot the data points and the regression line.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Coefficients of the single-feature model fitted in the experiment cell
slope, intercept = model.coef_[0], model.intercept_
equation_text = f'y = {slope:.2f}x + {intercept:.2f}'

# 100 evenly spaced x-values over the observed range, as a column for predict()
X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_pred = model.predict(X_line)

# Draw everything on an explicit Axes instead of the pyplot state machine
fig, ax = plt.subplots(figsize=(10, 6))

# Raw observations first, fitted line on top
ax.scatter(X, y, color='blue', alpha=0.6, label='Data Points')
ax.plot(X_line, y_pred, color='red', linewidth=2, label='Regression Line')

# Equation box anchored to the top-left corner in axes coordinates
ax.text(
    0.05, 0.95, equation_text,
    transform=ax.transAxes,
    fontsize=12,
    verticalalignment='top',
    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8),
)

ax.set_xlabel('Feature')
ax.set_ylabel('Target')
ax.set_title('Linear Regression: Data Points and Fitted Line')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

print(f"Model R² Score: {model.score(X, y):.4f}")
print(f"Equation: {equation_text}")

## 3. View Experiment Results

Start the MLflow UI to view your experiment results.

**Note:** If you don't see experiments in the UI, make sure you've run the MLflow configuration cell (the first code cell in Section 1) before running the experiment cell. If needed, re-run the experiment cell after setting the tracking URI.

In [None]:
import subprocess
import time
import os

# Use relative path to mlruns directory for cross-platform compatibility
mlruns_path = "mlruns"

# Port, URL and command line are defined once and reused in every message.
ui_port = 5000
ui_url = f"http://localhost:{ui_port}"
ui_command = ["mlflow", "ui", "--port", str(ui_port), "--backend-store-uri", mlruns_path]

print("To view MLflow UI, run this command in a separate terminal:")
print(f"  mlflow ui --port {ui_port} --backend-store-uri {mlruns_path}")
print("\nAlternatively, running MLflow UI in background...")

# Re-running this cell must not spawn a second server: it could not bind the
# same port, and the handle to the server that is still running would be lost.
previous = globals().get("process")
if isinstance(previous, subprocess.Popen) and previous.poll() is None:
    print("✓ MLflow UI is already running (started by an earlier run of this cell)")
    print(f"✓ Visit {ui_url} to view your experiments")
else:
    # Try to start MLflow UI in background
    try:
        process = subprocess.Popen(
            ui_command,
            # Discard server output. Nothing in this notebook reads it, and an
            # unread PIPE blocks the child once the OS pipe buffer fills up.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            # CREATE_NO_WINDOW only exists on Windows; 0 means "no flags" elsewhere.
            creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
        )

        time.sleep(2)  # Wait for server to start

        # poll() returns None while the process is still alive
        if process.poll() is None:
            print("✓ MLflow UI started successfully!")
            print(f"✓ Visit {ui_url} to view your experiments")

            # Optionally open browser (requires `import webbrowser`)
            # webbrowser.open('http://localhost:5000')
        else:
            print("✗ Failed to start MLflow UI. Please run the command in terminal.")
            print(f"  (mlflow exited with code {process.returncode})")
    except Exception as e:
        # Best effort: e.g. the mlflow executable is not on PATH.
        print(f"✗ Error starting MLflow UI: {e}")
        print(f"Please run 'mlflow ui --port {ui_port} --backend-store-uri {mlruns_path}' in a terminal.")

## 4. Assignment Complete

You have logged an experiment and viewed it in the MLflow UI!

**Congratulations!**