Set up your Colab environment: Get MLflow ready to use.

In [1]:
!pip install mlflow scikit-learn numpy pandas
!pip install pyngrok



In [2]:
import getpass
import os
import socket
import subprocess
import time

import mlflow
import psutil
from pyngrok import ngrok, conf

# Where MLflow will save your experiment data (a file named mlflow.db in your Colab files)
# This is the 'backend store' for the server
MLFLOW_SERVER_BACKEND_URI = "sqlite:///mlflow.db"
MLFLOW_PORT = 5000

# Clean up any old MLflow or ngrok processes that might be running, so that
# re-running this cell does not collide with a server/tunnel from a previous run.
print("Cleaning up any previous MLflow UI or ngrok processes...")

# Stop the ngrok agent pyngrok started from this kernel (if any) so its
# internal bookkeeping stays consistent with what is actually running.
ngrok.kill()

# Ways the port can show up in a server command line: a bind address
# ("127.0.0.1:5000") or an explicit port flag ("--port 5000").
port_markers = (
    f":{MLFLOW_PORT}",
    f"--port {MLFLOW_PORT}",
    f"--port={MLFLOW_PORT}",
    f"-p {MLFLOW_PORT}",
)
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
    try:
        if proc.info['pid'] == os.getpid():
            continue  # never kill the notebook kernel itself
        # psutil reports None for fields it was not allowed to read
        cmdline = ' '.join(proc.info['cmdline'] or [])
        proc_name = proc.info['name'] or ''
        is_mlflow_server = 'mlflow' in cmdline and any(marker in cmdline for marker in port_markers)
        is_ngrok_agent = 'ngrok' in proc_name
        if is_mlflow_server or is_ngrok_agent:
            # Take down worker processes first so none of them keeps the port bound
            for child in proc.children(recursive=True):
                child.kill()
            proc.kill()
            print(f"Killed existing process: {proc.info['name']} (PID: {proc.info['pid']})")
    except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
        # Best effort: the process already exited or is not ours to kill
        pass
print("Cleanup complete.")

# Start the MLflow UI server in the background
def _port_is_open(port, host="127.0.0.1"):
    """Return True if something accepts TCP connections on host:port."""
    try:
        with socket.create_connection((host, port), timeout=1):
            return True
    except OSError:
        return False


MLFLOW_STARTUP_TIMEOUT_S = 60  # upper bound on how long to wait for the server to come up

# Fail fast if the port is already taken: otherwise the readiness probe below
# would succeed against the other process instead of the server started here.
if _port_is_open(MLFLOW_PORT):
    raise RuntimeError(
        f"Port {MLFLOW_PORT} is already in use. Stop the process using it "
        "(or restart the runtime) and re-run this cell."
    )

print(f"Starting MLflow UI server on port {MLFLOW_PORT}...")
log_file = "mlflow_server.log"
mlflow_command = ["mlflow", "ui", "--backend-store-uri", MLFLOW_SERVER_BACKEND_URI, "--port", str(MLFLOW_PORT)]
# The child inherits its own copy of the log file descriptor, so the notebook's
# handle can be closed as soon as Popen returns instead of leaking it.
with open(log_file, 'w') as server_log:
    mlflow_process = subprocess.Popen(mlflow_command,
                                      stdout=server_log,
                                      stderr=subprocess.STDOUT,
                                      # same effect as preexec_fn=os.setsid, without running
                                      # Python code between fork and exec
                                      start_new_session=True)

# Wait until the server accepts connections instead of sleeping a fixed time
print("Waiting for MLflow UI to be ready...")
startup_deadline = time.monotonic() + MLFLOW_STARTUP_TIMEOUT_S
server_ready = False
while time.monotonic() < startup_deadline and mlflow_process.poll() is None:
    if _port_is_open(MLFLOW_PORT):
        server_ready = True
        break
    time.sleep(0.5)

# Check if the MLflow server started correctly
if not server_ready:
    exit_code = mlflow_process.poll()
    if exit_code is None:
        failure_reason = f"port {MLFLOW_PORT} not reachable after {MLFLOW_STARTUP_TIMEOUT_S}s"
        mlflow_process.terminate()  # do not leave a half-started server behind
    else:
        failure_reason = f"exit code {exit_code}"
    print(f"MLflow UI server failed to start ({failure_reason}). Please check {log_file} for errors.")
    with open(log_file, 'r') as f:
        print("\n--- MLflow Server Log ---\n", f.read(), "\n--- End Log ---")
    raise RuntimeError("MLflow UI server did not start. Cannot proceed.")

# Connect ngrok to your MLflow server to make it viewable online.
# Only ask for the authtoken when pyngrok does not already have one configured.
ngrok_config = conf.get_default()
if not ngrok_config.auth_token:
    print("Enter your ngrok authtoken (get it from https://dashboard.ngrok.com/auth):")
    # getpass keeps the token out of the cell output; strip whitespace picked up when pasting
    ngrok_config.auth_token = getpass.getpass().strip()

print(f"Creating ngrok tunnel to MLflow UI on port {MLFLOW_PORT}...")
try:
    tunnel = ngrok.connect(MLFLOW_PORT)
    public_url = tunnel.public_url
    print(f"Your MLflow UI is accessible at: {public_url}")
except Exception as e:
    print(f"Failed to create ngrok tunnel: {e}")
    # Show the server log as well, in case the tunnel failed because the server went away
    with open(log_file, 'r') as f:
        print("\n--- MLflow Server Log ---\n", f.read(), "\n--- End Log ---")
    # Chain the original error so the traceback shows the real cause
    raise RuntimeError("Failed to establish ngrok tunnel. Check MLflow server status and ngrok token.") from e

# Point the MLflow client used by this notebook at the server through the
# public ngrok URL (rather than at a local address), then echo back the URI
# the client actually ended up with.
mlflow.set_tracking_uri(public_url)
active_tracking_uri = mlflow.get_tracking_uri()
print(f"MLflow client tracking URI set to: {active_tracking_uri}")

print("\n--- MLflow Setup Complete. You can now proceed with your experiments. ---")

Cleaning up any previous MLflow UI or ngrok processes...
Cleanup complete.
Starting MLflow UI server on port 5000...
Waiting for MLflow UI to be ready...
Enter your ngrok authtoken (get it from https://dashboard.ngrok.com/auth):
··········
Creating ngrok tunnel to MLflow UI on port 5000...
Your MLflow UI is accessible at: https://18764a3bddb8.ngrok-free.app
MLflow client tracking URI set to: https://18764a3bddb8.ngrok-free.app

--- MLflow Setup Complete. You can now proceed with your experiments. ---


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Synthetic regression data: the target is 4 + 3*x plus standard-normal noise.
np.random.seed(42)  # fixed seed so every run draws the same samples
X = np.random.rand(100, 1) * 2  # one input feature, 100 rows
noise = np.random.randn(100, 1)  # drawn after X, so the random stream order is unchanged
y = 4 + 3 * X + noise  # target column, same shape as X

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define a function to train our model and log everything with MLflow
def run_experiment(iterations, learning_rate_param):
    """
    Train a linear regression model and record the run in MLflow.

    The model is fitted on the module-level ``X_train``/``y_train`` and
    evaluated on ``X_test``/``y_test``. One MLflow run is created, containing
    the two settings as parameters, the test RMSE and R2 as metrics, the
    fitted model, a prediction plot and a small text summary. The plot and the
    summary are written to the working directory before being logged, so each
    call overwrites the files left by the previous one.

    Note: ``iterations`` and ``learning_rate_param`` are only logged and used
    in the run name and plot title. They are not passed to the model, so every
    call fits the same model and reports the same metrics.

    Args:
        iterations: Value recorded as the ``number_of_iterations`` parameter.
        learning_rate_param: Value recorded as the ``simulated_learning_rate``
            parameter.

    Returns:
        None. Progress is printed and the results are stored in MLflow.
    """
    # Start a new entry in your lab notebook for this specific experiment
    # We give it a descriptive name
    with mlflow.start_run(run_name=f"Linear Regression Test: Iter={iterations}, LR={learning_rate_param}") as run:
        # Record the settings (parameters) in one request to the tracking server
        mlflow.log_params({
            "number_of_iterations": iterations,
            "simulated_learning_rate": learning_rate_param,
        })

        print(f"Starting MLflow Run ID: {run.info.run_id}")
        print(f"Settings logged: Iterations={iterations}, Learning Rate={learning_rate_param}")

        # Train our simple linear regression model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Make predictions on the test data
        y_predicted = model.predict(X_test)

        # Calculate how well our model performed
        rmse = np.sqrt(mean_squared_error(y_test, y_predicted))
        r2 = r2_score(y_test, y_predicted)

        # Record the performance numbers (metrics) in one request;
        # float() turns numpy scalars into plain Python floats
        mlflow.log_metrics({"rmse": float(rmse), "r2_score": float(r2)})
        print(f"Performance logged: RMSE={rmse:.4f}, R2 Score={r2:.4f}")

        # Save the trained model itself as an "artifact" (a file associated with this run)
        mlflow.sklearn.log_model(model, "trained_linear_model")
        print("Trained model saved as 'trained_linear_model'.")

        # Create a visual plot of predictions and save it
        plot_filename = "predictions_plot.png"
        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            ax.scatter(X_test, y_test, color='blue', label='Actual Data')
            ax.plot(X_test, y_predicted, color='red', label='Predicted Line')
            ax.set_title(f'Predictions for Iter={iterations} (RMSE: {rmse:.2f})')
            ax.set_xlabel('Input Feature (X)')
            ax.set_ylabel('Target (y)')
            ax.legend()
            # Work on this figure explicitly rather than pyplot's "current figure"
            fig.tight_layout()
            fig.savefig(plot_filename)
            mlflow.log_artifact(plot_filename)  # Log the plot file as an artifact
        finally:
            # Always release the figure, even if saving or logging fails; this
            # also keeps it from showing up in the notebook output
            plt.close(fig)
        print(f"Prediction plot saved as '{plot_filename}' and logged.")

        # Also save a simple text file with model details
        # (intercept_ and coef_ are indexed as arrays because y_train is a column)
        with open("model_details.txt", "w") as f:
            f.write(f"Model Intercept: {model.intercept_[0]:.4f}\n")
            f.write(f"Model Coefficient: {model.coef_[0][0]:.4f}\n")
            f.write(f"Test RMSE: {rmse:.4f}\n")
            f.write(f"Test R2 Score: {r2:.4f}\n")
        mlflow.log_artifact("model_details.txt")
        print("Model details text file saved as 'model_details.txt' and logged.")

    print(f"MLflow Run ID: {run.info.run_id} completed.")

In [5]:
print("\n--- Running different experiments and logging them with MLflow ---")

# One (iterations, learning_rate_param) pair per experiment, in the order they are run
experiment_settings = [
    (100, 0.01),   # Experiment 1: first attempt
    (200, 0.005),  # Experiment 2: different settings
    (50, 0.02),    # Experiment 3: third attempt
]
for n_iterations, learning_rate in experiment_settings:
    run_experiment(iterations=n_iterations, learning_rate_param=learning_rate)

print("\nAll experiments finished! Now, go to the MLflow UI link provided earlier to see your results.")


--- Running different experiments and logging them with MLflow ---
Starting MLflow Run ID: b1e0f64f9a3b4200b9253e641b2b552d
Settings logged: Iterations=100, Learning Rate=0.01




Performance logged: RMSE=0.8085, R2 Score=0.8072




Trained model saved as 'trained_linear_model'.
Prediction plot saved as 'predictions_plot.png' and logged.
Model details text file saved as 'model_details.txt' and logged.
🏃 View run Linear Regression Test: Iter=100, LR=0.01 at: https://18764a3bddb8.ngrok-free.app/#/experiments/0/runs/b1e0f64f9a3b4200b9253e641b2b552d
🧪 View experiment at: https://18764a3bddb8.ngrok-free.app/#/experiments/0
MLflow Run ID: b1e0f64f9a3b4200b9253e641b2b552d completed.
Starting MLflow Run ID: badde063e4bc46759f632c98f4b64f68
Settings logged: Iterations=200, Learning Rate=0.005




Performance logged: RMSE=0.8085, R2 Score=0.8072




Trained model saved as 'trained_linear_model'.
Prediction plot saved as 'predictions_plot.png' and logged.
Model details text file saved as 'model_details.txt' and logged.
🏃 View run Linear Regression Test: Iter=200, LR=0.005 at: https://18764a3bddb8.ngrok-free.app/#/experiments/0/runs/badde063e4bc46759f632c98f4b64f68
🧪 View experiment at: https://18764a3bddb8.ngrok-free.app/#/experiments/0
MLflow Run ID: badde063e4bc46759f632c98f4b64f68 completed.
Starting MLflow Run ID: 6491329a486949a7a4113d377afdf1c2
Settings logged: Iterations=50, Learning Rate=0.02




Performance logged: RMSE=0.8085, R2 Score=0.8072




Trained model saved as 'trained_linear_model'.
Prediction plot saved as 'predictions_plot.png' and logged.
Model details text file saved as 'model_details.txt' and logged.
🏃 View run Linear Regression Test: Iter=50, LR=0.02 at: https://18764a3bddb8.ngrok-free.app/#/experiments/0/runs/6491329a486949a7a4113d377afdf1c2
🧪 View experiment at: https://18764a3bddb8.ngrok-free.app/#/experiments/0
MLflow Run ID: 6491329a486949a7a4113d377afdf1c2 completed.

All experiments finished! Now, go to the MLflow UI link provided earlier to see your results.
