# MLflow Example: Tracking Two Model Training Runs

In [29]:
# === Model Training and Experimentation ===

# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits


In [30]:

# Load dataset
data = load_digits()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

# Define a function to train a model and return its accuracy
def train_model(n_estimators, max_depth):
    # Train a RandomForestClassifier
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Make predictions
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    print(f"Model with n_estimators={n_estimators}, max_depth={max_depth} achieved accuracy={accuracy:.4f}")
    return model, accuracy

In [31]:

# Train models with different hyperparameter configurations
print("Training Model 1...")
model_1, acc_1 = train_model(n_estimators=20, max_depth=5)

Training Model 1...
Model with n_estimators=20, max_depth=5 achieved accuracy=0.9389


In [32]:
print("\nTraining Model 2...")
model_2, acc_2 = train_model(n_estimators=100, max_depth=10)



Training Model 2...
Model with n_estimators=100, max_depth=10 achieved accuracy=0.9722


Now adding MLFlow

In [33]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits

In [34]:
mlflow.set_tracking_uri("https://dagshub.com/nada912/testingDVC.mlflow")

In [35]:
data = load_digits()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

In [36]:
# Set MLflow experiment name. This will get created if it doesn't exist
experiment_name = "RandomForestExperiment"
mlflow.set_experiment(experiment_name)

2025/01/07 16:27:32 INFO mlflow.tracking.fluent: Experiment with name 'RandomForestExperiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/5cb0e64a14b0496d97a6483b0714d34f', creation_time=1736263652172, experiment_id='0', last_update_time=1736263652172, lifecycle_stage='active', name='RandomForestExperiment', tags={}>

In [37]:
# Define a function to train a model, log parameters and metrics to MLflow
def train_and_log_model(n_estimators, max_depth):
    with mlflow.start_run(): # <--
        # Train a RandomForestClassifier
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        predictions = model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)

        # Log parameters, metrics, and the model itself
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.sklearn.log_model(model, "RandomForestClassifier_V0")

        print(f"Logged RandomForest model with n_estimators={n_estimators}, max_depth={max_depth}, accuracy={accuracy:.4f}")


In [38]:
# Train and log Model 1
print("Training and Logging Model 1...")
train_and_log_model(n_estimators=20, max_depth=5)

Training and Logging Model 1...




Logged RandomForest model with n_estimators=20, max_depth=5, accuracy=0.9389
🏃 View run bustling-stork-464 at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0/runs/99d81d382b69405282e430b6f73df720
🧪 View experiment at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0


In [39]:
# Train and log Model 2
print("\nTraining and Logging Model 2...")
train_and_log_model(n_estimators=100, max_depth=10)


Training and Logging Model 2...




Logged RandomForest model with n_estimators=100, max_depth=10, accuracy=0.9722
🏃 View run stylish-calf-373 at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0/runs/b3bcd6fbc9f7497087849ed06e66b564
🧪 View experiment at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0


In [40]:
# Instructions to visualize results
print("\nTo view the results, run the following command in your terminal:")
print("mlflow ui")
print("Then navigate to http://127.0.0.1:5000 to explore the experiment results.")


To view the results, run the following command in your terminal:
mlflow ui
Then navigate to http://127.0.0.1:5000 to explore the experiment results.


# Register the model

In [41]:
model_name="RandomForestClassifier_V0"
run_id="b3bcd6fbc9f7497087849ed06e66b564"
model_uri = f"runs:/{run_id}/{model_name}"
response = mlflow.register_model(model_uri, model_name)

Registered model 'RandomForestClassifier_V0' already exists. Creating a new version of this model...
2025/01/07 16:29:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier_V0, version 1
Created version '1' of model 'RandomForestClassifier_V0'.


# Load the model

In [42]:
model_ver = 1
model_uri = f"models:/RandomForestClassifier_V0/1"

loaded_model = mlflow.sklearn.load_model(model_uri)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 5/5 [00:01<00:00,  4.41it/s]


now loaded_model is imported from the MLFlow registry

In [43]:
loaded_model.predict(X_test)

array([6, 9, 3, 7, 2, 1, 5, 2, 5, 2, 1, 9, 4, 0, 4, 2, 3, 7, 8, 8, 4, 3,
       9, 7, 5, 6, 3, 5, 6, 3, 4, 9, 1, 4, 4, 6, 9, 4, 7, 6, 6, 9, 1, 3,
       6, 1, 3, 0, 6, 5, 5, 1, 9, 5, 6, 0, 9, 0, 0, 1, 0, 4, 5, 2, 4, 5,
       7, 0, 7, 5, 9, 5, 5, 4, 7, 0, 4, 5, 5, 9, 9, 0, 2, 3, 8, 0, 6, 4,
       4, 9, 1, 2, 8, 3, 5, 2, 9, 4, 4, 4, 4, 3, 5, 3, 1, 3, 5, 9, 4, 2,
       7, 7, 4, 4, 1, 9, 2, 7, 8, 7, 2, 6, 9, 4, 0, 7, 2, 7, 5, 8, 7, 5,
       7, 9, 0, 6, 6, 4, 2, 8, 0, 9, 4, 6, 9, 9, 6, 9, 0, 5, 5, 6, 6, 0,
       6, 4, 3, 9, 3, 7, 7, 2, 9, 0, 4, 5, 8, 6, 5, 9, 9, 8, 4, 2, 1, 3,
       7, 7, 2, 2, 3, 9, 8, 0, 3, 2, 2, 5, 6, 9, 9, 4, 1, 5, 4, 2, 3, 6,
       4, 8, 5, 9, 5, 7, 1, 9, 4, 8, 1, 5, 4, 4, 9, 6, 1, 8, 6, 0, 4, 5,
       2, 7, 4, 6, 4, 5, 6, 0, 3, 2, 3, 6, 7, 1, 5, 1, 4, 7, 6, 8, 1, 5,
       5, 1, 5, 2, 8, 8, 9, 5, 7, 6, 2, 2, 2, 3, 4, 8, 8, 3, 6, 0, 9, 7,
       7, 0, 1, 0, 4, 5, 1, 5, 3, 6, 0, 4, 1, 0, 0, 3, 6, 5, 9, 7, 3, 5,
       5, 9, 9, 8, 5, 3, 3, 2, 0, 5, 8, 3, 4, 0, 2,

# Connecting a remote tracking URI

In [44]:
# we can get this code from dagshub
import dagshub
dagshub.init(repo_owner='nada912', repo_name='testingDVC', mlflow=True)

In [45]:
import mlflow
with mlflow.start_run():
  mlflow.log_param('parameter name', 'value')
  mlflow.log_metric('metric name', 1)

🏃 View run persistent-elk-668 at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0/runs/3f2a06860382445fa5bfcaa4f922dbff
🧪 View experiment at: https://dagshub.com/nada912/testingDVC.mlflow/#/experiments/0
