This work will involve model registries and versioning in MLFlow. It will use the dataset reg2, which has a target y and two potential predictor variables, x1 and x2. Use scikit-learn for the linear regression (for MLFlow, this is the model flavor), so you will have to split up the data.

1. Read through the quickstart and model registry tutorials linked in this directory.

2. Build 3 models:

a. Try a linear regression model using x1 only to predict y. Look at how well it does. Call this model model_1.

b. Try a linear regression model using x2 only to predict y. Look at how well it does. Call this model model_2.

c. Finally, use x1 and x2 together to predict y. Compare the errors and R^2 values to those of the previous two models. How do they compare?

3. To do the version control with MLFlow, follow these steps:

a. Create a repository in your Git for the models

b. Put the models there.

c. Register the models in MLFlow (you can use the above names or choose your own).

4. Start and view the tracking server for the models. Turn in a PDF of your notebook, along with a screenshot of the tracking server and the requirements.txt file.

In [7]:
import warnings
import logging
import sys
import pandas as pd
import numpy as np
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Address of the MLflow tracking server (loopback interface, port 5001) that
# every run and registered model in this notebook is sent to.
TRACKING_URI = "http://127.0.0.1:5001"
mlflow.set_tracking_uri(TRACKING_URI)

# Module-level logger; the root logger is configured at WARNING level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)
def eval_metrics(actual, pred):
    """Score predictions against the ground truth.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and coefficient of determination, in that order.
    """
    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)
# Load the regression dataset. Any failure to read it is logged with its
# traceback and the script stops, since nothing below can run without data.
try:
    data = pd.read_csv("reg2.csv")
except Exception as e:
    logger.exception("Unable to load dataset. Error: %s", e)
    sys.exit(1)

print("Dataset Head:")
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell; here more statements follow, so the preview must be printed
# explicitly or it is silently discarded.
print(data.head())

# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible so that all models are compared on the same test rows.
train, test = train_test_split(data, test_size=0.25, random_state=42)

# The target is shared by every model, so it is extracted once. Double
# brackets keep it as a single-column DataFrame.
train_y = train[["y"]]
test_y = test[["y"]]
def _train_and_log_model(model_name, features, label):
    """Fit a linear regression on ``features`` and record it in MLflow.

    The model is trained on the module-level ``train``/``train_y`` split and
    scored on ``test``/``test_y``. Everything is logged inside a single MLflow
    run named ``model_name``.

    Args:
        model_name: Used both as the MLflow run name and as the name under
            which the model is registered.
        features: List of predictor column names to train on.
        label: Human-readable heading printed above the metrics.

    Returns:
        ``(model, (rmse, mae, r2))``: the fitted estimator and its test-set
        metrics.
    """
    with mlflow.start_run(run_name=model_name):
        train_x = train[features]
        test_x = test[features]

        lr = LinearRegression()
        lr.fit(train_x, train_y)

        preds = lr.predict(test_x)
        rmse, mae, r2 = eval_metrics(test_y, preds)

        print(f"\n{label}:")
        print(f"RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

        # The feature list is stored as a comma-separated string,
        # e.g. "x1" or "x1, x2".
        mlflow.log_param("features", ", ".join(features))
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        # The signature captures the input/output schema from the training
        # data and the model's predictions on it.
        signature = infer_signature(train_x, lr.predict(train_x))

        # Register the model only when the tracking URI is not a local
        # "file" store; otherwise just log it as a run artifact.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store != "file":
            mlflow.sklearn.log_model(
                lr, "model", registered_model_name=model_name, signature=signature
            )
        else:
            mlflow.sklearn.log_model(lr, "model", signature=signature)

    return lr, (rmse, mae, r2)


# One entry per model: (run / registered name, predictor columns, heading).
MODEL_SPECS = [
    ("model_1", ["x1"], "Model 1 (x1 only)"),
    ("model_2", ["x2"], "Model 2 (x2 only)"),
    ("model_3", ["x1", "x2"], "Model 3 (x1 & x2)"),
]

# Train, evaluate and log the models in order. After the loop `lr`, `rmse`,
# `mae` and `r2` refer to the last model (model_3).
for model_name, features, label in MODEL_SPECS:
    lr, (rmse, mae, r2) = _train_and_log_model(model_name, features, label)




Dataset preview:

Model 1 (x1 only):
RMSE: 1.1426, MAE: 0.9516, R2: -0.0089


Registered model 'model_1' already exists. Creating a new version of this model...
2025/09/26 12:36:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model_1, version 2
Created version '2' of model 'model_1'.


🏃 View run model_1 at: http://127.0.0.1:5001/#/experiments/0/runs/23fe15d7d33d41da94e4e95ce4ba305a
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/0

Model 2 (x2 only):
RMSE: 1.1859, MAE: 1.0030, R2: -0.0868


Registered model 'model_2' already exists. Creating a new version of this model...
2025/09/26 12:36:23 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model_2, version 2
Created version '2' of model 'model_2'.


🏃 View run model_2 at: http://127.0.0.1:5001/#/experiments/0/runs/b4c3455cc8a443b4a546cb1d28065429
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/0

Model 3 (x1 & x2):
RMSE: 0.2460, MAE: 0.1912, R2: 0.9532


Registered model 'model_3' already exists. Creating a new version of this model...
2025/09/26 12:36:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model_3, version 2


🏃 View run model_3 at: http://127.0.0.1:5001/#/experiments/0/runs/eb15edeb93a940bcae7568a41467f3f0
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/0


Created version '2' of model 'model_3'.
