In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from pathlib import Path
import uuid
import mlflow
import optuna
import mlflow.sklearn
from catboost import CatBoostRegressor
from mlflow.tracking import MlflowClient
from mlflow.models import infer_signature
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from typing import Dict, Union
from sklearn.preprocessing import StandardScaler

# Starting ML Tracking Server

In the terminal, navigate to the notebook folder and then start the MLflow tracking server.

Run in the terminal (command line): mlflow server --host 127.0.0.1 --port 8080

**This is what it does:**

1. Starts the MLflow Tracking Server
A dedicated process that manages and serves your MLflow experiments.

2. Provides a Web UI
Accessible at http://127.0.0.1:8080 (or localhost:8080), where you can browse experiments, runs, parameters, metrics, and artifacts.

3. Exposes a Tracking API Endpoint
Other scripts or notebooks can log directly to this server if you set - **mlflow.set_tracking_uri("http://127.0.0.1:8080")**

4. MLflow automatically creates a folder mlruns/ in your working directory the first time you log something.
Inside mlruns/, it creates subfolders for:

- each experiment (default is 0)

- each run within that experiment

# Setting URI

In [2]:
# Point the MLflow client at the tracking server so runs are logged there
tracking_uri = "http://127.0.0.1:8080"
mlflow.set_tracking_uri(uri=tracking_uri)

What it does:

1. It tells your MLflow client (your script/notebook) where to send all logging data (experiments, runs, params, metrics, artifacts).

2. Since our host is local, it will still write to mlruns, but here you could configure a remote host URI.

In [3]:
# Confirm which tracking URI the client is currently configured with
print(f"Current Tracking URI: {mlflow.get_tracking_uri()}")

Current Tracking URI: http://127.0.0.1:8080


# Fitting the model

Prepare the data, split it into training, validation, and test sets, and train a simple LASSO model with a regularization hyperparameter of alpha = 0.1.

In [6]:
# Read the wine-quality table from disk
df = pd.read_csv('WineQT.csv', sep=',')

# Hold out 25% of the rows as the test set
train, test = train_test_split(df, test_size=0.25, random_state=42)

# Separate the feature matrix from the "quality" target for each partition
x_train = train.drop(columns="quality").to_numpy()
y_train = train["quality"].to_numpy()
x_test = test.drop(columns="quality").to_numpy()
y_test = test["quality"].to_numpy()

# Carve a validation set (20% of the remaining rows) out of the training data
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Fit a LASSO regressor with a fixed regularization strength
alpha = 0.1
model = Lasso(alpha=alpha).fit(x_train, y_train)

# Predict on the validation and test partitions
y_pred_val = model.predict(x_val)
y_pred_test = model.predict(x_test)

# Root-mean-squared error on each partition
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353


What we have just done is we conducted an experimental run! We set a hyperparameter, trained a model, and got validation and test scores (happy face). Now, our job is to log this to MLflow.

# Running first experiment

To do that, we need to run the command:

mlflow.set_experiment("Wine Quality Regression")

If we do so, in the UI (http://127.0.0.1:8080), we will see that the experiment is created.

In [7]:
# Select the experiment that subsequent runs are logged under;
# it is created if no experiment with this name exists yet
mlflow.set_experiment(experiment_name="Wine Quality Regression")

<Experiment: artifact_location='mlflow-artifacts:/677703609984501005', creation_time=1756279706345, experiment_id='677703609984501005', last_update_time=1756279706345, lifecycle_stage='active', name='Wine Quality Regression', tags={}>

Now, all we need to do is log the parameters, metrics, and the model.

In [8]:
# Hyperparameter the baseline model was fitted with
params = {
    'alpha': model.alpha
}

# Start an MLflow run; everything logged inside the block is attached to it
with mlflow.start_run(run_name='lasso_baseline'):
    # Log the hyperparameters
    mlflow.log_params(params)
    # Log the validation and test errors
    mlflow.log_metric("rmse_val", rmse_val)
    mlflow.log_metric("rmse_test", rmse_test)
    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("model_version", "baseline")

    model_name = "wine_model_lasso"
    # Log the model. The input example only needs a few representative rows;
    # passing the whole training matrix would save all of it alongside the model.
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name=model_name,
        input_example=x_train[:5],
    )

🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b7e37b95c783488d9fbaddf0986074d3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


You can click the links above to see the run and experiment in the Tracking Server UI.

# Running MLflow experiments with different models

Now, let's make our code a bit more modular, so we can efficiently run experiments for different models without code repetition. We make 2 functions: train_model and log_run.

In [9]:
def train_model(df: pd.DataFrame, params: dict, model='Lasso'):
    """
    Train a regressor on the wine data and report its validation and test RMSE.

    The frame is split 75/25 into train/test, and the training part is split
    again 80/20 into train/validation (both with ``random_state=42``).

    Parameters
    ----------
    df : pd.DataFrame
        Table holding the feature columns and a "quality" target column.
    params : dict
        Keyword arguments forwarded to the estimator's constructor.
    model : str
        Which estimator to build: 'Lasso' or 'CatBoost'.

    Returns
    -------
    tuple
        ``(fitted_estimator, rmse_val, rmse_test)``.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.
    """
    # Reject unsupported names up front, before any splitting or fitting is done
    if model not in ('Lasso', 'CatBoost'):
        raise ValueError(f"Model {model} not supported")

    # Split the data into training and test sets
    train, test = train_test_split(df, test_size=0.25, random_state=42)
    x_train = train.drop(["quality"], axis=1).values
    y_train = train[["quality"]].values.ravel()
    x_test = test.drop(["quality"], axis=1).values
    y_test = test[["quality"]].values.ravel()

    # Split the training data again into training and validation sets
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42
    )

    # Build the estimator under its own name so the `model` argument
    # keeps referring to the requested model name
    if model == 'Lasso':
        estimator = Lasso(**params)
    else:
        estimator = CatBoostRegressor(**params)

    # Fitting the model
    estimator.fit(x_train, y_train)

    # Predicting
    y_pred_val = estimator.predict(x_val)
    y_pred_test = estimator.predict(x_test)

    # Calculating metrics
    rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')
    return estimator, rmse_val, rmse_test

In [10]:
def log_run(
        run_name: str,
        params: dict,
        metrics: dict,
        tags: dict,
        trained_model,
        model_type='Lasso',
        input_example=None,
        registered_model_name=None):
    """
    Log parameters, metrics, tags and a fitted model to MLflow.

    Intended to be called inside a ``with mlflow.start_run(...)`` block, as the
    calling cells in this notebook do.

    Parameters
    ----------
    run_name : str
        Stored as the "model_version" tag (an entry with the same key in
        ``tags`` is written afterwards and therefore takes precedence).
    params : dict
        Hyperparameters passed to ``mlflow.log_params``.
    metrics : dict
        Metric name -> value, passed to ``mlflow.log_metrics``.
    tags : dict
        Extra tags passed to ``mlflow.set_tags``; may be empty or None.
    trained_model
        The fitted estimator to log.
    model_type : str
        'Lasso' (logged with ``mlflow.sklearn``) or 'CatBoost' (logged with
        ``mlflow.catboost``).
    input_example
        Optional example input forwarded to ``log_model``.
    registered_model_name
        Optional name forwarded to ``log_model``.

    Returns
    -------
    The value returned by the flavor's ``log_model`` call.

    Raises
    ------
    ValueError
        If ``model_type`` is not supported. Raised before anything is logged.
    """
    # Validate first so an unsupported type does not leave params/metrics/tags
    # logged without a model
    if model_type not in ('Lasso', 'CatBoost'):
        raise ValueError(f"Model type {model_type} not supported")

    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.set_tag("model_version", run_name)
    if tags:
        mlflow.set_tags(tags)

    if model_type == 'Lasso':
        # Log sklearn model
        model_info = mlflow.sklearn.log_model(
            sk_model=trained_model,
            name="wine_model_lasso",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )
    else:
        # Log CatBoost model
        model_info = mlflow.catboost.log_model(
            cb_model=trained_model,
            name="wine_model_catboost",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )

    return model_info

### Running LASSO

In [11]:
# Hyperparameters for the LASSO baseline
lasso_params = dict(alpha=0.1, max_iter=1000, random_state=42)

# Fit the model and collect its validation/test errors
trained_model, rmse_val, rmse_test = train_model(df, lasso_params, model='Lasso')

# Metrics and descriptive tags to attach to the run
metrics = {'rmse_val': rmse_val, 'rmse_test': rmse_test}
tags = {'model_version': 'baseline', 'experiment_type': 'regression'}

# Open an MLflow run and log everything through our helper
with mlflow.start_run(run_name='lasso_baseline'):
    model_info = log_run(
        "lasso_baseline",
        lasso_params,
        metrics,
        tags,
        trained_model,
        model_type='Lasso',
    )

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353




🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/d71ef0989d044ea88e66c7cf1384ae25
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


 Now, let's run and log an experiment with a CatBoost model.

In [12]:
# Hyperparameters for the CatBoost baseline
catboost_params = dict(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    random_seed=42,
    verbose=False,
)

# Fit the model and collect its validation/test errors
trained_model, rmse_val, rmse_test = train_model(df, catboost_params, model='CatBoost')

# Metrics and descriptive tags to attach to the run
metrics = {'rmse_val': rmse_val, 'rmse_test': rmse_test}
tags = {
    'model_version': 'baseline',
    'experiment_type': 'regression',
    'data_version': 'v1',
    'algorithm': 'gradient_boosting',
}

# Open an MLflow run and log everything through our helper
with mlflow.start_run(run_name='catboost_baseline'):
    model_info = log_run(
        "catboost_baseline",
        catboost_params,
        metrics,
        tags,
        trained_model,
        model_type='CatBoost',
    )

RMSE val = 0.5558687336128091, RMSE test = 0.5729224664412881




🏃 View run catboost_baseline at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/486d46f4b7ce4f49ae6a96bee7e25fd0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


Now, in the UI (http://127.0.0.1:8080/#/experiments) we can compare 3 different experimental runs - 2 LASSO and 1 CatBoost. To do that, select all the runs and press Compare as we have done above. Since the parameters are different for the models, the visual comparison of parameters might not tell much. However, we can compare the validation and test RMSE.

# Searching for the best model

Up to now, we could find the best model manually using the UI, which is great. However, when developing ML pipelines and registering the best model to be deployed, we need to be able to find this model programmatically. Here's how to do that.

First, we need to find the ID of the experiment for which we want to select the best models. We can do that using the mlflow.get_experiment_by_name() function.

In [13]:
# Look the experiment up by name to obtain its ID
experiment_name = "Wine Quality Regression"
experiment = mlflow.get_experiment_by_name(experiment_name)

# Stop here when the lookup fails: the following cells need experiment_id, and
# carrying on would leave it undefined (or stale from an earlier execution)
if experiment is None:
    raise ValueError(f"Experiment '{experiment_name}' not found")

experiment_id = experiment.experiment_id
print(f"Experiment '{experiment_name}' has ID: {experiment_id}")

Experiment 'Wine Quality Regression' has ID: 677703609984501005


As we know the experiment ID, we can check what the top models are for this experiment and select the best model. To select the best model, we can just select the one at the top row of the resulting top_models dataframe. In our case, this is the CatBoost model.

In [18]:
# Query the logged models of this experiment: keep those whose test RMSE is
# below 0.9 and sort them by validation RMSE, lowest first
rmse_filter = "metrics.rmse_test < 0.9"
sort_spec = [{"field_name": "metrics.rmse_val", "ascending": True}]
top_models = mlflow.search_logged_models(
    experiment_ids=[str(experiment_id)],
    filter_string=rmse_filter,
    order_by=sort_spec,
)

# The first row is the model with the lowest validation RMSE
best_model = top_models.iloc[0]
best_model

artifact_location         mlflow-artifacts:/677703609984501005/models/m-...
creation_timestamp                                            1756282364254
experiment_id                                            677703609984501005
last_updated_timestamp                                        1756282365608
metrics                   [<Metric: dataset_digest=None, dataset_name=No...
model_id                                 m-62cd8f0cdcbc4fdca59cf6c46b7e3379
model_type                                                                 
name                                                    wine_model_catboost
params                    {'depth': '6', 'learning_rate': '0.1', 'verbos...
source_run_id                              486d46f4b7ce4f49ae6a96bee7e25fd0
status                                                                READY
status_message                                                             
tags                      {'mlflow.user': 'timurbikmuhametov', 'mlflow.s...
Name: 0, dty

In [19]:
# Load the selected model through the generic pyfunc interface
model_uri = f"models:/{best_model.model_id}"
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Score it on the held-out test set
y_pred = loaded_model.predict(x_test)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE on test set: {rmse_test}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

RMSE on test set: 0.5729224664412881


## Child runs

MLflow tracks experiments as named groups where all your related runs live. As we have seen up to now, each "run" is one training session where you log parameters, metrics, and artifacts. Parent and Child Runs add a hierarchical layer to this setup.

**Why Should You Care?**

1. Organizational Clarity.

With a parent-child structure, related runs are automatically grouped together. When you're running a hyperparameter search using a Bayesian approach on a particular model architecture, every iteration gets logged as a child run, while the overarching Bayesian optimization process becomes the parent run. This eliminates the guesswork of figuring out which runs belong together.

2. Enhanced Traceability.

When working on complex projects with multiple variants, child runs represent individual products or model variations. This makes it straightforward to trace back results, metrics, or artifacts to their specific run configuration. Need to find that exact setup that gave you the best performance? Just follow the hierarchy.

3. Improved Scalability.

As your experiments grow in number and complexity, having a nested structure ensures your tracking remains manageable. Navigating through a structured hierarchy is much more efficient than scrolling through a flat list of hundreds or thousands of runs. This becomes particularly valuable as projects scale up.

Now, as the concept of Parent and Child runs is clear, let's see how we can implement this.

 

### Grid search for LASSO

Let me show you how this works with our LASSO example. Say you're testing a LASSO model with different alpha values - from 1 to 0.001.

To do that, we create a function log_run_child.

This function is very similar to log_run, but it opens its own run on every call, gives that run a name that includes the iteration number, and passes nested=True to mlflow.start_run. This parameter indicates to MLflow that the runs are child runs.

Then we specify the parent run, and in a loop we run the child runs.

In [20]:
def log_run_child(
        run_name: str,
        params: dict,
        metrics: dict,
        tags: dict,
        trained_model,
        model_type='Lasso',
        input_example=None,
        registered_model_name=None,
        iteration=None
        ):
    """
    Log one child (nested) run to MLflow.

    Opens a run named ``"{run_name}_iteration_{iteration}"`` with
    ``nested=True`` and logs the parameters, metrics, tags and fitted model
    inside it.

    Parameters
    ----------
    run_name : str
        Base name for the child run and the logged model; also stored as the
        "model_version" tag (an entry with the same key in ``tags`` is written
        afterwards and therefore takes precedence).
    params : dict
        Hyperparameters passed to ``mlflow.log_params``.
    metrics : dict
        Metric name -> value, passed to ``mlflow.log_metrics``.
    tags : dict
        Extra tags passed to ``mlflow.set_tags``; may be empty or None.
    trained_model
        The fitted estimator to log.
    model_type : str
        'Lasso' (logged with ``mlflow.sklearn``) or 'CatBoost' (logged with
        ``mlflow.catboost``).
    input_example
        Optional example input forwarded to ``log_model``.
    registered_model_name
        Optional name forwarded to ``log_model``.
    iteration
        Index of this child run; interpolated into the run and model names.

    Returns
    -------
    The value returned by the flavor's ``log_model`` call.

    Raises
    ------
    ValueError
        If ``model_type`` is not supported. Raised before the child run is
        opened, so no run is created for it.
    """
    # Validate before opening the run so an unsupported type does not create a
    # child run that only holds params/metrics and then fails
    if model_type not in ('Lasso', 'CatBoost'):
        raise ValueError(f"Model type {model_type} not supported")

    child_name = f"{run_name}_iteration_{iteration}"
    with mlflow.start_run(run_name=child_name, nested=True):
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)
        mlflow.set_tag("model_version", run_name)
        if tags:
            mlflow.set_tags(tags)

        if model_type == 'Lasso':
            # Log sklearn model
            model_info = mlflow.sklearn.log_model(
                name=f"{child_name}_wine_model_lasso",
                sk_model=trained_model,
                input_example=input_example,
                registered_model_name=registered_model_name,
            )
        else:
            # Log CatBoost model
            model_info = mlflow.catboost.log_model(
                name=f"{child_name}_wine_model_catboost",
                cb_model=trained_model,
                input_example=input_example,
                registered_model_name=registered_model_name,
            )

    return model_info

In [22]:
# Candidate regularization strengths: 10 values, log-spaced from 1 down to 0.001
alpha_range = np.logspace(0, -3, num=10).tolist()
run_name = 'Lasso_child_runs_1_0_point_001'

# Tags for these runs, defined explicitly. Reusing the global `tags` would pick up
# whatever dict the last executed cell left behind (the CatBoost cell's
# 'algorithm': 'gradient_boosting' and 'model_version': 'baseline'), which would
# mislabel every LASSO child run.
lasso_tags = {
    'experiment_type': 'regression',
    'algorithm': 'lasso',
}

# The parent run groups the child runs; each alpha gets its own nested run
with mlflow.start_run(run_name=run_name):
    for idx, alpha in enumerate(alpha_range):
        params = {'alpha': alpha}
        # Train the model
        trained_model, rmse_val, rmse_test = train_model(df, params, model='Lasso')
        # Prepare metrics
        metrics = {'rmse_val': rmse_val, 'rmse_test': rmse_test}
        # Log everything using our function
        model_info = log_run_child(
            run_name=run_name,
            params=params,
            metrics=metrics,
            tags=lasso_tags,
            trained_model=trained_model,
            model_type='Lasso',
            iteration=idx
        )

RMSE val = 0.7546000742808262, RMSE test = 0.7374379792573892




🏃 View run Lasso_child_runs_1_0_point_001_iteration_0 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/2b23fea4928e45f589818b9195f6893e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.7541178879815691, RMSE test = 0.7366319489063405




🏃 View run Lasso_child_runs_1_0_point_001_iteration_1 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/6b4fa35858d94675b330377776bca249
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.6762060849767165, RMSE test = 0.6743324662326164




🏃 View run Lasso_child_runs_1_0_point_001_iteration_2 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/e7b291e9adde4104962d9b22d62e355b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353




🏃 View run Lasso_child_runs_1_0_point_001_iteration_3 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/14ef8c4559f4450ab7d85f51a89605de
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.6255559065512195, RMSE test = 0.628923587071802




🏃 View run Lasso_child_runs_1_0_point_001_iteration_4 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/5b688735920d46da9cd5549a3fe514c9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.5921306582799698, RMSE test = 0.6128059355142866




🏃 View run Lasso_child_runs_1_0_point_001_iteration_5 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/e5c8a127b74249889c5a42ebec319d24
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.5803887110911056, RMSE test = 0.605750602168249




🏃 View run Lasso_child_runs_1_0_point_001_iteration_6 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/705e262f9f5d4be9b86f8314a50545cd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.5771690201433481, RMSE test = 0.6060892593315526




🏃 View run Lasso_child_runs_1_0_point_001_iteration_7 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/434d538010544e48bc8ac7320fdd9413
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.5726741757910615, RMSE test = 0.6072882839308418




🏃 View run Lasso_child_runs_1_0_point_001_iteration_8 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/312cc97c6ee84949a2687235d0d80a97
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
RMSE val = 0.5713198123541341, RMSE test = 0.6098083649853254




🏃 View run Lasso_child_runs_1_0_point_001_iteration_9 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/2f622e286947475f9e923d00e407b05a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
🏃 View run Lasso_child_runs_1_0_point_001 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/2b990ac2f7d84084a78fda542eea08ed
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


### Child runs with Optuna

In [23]:
def objective(
    trial: optuna.Trial, 
    x_train: np.ndarray, 
    y_train: np.ndarray, 
    x_val: np.ndarray, 
    y_val: np.ndarray, 
    x_test: np.ndarray, 
    y_test: np.ndarray
) -> float:
    """Optimize CatBoost hyperparameters using Optuna.

    Samples a hyperparameter set from ``trial``, fits a CatBoostRegressor on
    the training data, logs the trial as an MLflow child run and returns the
    validation RMSE for Optuna to minimize.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample ``learning_rate``, ``depth`` and ``l2_leaf_reg``.
    x_train, y_train : np.ndarray
        Data the model is fitted on.
    x_val, y_val : np.ndarray
        Data the returned score is computed on.
    x_test, y_test : np.ndarray
        Used only to compute the ``rmse_test`` metric that is logged; it does
        not influence the returned value.

    Returns
    -------
    float
        Validation RMSE rounded to 3 decimals.

    Notes
    -----
    The child run's base name is read from the global ``run_name``, so that
    variable must be set (by the cell that starts the parent run) before the
    study is optimized.
    """
    np.random.seed(42)
    # Sample CatBoost hyperparameters (fixed iterations for faster trials)
    params: Dict[str, Union[int, float, bool]] = {
        'iterations': 100,  # Fixed iterations for trials
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
        'random_seed': 42,
        'verbose': False
    }
    
    model: CatBoostRegressor = CatBoostRegressor(**params)
    model.fit(x_train, y_train)
    
    # Predictions
    y_pred_val: np.ndarray = model.predict(x_val)
    y_pred_test: np.ndarray = model.predict(x_test)
    
    # Errors
    rmse_val: float = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test: float = np.sqrt(mean_squared_error(y_test, y_pred_test))
    
    # Log the trial as a child run; `run_name` is the notebook-level global
    log_run_child(
        run_name=run_name,
        params=params,
        metrics={'rmse_val': rmse_val, 'rmse_test': rmse_test},
        tags={'model_version': 'catboost_optuna', 'trial': trial.number},
        trained_model=model,
        model_type='CatBoost',
        iteration=trial.number
    )
    
    # NOTE(review): the score handed to Optuna is rounded, so trials whose
    # RMSE differs only beyond the third decimal compare as equal - confirm
    # this is intended.
    return round(rmse_val, 3)

In [28]:
run_name = 'Catboost_optuna'

# Parent run: every Optuna trial is logged below it as a child run by `objective`
with mlflow.start_run(run_name=run_name):
    np.random.seed(42)
    # Create the study and optimize (seeded sampler for repeatable suggestions)
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(
        lambda trial: objective(trial, x_train, y_train, x_val, y_val, x_test, y_test),
        n_trials=50
    )
    
    # Log the best score and hyperparameters found by the study on the parent run
    mlflow.log_metric("best_rmse", study.best_value)
    mlflow.log_params(study.best_params)
    mlflow.set_tag('best_model', 'true')
    
    # Train the best model with early stopping (more iterations)
    best_params = study.best_params.copy()
    best_params.update({
        'iterations': 1000,  # More iterations for final model
        'early_stopping_rounds': 50,  # Early stopping
    })
    
    best_model = CatBoostRegressor(**best_params, random_seed=42)
    best_model.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=False)
    
    # Compute test score for the best model
    y_pred_test = best_model.predict(x_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    
    # Log the test score
    mlflow.log_metric("rmse_test_best", rmse_test)
    
    # Log the best model in the parent run. The input example only needs a few
    # representative rows; passing the whole training matrix would save all of
    # it alongside the model.
    # NOTE(review): this CatBoost model goes through the sklearn flavor, while
    # log_run / log_run_child use mlflow.catboost.log_model - confirm intended.
    model_info = mlflow.sklearn.log_model(
        sk_model=best_model,
        name="best_catboost_model",
        input_example=x_train[:5],
        tags={'best_model': 'true'}
    )
    
    print(f"Best validation RMSE: {study.best_value:.4f}")
    print(f"Test RMSE: {rmse_test:.4f}")

[I 2025-08-27 12:45:04,344] A new study created in memory with name: no-name-b318f382-8968-4262-a3bb-7076cfca9dd2
[I 2025-08-27 12:45:06,307] Trial 0 finished with value: 0.596 and parameters: {'learning_rate': 0.03574712922600244, 'depth': 10, 'l2_leaf_reg': 7.587945476302646}. Best is trial 0 with value: 0.596.


🏃 View run Catboost_optuna_iteration_0 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/2c626324209446a78e8ee5eea3ed66e6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:07,610] Trial 1 finished with value: 0.574 and parameters: {'learning_rate': 0.07661100707771368, 'depth': 5, 'l2_leaf_reg': 2.403950683025824}. Best is trial 1 with value: 0.574.


🏃 View run Catboost_optuna_iteration_1 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/4444b476f0ca4abfab158e9ee50414ac
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:09,236] Trial 2 finished with value: 0.653 and parameters: {'learning_rate': 0.012184186502221764, 'depth': 10, 'l2_leaf_reg': 6.41003510568888}. Best is trial 1 with value: 0.574.


🏃 View run Catboost_optuna_iteration_2 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/d7fe33241362485fac54ea0c4be486ab
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:10,459] Trial 3 finished with value: 0.566 and parameters: {'learning_rate': 0.11114989443094977, 'depth': 4, 'l2_leaf_reg': 9.72918866945795}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_3 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/56c398f1ea6c493d97d500f24d12055d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:11,738] Trial 4 finished with value: 0.568 and parameters: {'learning_rate': 0.16967533607196555, 'depth': 5, 'l2_leaf_reg': 2.636424704863906}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_4 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/fa967ebc37374e66bf015d9a8fb5c7b1
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:13,024] Trial 5 finished with value: 0.601 and parameters: {'learning_rate': 0.018659959624904916, 'depth': 6, 'l2_leaf_reg': 5.72280788469014}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_5 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/2a79f44db183425697fe1224dc6575d8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:14,348] Trial 6 finished with value: 0.571 and parameters: {'learning_rate': 0.04345454109729477, 'depth': 6, 'l2_leaf_reg': 6.506676052501415}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_6 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/6f1206ec9f3b43138e5af0f705f99ce1
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:15,625] Trial 7 finished with value: 0.605 and parameters: {'learning_rate': 0.01607123851203988, 'depth': 6, 'l2_leaf_reg': 4.297256589643226}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_7 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/60c3dc880fed45c2bea7434e7e2966cf
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:17,007] Trial 8 finished with value: 0.565 and parameters: {'learning_rate': 0.04717052037625178, 'depth': 9, 'l2_leaf_reg': 2.7970640394252375}. Best is trial 8 with value: 0.565.


🏃 View run Catboost_optuna_iteration_8 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b4ba7ecb7f67490396fd3f31db58848a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:18,414] Trial 9 finished with value: 0.562 and parameters: {'learning_rate': 0.05748924681991978, 'depth': 8, 'l2_leaf_reg': 1.4180537144799796}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_9 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/a729952c1d99445e87799d8d92de3875
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:19,763] Trial 10 finished with value: 0.598 and parameters: {'learning_rate': 0.24893231508461813, 'depth': 8, 'l2_leaf_reg': 1.0679258738466384}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_10 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/52f6a06d4b3845c0a4c8872fab2efa93
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:21,130] Trial 11 finished with value: 0.574 and parameters: {'learning_rate': 0.03253425182484387, 'depth': 8, 'l2_leaf_reg': 3.5584186834693132}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_11 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/9ce8d2781828436f9d60ebc750d1a50b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:22,464] Trial 12 finished with value: 0.565 and parameters: {'learning_rate': 0.07186384123660489, 'depth': 8, 'l2_leaf_reg': 1.347215194590903}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_12 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/43276a2e281248ec9ec83da115627b47
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:23,958] Trial 13 finished with value: 0.566 and parameters: {'learning_rate': 0.05710611463977297, 'depth': 9, 'l2_leaf_reg': 4.2048210100149435}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_13 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/4e03a75f970e43539099759e6ea9bf8d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:25,352] Trial 14 finished with value: 0.593 and parameters: {'learning_rate': 0.02539103142171328, 'depth': 9, 'l2_leaf_reg': 2.57693774467556}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_14 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/d41f70280b594843b3a8ddc9f62d4a83
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:26,785] Trial 15 finished with value: 0.556 and parameters: {'learning_rate': 0.09672338830100866, 'depth': 9, 'l2_leaf_reg': 2.0093794150920257}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_15 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/e1ed1c90c117403e88c6501a4a0f2828
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:28,106] Trial 16 finished with value: 0.572 and parameters: {'learning_rate': 0.11752482442830568, 'depth': 7, 'l2_leaf_reg': 1.4450336538325919}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_16 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/a4aabba72dbd496d97bfc7f771dd4298
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:29,561] Trial 17 finished with value: 0.566 and parameters: {'learning_rate': 0.10435882230584731, 'depth': 7, 'l2_leaf_reg': 4.499440220053293}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_17 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/8fa6e30de4d345bfa756b9a9c988e193
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:31,047] Trial 18 finished with value: 0.558 and parameters: {'learning_rate': 0.1837352199766314, 'depth': 9, 'l2_leaf_reg': 8.133778989170452}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_18 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/17badc1204804d458dfb267fcf9abe81
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:32,546] Trial 19 finished with value: 0.595 and parameters: {'learning_rate': 0.29596284206078866, 'depth': 10, 'l2_leaf_reg': 8.471764723828645}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_19 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/3cc7e20a57db49678233c450d23856ff
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:33,973] Trial 20 finished with value: 0.574 and parameters: {'learning_rate': 0.18348783233288368, 'depth': 9, 'l2_leaf_reg': 8.333053471044082}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_20 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/f83d3f4d72d148b2b3b43ba36ebc6190
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:35,310] Trial 21 finished with value: 0.578 and parameters: {'learning_rate': 0.16195228630570985, 'depth': 8, 'l2_leaf_reg': 9.978832409877402}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_21 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/e12bb534226e437abeca5dfdf4929612
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:36,703] Trial 22 finished with value: 0.568 and parameters: {'learning_rate': 0.07798353614492506, 'depth': 9, 'l2_leaf_reg': 5.14456034767506}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_22 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/350193e17226426990774d1ffce94b2b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:38,062] Trial 23 finished with value: 0.567 and parameters: {'learning_rate': 0.13487130212775789, 'depth': 8, 'l2_leaf_reg': 1.7314887116553872}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_23 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/5e12a8893d8446a1a18155ba8b4d10c8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:39,630] Trial 24 finished with value: 0.573 and parameters: {'learning_rate': 0.08732982437982302, 'depth': 10, 'l2_leaf_reg': 7.344343743413596}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_24 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b38633f2734a4073a26deb97f4f93c72
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:40,953] Trial 25 finished with value: 0.56 and parameters: {'learning_rate': 0.062063456685780956, 'depth': 7, 'l2_leaf_reg': 3.5547510645237077}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_25 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/331730f65bcd42d4a6bf73badf5a9651
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:42,340] Trial 26 finished with value: 0.592 and parameters: {'learning_rate': 0.2086899224996805, 'depth': 7, 'l2_leaf_reg': 3.4021337686799744}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_26 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/5d521dc7e1184740b662c9753403790d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:43,699] Trial 27 finished with value: 0.573 and parameters: {'learning_rate': 0.1329085530372941, 'depth': 7, 'l2_leaf_reg': 3.5605971659223257}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_27 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/10c74a0faac84f54bff32d4ed244d513
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:45,136] Trial 28 finished with value: 0.564 and parameters: {'learning_rate': 0.09194263980021311, 'depth': 9, 'l2_leaf_reg': 5.137041535688812}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_28 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/acd8aa9ad23c4e56a817dc5e28713ac4
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:46,777] Trial 29 finished with value: 0.584 and parameters: {'learning_rate': 0.06267845792872966, 'depth': 10, 'l2_leaf_reg': 8.98997023752528}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_29 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/1d744460613147b7b97729a8942630f0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:48,113] Trial 30 finished with value: 0.573 and parameters: {'learning_rate': 0.03761753188744309, 'depth': 6, 'l2_leaf_reg': 6.998820753769471}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_30 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/d4d52fcc9b544fad93e2324cb0206afb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:49,471] Trial 31 finished with value: 0.571 and parameters: {'learning_rate': 0.04961718109516873, 'depth': 8, 'l2_leaf_reg': 1.8410484636196525}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_31 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/1bcc8cc963014d849253bcf3478de247
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:50,840] Trial 32 finished with value: 0.566 and parameters: {'learning_rate': 0.06685138097762715, 'depth': 8, 'l2_leaf_reg': 2.091670467569017}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_32 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/ef9077621fb4460daf08dfb52c010330
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:52,321] Trial 33 finished with value: 0.587 and parameters: {'learning_rate': 0.02770040573789249, 'depth': 9, 'l2_leaf_reg': 2.8273945769723783}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_33 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/8034ce3354914917aa3515bb3896979c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:53,856] Trial 34 finished with value: 0.578 and parameters: {'learning_rate': 0.05555697575774188, 'depth': 10, 'l2_leaf_reg': 2.0657801657717707}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_34 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/dd2cd32bdf04430e85104f9cd62bd9ce
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:55,151] Trial 35 finished with value: 0.573 and parameters: {'learning_rate': 0.09239698231223484, 'depth': 7, 'l2_leaf_reg': 3.219866506154216}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_35 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/d6fc11c57eab43e5ab77550d0fb2132c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:56,477] Trial 36 finished with value: 0.573 and parameters: {'learning_rate': 0.03962913996825942, 'depth': 5, 'l2_leaf_reg': 2.2228039618690874}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_36 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/ec660f7bb2694199bfda73bdb38f2deb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:57,729] Trial 37 finished with value: 0.566 and parameters: {'learning_rate': 0.14233653116985945, 'depth': 4, 'l2_leaf_reg': 5.994678501559357}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_37 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/ab997ab4882f41b6b3c44030d1e80208
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:45:59,100] Trial 38 finished with value: 0.579 and parameters: {'learning_rate': 0.11006384936785527, 'depth': 8, 'l2_leaf_reg': 1.0455235471932482}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_38 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/59726e426185495996fd5cd449666de9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:00,509] Trial 39 finished with value: 0.56 and parameters: {'learning_rate': 0.23979317837460828, 'depth': 9, 'l2_leaf_reg': 4.134106001587102}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_39 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b4ec01cb47164547b1b1635c8fc5b21d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:02,101] Trial 40 finished with value: 0.571 and parameters: {'learning_rate': 0.21397443697466811, 'depth': 10, 'l2_leaf_reg': 4.843465157754345}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_40 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b81b5fca4b724110b1dda2237422b1d9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:03,546] Trial 41 finished with value: 0.59 and parameters: {'learning_rate': 0.2846583896897537, 'depth': 9, 'l2_leaf_reg': 3.868683363965411}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_41 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/eb924f3be04e4e0c98a705bdc9589424
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:04,970] Trial 42 finished with value: 0.563 and parameters: {'learning_rate': 0.2406039739256781, 'depth': 9, 'l2_leaf_reg': 2.9035868965447067}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_42 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/3b2c5780111342fd86e22609c92df943
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:06,327] Trial 43 finished with value: 0.551 and parameters: {'learning_rate': 0.16461139272031802, 'depth': 8, 'l2_leaf_reg': 4.063417976603828}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_43 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/e54948c0cc214727a823583d629c65fe
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:07,745] Trial 44 finished with value: 0.582 and parameters: {'learning_rate': 0.16967815338691372, 'depth': 9, 'l2_leaf_reg': 4.0276643415442805}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_44 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/b723fb5eaa804ea5825fbf85bde01924
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:09,134] Trial 45 finished with value: 0.57 and parameters: {'learning_rate': 0.1996511592157663, 'depth': 6, 'l2_leaf_reg': 6.065477502762828}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_45 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/9e63c647e90c4c3b92c760cc3857c33b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:10,540] Trial 46 finished with value: 0.574 and parameters: {'learning_rate': 0.14984901465905573, 'depth': 8, 'l2_leaf_reg': 4.54110049051632}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_46 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/af295675e5de4988bbc5b52de6097c42
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:11,848] Trial 47 finished with value: 0.567 and parameters: {'learning_rate': 0.12466480786239399, 'depth': 7, 'l2_leaf_reg': 3.8161689440056277}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_47 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/4912c65d76224019b0373bea42cfe4c7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:13,304] Trial 48 finished with value: 0.566 and parameters: {'learning_rate': 0.24549414786515986, 'depth': 9, 'l2_leaf_reg': 5.495720049674533}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_48 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/281abccbd8c445a2b3bcf3fbd297e992
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


[I 2025-08-27 12:46:14,663] Trial 49 finished with value: 0.568 and parameters: {'learning_rate': 0.07842255349941261, 'depth': 8, 'l2_leaf_reg': 3.215662322272498}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_49 at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/cff1fc5e17c445239d1a50139aa4c565
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005
Best validation RMSE: 0.5510
Test RMSE: 0.5768
🏃 View run Catboost_optuna at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/7517e09c15974f5b92eb030c9d9f268d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


# Loading the best optuna model

Note that when we execute the parent run, we retrain the best Optuna model on the entire training set and log the tag `best_model: true`. This tag lets us easily find and load the best model, use it for predictions, and consider promoting it to production.

In [37]:
# Find the most recently updated logged model that carries the "best_model" tag.
# With the default output format, search_logged_models returns a pandas DataFrame.
top_models = mlflow.search_logged_models(
    filter_string="tags.best_model = 'true'",
    order_by=[{"field_name": "last_updated_timestamp", "ascending": False}],
    max_results=1
)

# Fail with an actionable message instead of the opaque IndexError that
# `.iloc[0]` would raise when no logged model carries the tag.
if top_models.empty:
    raise RuntimeError(
        "No logged model with tag best_model='true' was found; "
        "execute the run that logs the tagged model before loading it."
    )

# Get the latest "best_model" row
best_model = top_models.iloc[0]

# NOTE: despite its name, this variable holds the logged model's ID, not a full
# URI; the 'models:/<model_id>' URI is assembled in the load call below.
model_uri = best_model['model_id']

# Load it through the generic pyfunc interface
loaded_model = mlflow.pyfunc.load_model(f'models:/{model_uri}')

# Score the held-out test set (x_test / y_test are defined in an earlier cell)
y_pred = loaded_model.predict(x_test)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE on test set: {rmse_test}")

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

RMSE on test set: 0.5767685743260196


# Beyond basics - what do you need to log to MLflow?

In [41]:
# Read the wine-quality table from disk
df = pd.read_csv('WineQT.csv', sep=',')

# Hold out 25% of the rows as the test set
train, test = train_test_split(df, test_size=0.25, random_state=42)

# Features are every column except the target; the target is a 1-D array
x_train = train.drop(columns=["quality"]).to_numpy()
y_train = train["quality"].to_numpy()
x_test = test.drop(columns=["quality"]).to_numpy()
y_test = test["quality"].to_numpy()

# Carve a validation set (20%) out of the remaining training rows;
# x_train / y_train are rebound to the reduced training split
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

### Signatures and input examples

In [None]:
# Setting hyperparameters for CatBoost
catboost_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 6,
    'random_seed': 42,
    'verbose': False
}

# Train the model and make predictions
model = CatBoostRegressor(**catboost_params)
model.fit(x_train, y_train)
y_pred_test = model.predict(x_test)

# Infer the signature (input/output schema) from the test features and the
# predictions this model produced for them
signature = infer_signature(x_test, y_pred_test)
# A handful of training rows stored alongside the model as an example input
input_example = x_train[:5]

# Start an MLflow run
with mlflow.start_run(run_name='catboost_with_signature'):
    # Log the model fitted in this cell (`model`) together with its input
    # example and signature. Logging any other variable would either fail on a
    # fresh kernel or store a model that does not match the signature above.
    model_info = mlflow.catboost.log_model(
        cb_model=model,
        name="wine_model_catboost",
        input_example=input_example,
        signature=signature
    )


inputs: 
  [Tensor('float64', (-1, 12))]
outputs: 
  [Tensor('float64', (-1,))]
params: 
  None

### Artifacts

In [None]:
# --- Single parameter for Lasso
alpha = 0.01

# --- Fit scaler separately (fitted on the training split, applied to the test split)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled  = scaler.transform(x_test)

# --- Fit Lasso on the standardized features
model = Lasso(alpha=alpha, random_state=42, max_iter=10000)
model.fit(X_train_scaled, y_train)
y_pred_test = model.predict(X_test_scaled)

# --- Signature & input example
# The Lasso model is fitted on standardized features and the scaler is saved
# separately (below), so the example stored with the model must be scaled too;
# otherwise it would not be a valid input for the bare Lasso model.
signature = infer_signature(X_test_scaled, y_pred_test)
input_example = X_train_scaled[:5]

# --- Local paths for artifacts (a fresh UUID-named folder per execution)
run_tmp = Path("mlflow_local_artifacts") / str(uuid.uuid4())
run_tmp.mkdir(parents=True, exist_ok=True)
scaler_path = run_tmp / "standard_scaler.pkl"
plot_path   = run_tmp / "pred_vs_true.png"

# Save scaler separately so it can be applied to raw features before prediction
joblib.dump(scaler, scaler_path)


🏃 View run lasso_artifacts at: http://127.0.0.1:8080/#/experiments/677703609984501005/runs/48e95d95802e470baf9423d98de8170a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/677703609984501005


In [None]:
# --- Predicted-vs-true scatter, drawn through the explicit Figure/Axes API
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(y_test, y_pred_test, alpha=0.6)
ax.set_xlabel("True values")
ax.set_ylabel("Predicted values")
ax.set_title("Predicted vs. True (Lasso)")

# Dashed identity line spanning the combined range of true and predicted values
axis_min = min(np.min(y_test), np.min(y_pred_test))
axis_max = max(np.max(y_test), np.max(y_pred_test))
diagonal = [axis_min, axis_max]
ax.plot(diagonal, diagonal, linestyle="--")

# Write the figure to disk, then release it
fig.tight_layout()
fig.savefig(plot_path, dpi=150)
plt.close(fig)

# --- MLflow logging
with mlflow.start_run(run_name="lasso_artifacts"):
    # Upload the scaler and the plot into the run's "artifacts" folder
    for local_file in (scaler_path, plot_path):
        mlflow.log_artifact(str(local_file), artifact_path="artifacts")

    # Log the Lasso model (it was trained on scaled inputs)
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="wine_model_lasso",
        signature=signature,
        input_example=input_example,
    )