In [1]:
import numpy as np
import pandas as pd
import mlflow
import optuna
from optuna import study
import mlflow.sklearn
from catboost import CatBoostRegressor
from mlflow.tracking import MlflowClient
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from typing import Dict, Any, Union

# Starting ML Tracking Server

In the terminal, navigate to the notebook folder and the start the ml server

Run in the terminal (command line): mlflow server --host 127.0.0.1 --port 8080

**This is what it does:**

1. Starts the MLflow Tracking Server
A dedicated process that manages and serves your MLflow experiments.

2. Provides a Web UI
Accessible at http://127.0.0.1:8080 (or localhost:8080), where you can browse experiments, runs, parameters, metrics, and artifacts.

3. Exposes a Tracking API Endpoint
Other scripts or notebooks can log directly to this server if you set - **mlflow.set_tracking_uri("http://127.0.0.1:8080")**

4. MLflow automatically creates a folder mlruns/ in your working directory the first time you log something.
Inside mlruns/, it creates subfolders for:

- each experiment (default is 0)

- each run within that experiment

# Setting URI

In [2]:
# Set the tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

What it does:

1. It tells your MLflow client (your script/notebook) where to send all logging data (experiments, runs, params, metrics, artifacts).

3. Since our host is local, it will still write to mlruns, but here you could configure a remote host URI.

In [3]:
print("Current Tracking URI:", mlflow.get_tracking_uri())

Current Tracking URI: http://127.0.0.1:8080


# Fitting the model

Prepare the data and fit the model

In [4]:
# Load datasets
df = pd.read_csv('WineQT.csv', sep=',')

# Split the data into training, validation, and test sets
train, test = train_test_split(df, test_size=0.25, random_state=42)
x_train = train.drop(["quality"], axis=1).values
y_train = train[["quality"]].values.ravel()
x_test= test.drop(["quality"], axis=1).values
y_test = test[["quality"]].values.ravel()

# Split the data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Setting hyperparameter
alpha = 0.1
model = Lasso(alpha=alpha)
model.fit(x_train, y_train)

# Predicting
y_pred_val = model.predict(x_val)
y_pred_test = model.predict(x_test)

# Calculating metrics
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353


# Running first experiment

Create an MLFLow Experiment. It will create a folder with artifact_location= ... name in the mlruns folder

In [5]:
# Set the experiment name - it also creates an experiment if it doesn't exist
mlflow.set_experiment("Wine Quality Regression")

<Experiment: artifact_location='mlflow-artifacts:/626129081351815138', creation_time=1756222817145, experiment_id='626129081351815138', last_update_time=1756222817145, lifecycle_stage='active', name='Wine Quality Regression', tags={}>

In [6]:
# Setting hyperparameters
params = {
    'alpha': model.alpha
}

# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Log the hyperparameters
    mlflow.log_params(params)
    # Log the loss metric
    mlflow.log_metric("rmse_val", rmse_val)
    mlflow.log_metric("rmse_test", rmse_test)
    # Example artifact
    # with open("metrics.txt", "w") as f:
    #     f.write(f"RMSE val: {rmse_val}\nRMSE test: {rmse_test}")
    # mlflow.log_artifact("metrics.txt")

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("model_version", "baseline")

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="wine_model_lasso",
        input_example=x_train,
        # registered_model_name="lasso_baseline_1",
    )

🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/626129081351815138/runs/555a5516f59f467587bfc6dab50300eb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/626129081351815138


In [7]:
from mlflow.tracking import MlflowClient

client = MlflowClient()
run_id = "555a5516f59f467587bfc6dab50300eb"

# Download file to local dir
local_path = client.download_artifacts(run_id, "model.pkl", "/tmp")
print("Downloaded to:", local_path)

# Read file
with open(local_path, "r") as f:
    content = f.read()
print("File content:\n", content)


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x121b4c0d0>>
Traceback (most recent call last):
  File "/Users/timurbikmuhametov/miniconda3/envs/mlacademy-tutorials/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 781, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 

KeyboardInterrupt



# Running MLflow experiments with different models

In [7]:
def train_model(df: pd.DataFrame, params: dict, model='Lasso'):
    """
    Train a model and return the metrics
    """
    # Split the data into training, validation, and test sets
    train, test = train_test_split(df, test_size=0.25, random_state=42)
    x_train = train.drop(["quality"], axis=1).values
    y_train = train[["quality"]].values.ravel()
    x_test= test.drop(["quality"], axis=1).values
    y_test = test[["quality"]].values.ravel()

    # Split the data into training and validation sets
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42
    )

    if model == 'Lasso':
        model = Lasso(**params)
    elif model == 'CatBoost':
        model = CatBoostRegressor(**params)
    else:
        raise ValueError(f"Model {model} not supported")

    # Fitting the model
    model.fit(x_train, y_train)
    # Predicting
    y_pred_val = model.predict(x_val)
    y_pred_test = model.predict(x_test)

    # Calculating metrics
    rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')
    return model,rmse_val, rmse_test

In [8]:
def log_run(
        run_name: str,
        params: dict,
        metrics: dict,
        tags: dict,
        trained_model,
        model_type='Lasso',
        input_example=None,
        registered_model_name=None):
    """
    Log a run to MLflow
    """
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.set_tag("model_version", run_name)
    mlflow.set_tags(tags)

    if model_type == 'Lasso':
        # Log sklearn model
        model_info = mlflow.sklearn.log_model(
            sk_model=trained_model,
            name="wine_model_lasso",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )
    elif model_type == 'CatBoost':
        # Log CatBoost model
        model_info = mlflow.catboost.log_model(
            cb_model=trained_model,
            name="wine_model_catboost",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )
    else:
        raise ValueError(f"Model type {model_type} not supported")
    
    return model_info

### Running LASSO

In [9]:
# Setting hyperparameters

lasso_params = {
    'alpha': 0.1,
    'max_iter': 1000,
    'random_state': 42
}

# Train the model
trained_model, rmse_val, rmse_test = train_model(df, lasso_params, model='Lasso')

# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Prepare metrics and tags
    metrics = {
        'rmse_val': rmse_val,
        'rmse_test': rmse_test
    }
    
    # Let's add some tags to the run
    tags = {
        'model_version': 'baseline',
        'experiment_type': 'regression',
    }
    
    # Log everything using our function
    model_info = log_run(
        run_name="lasso_baseline",
        params=lasso_params,
        metrics=metrics,
        tags=tags,
        trained_model=trained_model,
        model_type='Lasso',
    )
    
    print(f"Model logged with URI: {model_info.model_uri}")
    print(f"RMSE validation: {rmse_val:.4f}")
    print(f"RMSE test: {rmse_test:.4f}")

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353




Model logged with URI: models:/m-bc43830338814c42ad09ede1ee788e9f
RMSE validation: 0.6356
RMSE test: 0.6386
🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/e0154877457c42c7a8bd96e9ca1cc74e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


In [73]:
# Setting hyperparameters for CatBoost
catboost_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 6,
    'random_seed': 42,
    'verbose': False
}

# Train the model
trained_model, rmse_val, rmse_test = train_model(df, catboost_params, model='CatBoost')

# Start an MLflow run
with mlflow.start_run(run_name='catboost_baseline'):
    # Prepare metrics and tags
    metrics = {
        'rmse_val': rmse_val,
        'rmse_test': rmse_test
    }
    
    tags = {
        'model_version': 'baseline',
        'experiment_type': 'regression',
        'data_version': 'v1',
        'algorithm': 'gradient_boosting'
    }
    
    # Log everything using our function
    model_info = log_run(
        run_name="catboost_baseline",
        params=catboost_params,
        metrics=metrics,
        tags=tags,
        trained_model=trained_model,
        model_type='CatBoost',
    )
    
    print(f"Model logged with URI: {model_info.model_uri}")
    print(f"RMSE validation: {rmse_val:.4f}")
    print(f"RMSE test: {rmse_test:.4f}")

RMSE val = 0.5558687336128091, RMSE test = 0.5729224664412881




Model logged with URI: models:/m-0ae710e58547414abd78215b716e77a9
RMSE validation: 0.5559
RMSE test: 0.5729
🏃 View run catboost_baseline at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/36604e3cdf394aff802eaa9f5d3a3060
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


# Searching for the best model

### Searching by runs

In [25]:
# Get experiment by name
experiment_name = "Wine Quality Regression"
experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment:
    experiment_id = experiment.experiment_id
    print(f"Experiment '{experiment_name}' has ID: {experiment_id}")
else:
    print(f"Experiment '{experiment_name}' not found")

Experiment 'Wine Quality Regression' has ID: 830731795768476244


In [32]:
top_models = mlflow.search_runs(
    experiment_ids=[str(experiment_id)],
    filter_string="metrics.rmse_test < 0.9",
    order_by=["metrics.rmse_val ASC"],   # use string, not dict
)
# Get the best run (lowest validation RMSE)
best_model = top_models.iloc[0]
best_model

run_id                                      3f71187083cd412ba69b04108f2bb432
experiment_id                                             830731795768476244
status                                                              FINISHED
artifact_uri               mlflow-artifacts:/830731795768476244/3f7118708...
start_time                                  2025-08-26 07:11:46.033000+00:00
end_time                                    2025-08-26 07:11:47.165000+00:00
metrics.rmse_val                                                    0.550561
metrics.rmse_test                                                   0.576752
params.verbose                                                         False
params.l2_leaf_reg                                         4.063417976603828
params.depth                                                               8
params.iterations                                                        100
params.random_seed                                                        42

In [33]:
run_id = best_model.to_dict()['run_id']
loaded = mlflow.pyfunc.load_model(f"runs:/{run_id}/model_catboost")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [53]:
# Load the latest best model (adjust artifact path if needed)
model_uri = f"runs:/{run_id}/best_model_lasso"  # or ridge_model, etc.
latest_best_model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [45]:
top_models

Unnamed: 0,artifact_location,creation_timestamp,experiment_id,last_updated_timestamp,metrics,model_id,model_type,name,params,source_run_id,status,status_message,tags
0,mlflow-artifacts:/830731795768476244/models/m-...,1756192306055,830731795768476244,1756192307163,"[<Metric: dataset_digest=None, dataset_name=No...",m-d76ab93137b640c8993748f4178215d4,,spiffy-dog-12,"{'depth': '8', 'l2_leaf_reg': '4.0634179766038...",3f71187083cd412ba69b04108f2bb432,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
1,mlflow-artifacts:/830731795768476244/models/m-...,1756193083558,830731795768476244,1756193084600,"[<Metric: dataset_digest=None, dataset_name=No...",m-b61c642855b04df79bcdef51f57b432b,,wine_model_catboost,"{'depth': '6', 'learning_rate': '0.1', 'verbos...",eefd64c044d04fffbaf32140fb5c7956,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
2,mlflow-artifacts:/830731795768476244/models/m-...,1756192223680,830731795768476244,1756192224728,"[<Metric: dataset_digest=None, dataset_name=No...",m-8cd5b06b8c7b4bd2a09d36319817e91d,,wine_model_catboost,"{'depth': '6', 'learning_rate': '0.1', 'verbos...",8f626d2e0fb54f6db58031f155b2c663,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
3,mlflow-artifacts:/830731795768476244/models/m-...,1756192270498,830731795768476244,1756192271574,"[<Metric: dataset_digest=None, dataset_name=No...",m-da672ccacd2d4f4d81fc4cf9e527b8f3,,painted-vole-131,"{'depth': '9', 'l2_leaf_reg': '2.0093794150920...",4b4e6971a0334cc68eb474a25f2334fd,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
4,mlflow-artifacts:/830731795768476244/models/m-...,1756192274109,830731795768476244,1756192275294,"[<Metric: dataset_digest=None, dataset_name=No...",m-f7bf14510e5d4457a1670b4fa1c4bf88,,persistent-hound-566,"{'depth': '9', 'l2_leaf_reg': '8.1337789891704...",1479fa2e683a4e9ebae1e9ec837f465f,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,mlflow-artifacts:/830731795768476244/models/m-...,1756192238594,830731795768476244,1756192239423,"[<Metric: dataset_digest=None, dataset_name=No...",m-4f4e94ac7c624c2eabab75031c15a4be,,resilient-dolphin-10,{'alpha': '0.2154434690031884'},42f12ac26d7b4cda92b76ef3bc55636f,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
72,mlflow-artifacts:/830731795768476244/models/m-...,1756193168834,830731795768476244,1756193169656,"[<Metric: dataset_digest=None, dataset_name=No...",m-41f9f176ed2c424f84eb510f34705d50,,nervous-deer-363,{'alpha': '0.4641588833612779'},e04619e4b1a74c5cb2a4824896aaf925,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
73,mlflow-artifacts:/830731795768476244/models/m-...,1756192237741,830731795768476244,1756192238565,"[<Metric: dataset_digest=None, dataset_name=No...",m-e1ed3129a3304c38bf002824904054fb,,righteous-ant-325,{'alpha': '0.4641588833612779'},763c8eab7a20457091061ffa2e196b9a,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
74,mlflow-artifacts:/830731795768476244/models/m-...,1756193167957,830731795768476244,1756193168804,"[<Metric: dataset_digest=None, dataset_name=No...",m-c3cafa554a954fad838b44498699e6a1,,dazzling-stoat-177,{'alpha': '1.0'},709a3b328ea943128bab39f65058960c,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."


### Searching by logged models

In [42]:
# Get experiment by name
experiment_name = "Wine Quality Regression"
experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment:
    experiment_id = experiment.experiment_id
    print(f"Experiment '{experiment_name}' has ID: {experiment_id}")
else:
    print(f"Experiment '{experiment_name}' not found")

Experiment 'Wine Quality Regression' has ID: 830731795768476244


In [43]:
# Find high-performing models across experiments
top_models = mlflow.search_logged_models(
    experiment_ids=[str(experiment_id)],
    filter_string="metrics.rmse_test < 0.9",
    order_by=[{"field_name": "metrics.rmse_val", "ascending": True}],
)
best_model = top_models.iloc[0]
best_model

artifact_location         mlflow-artifacts:/830731795768476244/models/m-...
creation_timestamp                                            1756192306055
experiment_id                                            830731795768476244
last_updated_timestamp                                        1756192307163
metrics                   [<Metric: dataset_digest=None, dataset_name=No...
model_id                                 m-d76ab93137b640c8993748f4178215d4
model_type                                                                 
name                                                          spiffy-dog-12
params                    {'depth': '8', 'l2_leaf_reg': '4.0634179766038...
source_run_id                              3f71187083cd412ba69b04108f2bb432
status                                                                READY
status_message                                                             
tags                      {'mlflow.user': 'timurbikmuhametov', 'mlflow.s...
Name: 0, dty

In [44]:
# Load the best model for inference
loaded_model = mlflow.pyfunc.load_model(f"models:/{best_model.model_id}")
y_pred = loaded_model.predict(x_test)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE on test set: {rmse_test}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

RMSE on test set: 0.5767523306773067


## Child runs

### Grid search for LASSO

In [14]:
def log_run_child(
        run_name: str,
        params: dict,
        metrics: dict,
        tags: dict,
        trained_model,
        model_type='Lasso',
        input_example=None,
        registered_model_name=None,
        iteration=None
        ):
    """
    Log a run to MLflow
    """
    with mlflow.start_run(run_name=f"{run_name}_iteration_{iteration}", nested=True):
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)
        mlflow.set_tag("model_version", run_name)
        mlflow.set_tags(tags)

        if model_type == 'Lasso':
            # Log sklearn model
            model_info = mlflow.sklearn.log_model(
                sk_model=trained_model,
                input_example=input_example,
                registered_model_name=registered_model_name,
            )
        elif model_type == 'CatBoost':
            # Log CatBoost model
            model_info = mlflow.catboost.log_model(
                cb_model=trained_model,
                input_example=input_example,
                registered_model_name=registered_model_name,
            )
        else:
            raise ValueError(f"Model type {model_type} not supported")
    
    return model_info

In [15]:
# Specify several alphas
alpha_range = np.logspace(0, -3, num=10).tolist()
run_name = 'Lasso_child_runs_1_0.001'

with mlflow.start_run(run_name=run_name):
    for idx, alpha in enumerate(alpha_range):
        params = {'alpha': alpha}
        # Train the model
        trained_model, rmse_val, rmse_test = train_model(df, params, model='Lasso')
        # Prepare metrics
        metrics = {'rmse_val': rmse_val, 'rmse_test': rmse_test}
    # Log everything using our function
        model_info = log_run_child(
            run_name=run_name,
            params=params,
            metrics=metrics,
            tags=tags,
            trained_model=trained_model,
            model_type='Lasso',
            iteration=idx
        )

RMSE val = 0.7546000742808262, RMSE test = 0.7374379792573892




🏃 View run Lasso_child_runs_1_0.001_iteration_0 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/709a3b328ea943128bab39f65058960c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.7541178879815691, RMSE test = 0.7366319489063405




🏃 View run Lasso_child_runs_1_0.001_iteration_1 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/e04619e4b1a74c5cb2a4824896aaf925
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.6762060849767165, RMSE test = 0.6743324662326164




🏃 View run Lasso_child_runs_1_0.001_iteration_2 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/cc998ebe8dfe4fee8609a5c6e36eeb3c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353




🏃 View run Lasso_child_runs_1_0.001_iteration_3 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/9f50613886554184a1597768e5adaaa0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.6255559065512195, RMSE test = 0.628923587071802




🏃 View run Lasso_child_runs_1_0.001_iteration_4 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/ad8c52dd948d4d5f90109a77e8c156cb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.5921306582799698, RMSE test = 0.6128059355142866




🏃 View run Lasso_child_runs_1_0.001_iteration_5 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/d62791a74beb4fbe91cd912f8f388758
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.5803887110911056, RMSE test = 0.605750602168249




🏃 View run Lasso_child_runs_1_0.001_iteration_6 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/b557493a5b494f0a81d46cf1520e51b7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.5771690201433481, RMSE test = 0.6060892593315526




🏃 View run Lasso_child_runs_1_0.001_iteration_7 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/82303a327f144f7ea187491b02d139c9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.5726741757910615, RMSE test = 0.6072882839308418




🏃 View run Lasso_child_runs_1_0.001_iteration_8 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/f0690631b3cd465fb8300f7f8c722298
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
RMSE val = 0.5713198123541341, RMSE test = 0.6098083649853254




🏃 View run Lasso_child_runs_1_0.001_iteration_9 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/1038d33468464679b53e3a7b49145797
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
🏃 View run Lasso_child_runs_1_0.001 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/ec94a86470794afca586ef7c6d140e23
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


### Child runs with Optuna

In [16]:
def objective(
    trial: optuna.Trial, 
    x_train: np.ndarray, 
    y_train: np.ndarray, 
    x_val: np.ndarray, 
    y_val: np.ndarray, 
    x_test: np.ndarray, 
    y_test: np.ndarray
) -> float:
    np.random.seed(42)
    """Optimize CatBoost hyperparameters using Optuna."""
    # Sample CatBoost hyperparameters (fixed iterations for faster trials)
    params: Dict[str, Union[int, float, bool]] = {
        'iterations': 100,  # Fixed iterations for trials
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
        'random_seed': 42,
        'verbose': False
    }
    
    model: CatBoostRegressor = CatBoostRegressor(**params)
    model.fit(x_train, y_train)
    
    # Predictions
    y_pred_val: np.ndarray = model.predict(x_val)
    y_pred_test: np.ndarray = model.predict(x_test)
    
    # Errors
    rmse_val: float = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test: float = np.sqrt(mean_squared_error(y_test, y_pred_test))
    
    # Log the run with log_run_child
    log_run_child(
        run_name=run_name,
        params=params,
        metrics={'rmse_val': rmse_val, 'rmse_test': rmse_test},
        tags={'model_version': 'catboost_optuna', 'trial': trial.number},
        trained_model=model,
        model_type='CatBoost',
        iteration=trial.number
    )
    
    return round(rmse_val, 3)

In [17]:
run_name = 'Catboost_optuna'
with mlflow.start_run(run_name=run_name):
    np.random.seed(42)
    # Create the study and optimize
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(
        lambda trial: objective(trial, x_train, y_train, x_val, y_val, x_test, y_test),
        n_trials=50
    )
    
    # Log the best metrics and parameters from the parent run
    mlflow.log_metric("best_rmse", study.best_value)
    mlflow.log_params(study.best_params)
    mlflow.set_tag('best_model', 'true')
    
    # Train the best model with early stopping (more iterations)
    best_params = study.best_params.copy()
    best_params.update({
        'iterations': 1000,  # More iterations for final model
        'early_stopping_rounds': 50,  # Early stopping
    })
    
    best_model = CatBoostRegressor(**best_params, random_seed=42)
    best_model.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=False)
    
    # Compute test score for the best model
    y_pred_test = best_model.predict(x_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    
    # Log the test score
    mlflow.log_metric("rmse_test_best", rmse_test)
    
    # Log the best model in the parent run
    model_info = mlflow.sklearn.log_model(
        sk_model=best_model,
        name="best_catboost_model",
        input_example=x_train,
    )
    
    print(f"Best validation RMSE: {study.best_value:.4f}")
    print(f"Test RMSE: {rmse_test:.4f}")

[I 2025-08-26 11:10:50,966] A new study created in memory with name: no-name-d03a183d-377d-488b-87e6-2e810d933c2b
[I 2025-08-26 11:10:52,311] Trial 0 finished with value: 0.596 and parameters: {'learning_rate': 0.03574712922600244, 'depth': 10, 'l2_leaf_reg': 7.587945476302646}. Best is trial 0 with value: 0.596.


🏃 View run Catboost_optuna_iteration_0 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/524ed5c586854d019ca57d263959d491
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:53,417] Trial 1 finished with value: 0.574 and parameters: {'learning_rate': 0.07661100707771368, 'depth': 5, 'l2_leaf_reg': 2.403950683025824}. Best is trial 1 with value: 0.574.


🏃 View run Catboost_optuna_iteration_1 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/5a9225433bb64996aced6d731c6a0456
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:54,748] Trial 2 finished with value: 0.653 and parameters: {'learning_rate': 0.012184186502221764, 'depth': 10, 'l2_leaf_reg': 6.41003510568888}. Best is trial 1 with value: 0.574.


🏃 View run Catboost_optuna_iteration_2 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/c35cd45a588c489aa99cf5e9164e3229
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:55,882] Trial 3 finished with value: 0.566 and parameters: {'learning_rate': 0.11114989443094977, 'depth': 4, 'l2_leaf_reg': 9.72918866945795}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_3 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/0fc3d3510a284f429ffacd4965805297
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:57,000] Trial 4 finished with value: 0.568 and parameters: {'learning_rate': 0.16967533607196555, 'depth': 5, 'l2_leaf_reg': 2.636424704863906}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_4 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/a8d2c6eba8ae483fa586ef4bb440ad84
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:58,136] Trial 5 finished with value: 0.601 and parameters: {'learning_rate': 0.018659959624904916, 'depth': 6, 'l2_leaf_reg': 5.72280788469014}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_5 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/2ca3f8df9fe540cc97dfd58819576965
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:10:59,256] Trial 6 finished with value: 0.571 and parameters: {'learning_rate': 0.04345454109729477, 'depth': 6, 'l2_leaf_reg': 6.506676052501415}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_6 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/27299a3577a245998429036e9c071731
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:00,497] Trial 7 finished with value: 0.605 and parameters: {'learning_rate': 0.01607123851203988, 'depth': 6, 'l2_leaf_reg': 4.297256589643226}. Best is trial 3 with value: 0.566.


🏃 View run Catboost_optuna_iteration_7 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/866f662fcdc643daa908830cad905c12
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:01,850] Trial 8 finished with value: 0.565 and parameters: {'learning_rate': 0.04717052037625178, 'depth': 9, 'l2_leaf_reg': 2.7970640394252375}. Best is trial 8 with value: 0.565.


🏃 View run Catboost_optuna_iteration_8 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/02820ce72f834b2d8336d3d110f07562
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:03,151] Trial 9 finished with value: 0.562 and parameters: {'learning_rate': 0.05748924681991978, 'depth': 8, 'l2_leaf_reg': 1.4180537144799796}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_9 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/863c50423a4f4a41bd8cbf41f72dd2a8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:04,757] Trial 10 finished with value: 0.598 and parameters: {'learning_rate': 0.24893231508461813, 'depth': 8, 'l2_leaf_reg': 1.0679258738466384}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_10 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/1c58279a88cc4aa0992e217ee5e00f8c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:06,162] Trial 11 finished with value: 0.574 and parameters: {'learning_rate': 0.03253425182484387, 'depth': 8, 'l2_leaf_reg': 3.5584186834693132}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_11 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/bbbc20eafb104ef2b5c0bacea5e34909
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:07,470] Trial 12 finished with value: 0.565 and parameters: {'learning_rate': 0.07186384123660489, 'depth': 8, 'l2_leaf_reg': 1.347215194590903}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_12 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/e1be2fd4fd114b09abe4ad1ecf465d6d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:08,862] Trial 13 finished with value: 0.566 and parameters: {'learning_rate': 0.05710611463977297, 'depth': 9, 'l2_leaf_reg': 4.2048210100149435}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_13 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/7141d7e6493d450e8b70898575338a86
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:10,299] Trial 14 finished with value: 0.593 and parameters: {'learning_rate': 0.02539103142171328, 'depth': 9, 'l2_leaf_reg': 2.57693774467556}. Best is trial 9 with value: 0.562.


🏃 View run Catboost_optuna_iteration_14 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/6e63877fb53447298b963ab740bec444
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:11,581] Trial 15 finished with value: 0.556 and parameters: {'learning_rate': 0.09672338830100866, 'depth': 9, 'l2_leaf_reg': 2.0093794150920257}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_15 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/4b4e6971a0334cc68eb474a25f2334fd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:12,735] Trial 16 finished with value: 0.572 and parameters: {'learning_rate': 0.11752482442830568, 'depth': 7, 'l2_leaf_reg': 1.4450336538325919}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_16 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/0015e9ba1c7241fb9f27afa53cc08e2a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:13,923] Trial 17 finished with value: 0.566 and parameters: {'learning_rate': 0.10435882230584731, 'depth': 7, 'l2_leaf_reg': 4.499440220053293}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_17 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/1ae43bc73a9c469f9cb32ad2c514d339
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:15,303] Trial 18 finished with value: 0.558 and parameters: {'learning_rate': 0.1837352199766314, 'depth': 9, 'l2_leaf_reg': 8.133778989170452}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_18 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/1479fa2e683a4e9ebae1e9ec837f465f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:16,735] Trial 19 finished with value: 0.595 and parameters: {'learning_rate': 0.29596284206078866, 'depth': 10, 'l2_leaf_reg': 8.471764723828645}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_19 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/9990c0969bce4247b7cb43b1661f18e6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:18,006] Trial 20 finished with value: 0.574 and parameters: {'learning_rate': 0.18348783233288368, 'depth': 9, 'l2_leaf_reg': 8.333053471044082}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_20 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/333adbc7eb784750b6525a97541f7947
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:19,269] Trial 21 finished with value: 0.578 and parameters: {'learning_rate': 0.16195228630570985, 'depth': 8, 'l2_leaf_reg': 9.978832409877402}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_21 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/8c80e58586da4e25987d8dd4065ba5c5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:20,540] Trial 22 finished with value: 0.568 and parameters: {'learning_rate': 0.07798353614492506, 'depth': 9, 'l2_leaf_reg': 5.14456034767506}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_22 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/e3f43b26720c4421a2b9b2ff8e7d6e45
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:21,745] Trial 23 finished with value: 0.567 and parameters: {'learning_rate': 0.13487130212775789, 'depth': 8, 'l2_leaf_reg': 1.7314887116553872}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_23 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/053c5e3f866d467d83fe71792b80b28d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:23,110] Trial 24 finished with value: 0.573 and parameters: {'learning_rate': 0.08732982437982302, 'depth': 10, 'l2_leaf_reg': 7.344343743413596}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_24 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/b86c926b3f104966ab4fb9726abba8a3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:24,320] Trial 25 finished with value: 0.56 and parameters: {'learning_rate': 0.062063456685780956, 'depth': 7, 'l2_leaf_reg': 3.5547510645237077}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_25 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/37db64ba93f34dcd846135f7f63aeb8f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:25,504] Trial 26 finished with value: 0.592 and parameters: {'learning_rate': 0.2086899224996805, 'depth': 7, 'l2_leaf_reg': 3.4021337686799744}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_26 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/9f212c384bfd4f8cbe99788c46d2ee83
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:26,763] Trial 27 finished with value: 0.573 and parameters: {'learning_rate': 0.1329085530372941, 'depth': 7, 'l2_leaf_reg': 3.5605971659223257}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_27 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/2d087b14c39d4951b48ef10386e22a92
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:28,086] Trial 28 finished with value: 0.564 and parameters: {'learning_rate': 0.09194263980021311, 'depth': 9, 'l2_leaf_reg': 5.137041535688812}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_28 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/a8ea496982a24636ae98716fc906387f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:29,473] Trial 29 finished with value: 0.584 and parameters: {'learning_rate': 0.06267845792872966, 'depth': 10, 'l2_leaf_reg': 8.98997023752528}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_29 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/3a19c2854add4b95b912075b1b53f40b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:30,647] Trial 30 finished with value: 0.573 and parameters: {'learning_rate': 0.03761753188744309, 'depth': 6, 'l2_leaf_reg': 6.998820753769471}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_30 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/ee76d6e04b4c449c8d58420ff4b9de67
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:31,862] Trial 31 finished with value: 0.571 and parameters: {'learning_rate': 0.04961718109516873, 'depth': 8, 'l2_leaf_reg': 1.8410484636196525}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_31 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/abb55dbecce74213b39c2300425a7e5d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:33,096] Trial 32 finished with value: 0.566 and parameters: {'learning_rate': 0.06685138097762715, 'depth': 8, 'l2_leaf_reg': 2.091670467569017}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_32 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/d33283855cfb46ed94f3c12fa6b35a28
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:34,414] Trial 33 finished with value: 0.587 and parameters: {'learning_rate': 0.02770040573789249, 'depth': 9, 'l2_leaf_reg': 2.8273945769723783}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_33 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/25a9c4404c4c40959f8a4e084bfb0b7e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:35,845] Trial 34 finished with value: 0.578 and parameters: {'learning_rate': 0.05555697575774188, 'depth': 10, 'l2_leaf_reg': 2.0657801657717707}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_34 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/ce826cfb306b4e30a050aa55d73c5c3e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:37,052] Trial 35 finished with value: 0.573 and parameters: {'learning_rate': 0.09239698231223484, 'depth': 7, 'l2_leaf_reg': 3.219866506154216}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_35 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/73277249f17b4d3d9f7f510e9bde63b7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:38,213] Trial 36 finished with value: 0.573 and parameters: {'learning_rate': 0.03962913996825942, 'depth': 5, 'l2_leaf_reg': 2.2228039618690874}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_36 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/8fccae4b198540099e5113d0f93c31c2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:39,357] Trial 37 finished with value: 0.566 and parameters: {'learning_rate': 0.14233653116985945, 'depth': 4, 'l2_leaf_reg': 5.994678501559357}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_37 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/aeb6548425ee4764becc487f0b18e692
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:40,595] Trial 38 finished with value: 0.579 and parameters: {'learning_rate': 0.11006384936785527, 'depth': 8, 'l2_leaf_reg': 1.0455235471932482}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_38 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/7a8c1d1aeed741349749c38eee57491f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:41,902] Trial 39 finished with value: 0.56 and parameters: {'learning_rate': 0.23979317837460828, 'depth': 9, 'l2_leaf_reg': 4.134106001587102}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_39 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/bd7b1a455f46460790e167478077d45c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:43,337] Trial 40 finished with value: 0.571 and parameters: {'learning_rate': 0.21397443697466811, 'depth': 10, 'l2_leaf_reg': 4.843465157754345}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_40 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/f347dba1755343ef983115ac209eb71a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:44,625] Trial 41 finished with value: 0.59 and parameters: {'learning_rate': 0.2846583896897537, 'depth': 9, 'l2_leaf_reg': 3.868683363965411}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_41 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/46d1294c65414bd4ba799e63ce10bebf
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:45,925] Trial 42 finished with value: 0.563 and parameters: {'learning_rate': 0.2406039739256781, 'depth': 9, 'l2_leaf_reg': 2.9035868965447067}. Best is trial 15 with value: 0.556.


🏃 View run Catboost_optuna_iteration_42 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/f916aae16c4749c49ea8035dc0490d0a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:47,170] Trial 43 finished with value: 0.551 and parameters: {'learning_rate': 0.16461139272031802, 'depth': 8, 'l2_leaf_reg': 4.063417976603828}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_43 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/3f71187083cd412ba69b04108f2bb432
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:48,446] Trial 44 finished with value: 0.582 and parameters: {'learning_rate': 0.16967815338691372, 'depth': 9, 'l2_leaf_reg': 4.0276643415442805}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_44 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/f5d2867fc7a44246acf91762a7d63398
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:49,628] Trial 45 finished with value: 0.57 and parameters: {'learning_rate': 0.1996511592157663, 'depth': 6, 'l2_leaf_reg': 6.065477502762828}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_45 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/ec4396cf53fd4a2fbdc7da623b2306bb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:50,865] Trial 46 finished with value: 0.574 and parameters: {'learning_rate': 0.14984901465905573, 'depth': 8, 'l2_leaf_reg': 4.54110049051632}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_46 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/9ea8d0b3a3034960a09d5b73f7d27745
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:52,085] Trial 47 finished with value: 0.567 and parameters: {'learning_rate': 0.12466480786239399, 'depth': 7, 'l2_leaf_reg': 3.8161689440056277}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_47 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/1eac634e1f304cbc92bd8721e657a0fb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:53,386] Trial 48 finished with value: 0.566 and parameters: {'learning_rate': 0.24549414786515986, 'depth': 9, 'l2_leaf_reg': 5.495720049674533}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_48 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/306e6ff9b2044fbe9a10332ff6ae475f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


[I 2025-08-26 11:11:54,615] Trial 49 finished with value: 0.568 and parameters: {'learning_rate': 0.07842255349941261, 'depth': 8, 'l2_leaf_reg': 3.215662322272498}. Best is trial 43 with value: 0.551.


🏃 View run Catboost_optuna_iteration_49 at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/70ce04f12b944130b34cc1a4ef227a35
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244
Best validation RMSE: 0.5510
Test RMSE: 0.5768
🏃 View run Catboost_optuna at: http://127.0.0.1:8080/#/experiments/830731795768476244/runs/c16549f5ad634cb8bd12b3e6a0fa70b1
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/830731795768476244


In [18]:
model_info.model_uri

'models:/m-5b72087dbbcc47debae7355e0ac8ccec'

In [19]:
loaded = mlflow.pyfunc.load_model(
    f"{model_info.model_uri}"
)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

# Loading the best optuna model

In [25]:
# Get experiment by name
experiment_name = "Wine Quality Regression"
experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment:
    experiment_id = experiment.experiment_id
    print(f"Experiment '{experiment_name}' has ID: {experiment_id}")
else:
    print(f"Experiment '{experiment_name}' not found")

Experiment 'Wine Quality Regression' has ID: 830731795768476244


In [27]:
# Find high-performing models across experiments
top_models = mlflow.search_logged_models(
    experiment_ids=[str(experiment_id)],
    filter_string="tags.best_model = 'true'",
    # max_results=3
)
top_models

In [24]:
top_models

In [28]:
# Search for runs with the "best_model" tag set to "true"
runs = mlflow.search_runs(
    filter_string="tags.best_model = 'true'",
    order_by=["metrics.rmse_val ASC"],
    max_results=3
)


In [29]:
runs.iloc[0]

run_id                                      c16549f5ad634cb8bd12b3e6a0fa70b1
experiment_id                                             830731795768476244
status                                                              FINISHED
artifact_uri               mlflow-artifacts:/830731795768476244/c16549f5a...
start_time                                  2025-08-26 07:10:50.952000+00:00
end_time                                    2025-08-26 07:11:55.826000+00:00
metrics.best_rmse                                                      0.551
metrics.rmse_test_best                                              0.576769
params.learning_rate                                     0.16461139272031802
params.depth                                                               8
params.l2_leaf_reg                                         4.063417976603828
tags.mlflow.source.name    /Users/timurbikmuhametov/miniconda3/envs/mlaca...
tags.mlflow.source.type                                                LOCAL

In [30]:
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.best_rmse,metrics.rmse_test_best,params.learning_rate,params.depth,params.l2_leaf_reg,tags.mlflow.source.name,tags.mlflow.source.type,tags.best_model,tags.mlflow.user,tags.mlflow.runName
0,c16549f5ad634cb8bd12b3e6a0fa70b1,830731795768476244,FINISHED,mlflow-artifacts:/830731795768476244/c16549f5a...,2025-08-26 07:10:50.952000+00:00,2025-08-26 07:11:55.826000+00:00,0.551,0.576769,0.164611392720318,8,4.063417976603828,/Users/timurbikmuhametov/miniconda3/envs/mlaca...,LOCAL,True,timurbikmuhametov,Catboost_optuna


In [None]:
# Get the run ID of the latest "best_model" if there are several
best_model_exp_id = runs.iloc[0].experiment_id
best_model_run_id = runs.iloc[0].run_id

# Load the best model for inference
loaded_model = mlflow.pyfunc.load_model(f"runs:/{best_model_run_id}/best_catboost_model")
y_pred = loaded_model.predict(x_test)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE on test set: {rmse_test}")

In [90]:

client = MlflowClient()
best_run_id = best_model_run_id  # from your search

print("Artifacts under the run:")
for f in client.list_artifacts(best_run_id):
    print("-", f.path)           # look for "model" or "best_catboost_model"

# Also print the run's tags; if it's registered, you'll see registry-related tags
print("\nRun tags:")
print(client.get_run(best_run_id).data.tags)

Artifacts under the run:

Run tags:
{'best_model': 'true', 'mlflow.user': 'timurbikmuhametov', 'mlflow.runName': 'Catboost_optuna', 'mlflow.source.name': '/Users/timurbikmuhametov/miniconda3/envs/mlacademy-tutorials/lib/python3.11/site-packages/ipykernel_launcher.py', 'mlflow.source.type': 'LOCAL'}


In [None]:
# Get the run ID of the latest "best_model" if there are several
latest_exp_id = runs.iloc[0].experiment_id
latest_run_id = runs.iloc[0].run_id

In [None]:
# List all artifacts for the given run_id
artifacts = client.list_artifacts(latest_run_id)
for artifact in artifacts:
    print(artifact.path)

In [None]:
# Load the latest best model (adjust artifact path if needed)
model_uri = f"runs:/{latest_run_id}/best_model_lasso"  # or ridge_model, etc.
latest_best_model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [None]:
# Setting hyperparameters

lasso_params = {
    'alpha': 0.1,
    'max_iter': 1000,
    'random_state': 42
}

# Train the model
trained_model, rmse_val, rmse_test = train_model(df, lasso_params, model='Lasso')

# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Prepare metrics and tags
    metrics = {
        'rmse_val': rmse_val,
        'rmse_test': rmse_test
    }
    
    # Let's add some tags to the run
    tags = {
        'model_version': 'baseline',
        'experiment_type': 'regression',
    }
    
    # Log everything using our function
    model_info = log_run(
        run_name="lasso_baseline",
        params=lasso_params,
        metrics=metrics,
        tags=tags,
        trained_model=trained_model,
        model_type='Lasso',
    )
    
    print(f"Model logged with URI: {model_info.model_uri}")
    print(f"RMSE validation: {rmse_val:.4f}")
    print(f"RMSE test: {rmse_test:.4f}")



RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353




Model logged with URI: models:/m-8696a37307ef40cd9f5e6e2376336660
RMSE validation: 0.6356
RMSE test: 0.6386
🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/707987884788603680/runs/d9730f37d2ef462daa8dab8205b6121c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/707987884788603680


In [None]:
latest_best_model

In [None]:
# Register the best model
mlflow.register_model(model_uri, "prod_lasso_model")

Successfully registered model 'prod_lasso_model'.
2025/08/02 14:08:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: prod_lasso_model, version 1
Created version '1' of model 'prod_lasso_model'.


<ModelVersion: aliases=[], creation_timestamp=1754129317360, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1754129317360, metrics=None, model_id=None, name='prod_lasso_model', params=None, run_id='34405e1852b04624bcb1fe1a93bd5146', run_link='', source='models:/m-279c6abed3a24012b3169306a0d3726f', status='READY', status_message=None, tags={}, user_id='', version='1'>

# Loading the registered model

In [None]:
model_name = "prod_lasso_model"
model_version = "latest"

# Load the model from the Model Registry
model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.sklearn.load_model(model_uri)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]