# Model Training with K-Folds Cross-Validation

In [1]:
%load_ext autoreload
%autoreload 2

import joblib
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import pandas as pd

from pathlib import Path
from sklearn import ensemble
from sklearn.metrics import mean_squared_error, mean_absolute_error
from typing import Dict, Tuple

# Prepare Data

## Load Data

More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [2]:
# Download original dataset with: python src/load_data.py 

raw_data = pd.read_csv(f"../data/raw_data.csv")
raw_data.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


## Define column mapping

In [3]:
target = 'cnt'
prediction = 'prediction'
datetime = 'dteday'
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'mnth', 'hr', 'weekday']
categorical_features = ['season', 'holiday', 'workingday', ]

## Define the comparison windows

In [4]:
start_date_0 = '2011-01-02 00:00:00'
end_date_0 = '2011-01-30 23:00:00'

experiment_batches = [
    
    ('2011-01-31 00:00:00','2011-02-06 23:00:00'),
    ('2011-02-07 23:00:00','2011-02-13 23:00:00'),
    ('2011-02-14 23:00:00','2011-02-20 23:00:00'),
    ('2011-02-21 00:00:00','2011-02-27 23:00:00'),
    ('2011-02-28 00:00:00','2011-03-06 23:00:00'),  
]

## Define the Reference data

In [5]:
# Set datetime index 
raw_data = raw_data.set_index('dteday')

# Define the reference dataset
reference = raw_data.loc[start_date_0:end_date_0]

print(reference.shape)
reference.head()

(617, 16)


Unnamed: 0_level_0,instant,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
dteday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2011-01-03,48,1,0,1,0,0,1,1,1,0.22,0.197,0.44,0.3582,0,5,5
2011-01-03,49,1,0,1,1,0,1,1,1,0.2,0.1667,0.44,0.4179,0,2,2
2011-01-03,50,1,0,1,4,0,1,1,1,0.16,0.1364,0.47,0.3881,0,1,1
2011-01-03,51,1,0,1,5,0,1,1,1,0.16,0.1364,0.47,0.2836,0,3,3
2011-01-03,52,1,0,1,6,0,1,1,1,0.14,0.1061,0.5,0.3881,0,30,30


# MLflow Client

## Set up MLFlow

In [6]:
# Set up MLFlow Client
MLFLOW_TRACKING_URI = "http://localhost:5001"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")

Client tracking uri: http://localhost:5001


In [7]:
client

<mlflow.tracking.client.MlflowClient at 0x13ce79cd0>

## Create a new Experiment

In order to group any distinct runs of a particular project or idea together, we can define an Experiment that will group each iteration (runs) together. 
Defining a unique name that is relevant to what we're working on helps with organization and reduces the amount of work (searching) to find our runs later on. 

In [8]:
# Get an experiment by name if it exists

exp = client.get_experiment_by_name('1-Train-K-Fold')
exp

In [9]:
# Create a new experiment if it doesn't exist

if exp:
    experiment_id = exp.experiment_id
else:
    experiment_id = client.create_experiment('1-Train-K-Fold')
    
experiment_id

'903011176581290144'

In [10]:
# Fetch experiment metadata information

experiment = client.get_experiment(experiment_id)
print(f"Name: {experiment.name}")
print(f"Experiment_id: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
print(f"Tags: {experiment.tags}")
print(f"Lifecycle_stage: {experiment.lifecycle_stage}")

Name: 1-Train-K-Fold
Experiment_id: 903011176581290144
Artifact Location: mlflow-artifacts:/903011176581290144
Tags: {}
Lifecycle_stage: active


# Metrics Tracking for K-Fold Experiments

In [11]:
# Set experiment
mlflow.set_experiment('1-Train-K-Fold') # Create a new Experiment if it doesn't exist

# Set experiment variables
model_path = Path('../models/model.joblib')
ref_end_data = end_date_0

# Run model train for each batch (K-Fold)
for k, date in enumerate(experiment_batches):

    print(f"Train period: {start_date_0} - {ref_end_data}") 
    X_train = raw_data.loc[start_date_0:ref_end_data, numerical_features + categorical_features]
    y_train = raw_data.loc[start_date_0:ref_end_data, target]
    print("X_train (reference) dataset shape: ", X_train.shape, y_train.shape)
    
    print(f"Test period: {date[0]} - {date[1]}") 
    current = raw_data.loc[date[0]:date[1]]
    X_test = current.loc[:, numerical_features + categorical_features]
    y_test = current[target]
    print("X_test (current)) dataset shape: ",  X_test.shape, y_test.shape)

    # Train model
    regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
    regressor.fit(X_train, y_train)

    # Calculate metrics
    preds = regressor.predict(X_test)
    me = mean_squared_error(y_test, preds)
    mae = mean_absolute_error(y_test, preds)
    # print(me, mae)
    
    # Start a new MLflow Run
    with mlflow.start_run() as run: 
        
        # Show newly created run metadata info
        print("Experiment id: {}".format(run.info.experiment_id))
        print("Run id: {}".format(run.info.run_id))
        print("Run name: {}".format(run.info.run_name))
        print('MLFlow tracking uri:', mlflow.get_tracking_uri())
        print('MLFlow artifact uri:', mlflow.get_artifact_uri())
        
        # Log parameters
        mlflow.log_param("begin", date[0])
        mlflow.log_param("end", date[1])
        
        # Log metrics
        mlflow.log_metric('me', round(me, 3))
        mlflow.log_metric('mae', round(mae, 3))
        
        # Log model 
        mlflow.log_artifact(model_path)

    # Update reference end date
    ref_end_data = date[1]
    

2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Train period: 2011-01-02 00:00:00 - 2011-01-30 23:00:00
X_train (reference) dataset shape:  (617, 10) (617,)
Test period: 2011-01-31 00:00:00 - 2011-02-06 23:00:00
X_test (current)) dataset shape:  (141, 10) (141,)
Experiment id: 903011176581290144
Run id: a2da26d5c5fb4773b3b73352987d826a
Run name: rambunctious-hen-993
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/903011176581290144/a2da26d5c5fb4773b3b73352987d826a/artifacts


2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run rambunctious-hen-993 at: http://localhost:5001/#/experiments/903011176581290144/runs/a2da26d5c5fb4773b3b73352987d826a
🧪 View experiment at: http://localhost:5001/#/experiments/903011176581290144
Train period: 2011-01-02 00:00:00 - 2011-02-06 23:00:00
X_train (reference) dataset shape:  (782, 10) (782,)
Test period: 2011-02-07 23:00:00 - 2011-02-13 23:00:00
X_test (current)) dataset shape:  (139, 10) (139,)
Experiment id: 903011176581290144
Run id: 1ddd63962e984874994a7ebf64a4a19f
Run name: enchanting-finch-566
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/903011176581290144/1ddd63962e984874994a7ebf64a4a19f/artifacts
🏃 View run enchanting-finch-566 at: http://localhost:5001/#/experiments/903011176581290144/runs/1ddd63962e984874994a7ebf64a4a19f
🧪 View experiment at: http://localhost:5001/#/experiments/903011176581290144
Train period: 2011-01-02 00:00:00 - 2011-02-13 23:00:00
X_train (reference) dataset shape:  (945, 10) (945,)
Test period: 2

2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


Experiment id: 903011176581290144
Run id: 353b9c16172842b7a37426eae706ae33
Run name: powerful-jay-140
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/903011176581290144/353b9c16172842b7a37426eae706ae33/artifacts
🏃 View run powerful-jay-140 at: http://localhost:5001/#/experiments/903011176581290144/runs/353b9c16172842b7a37426eae706ae33
🧪 View experiment at: http://localhost:5001/#/experiments/903011176581290144
Train period: 2011-01-02 00:00:00 - 2011-02-20 23:00:00
X_train (reference) dataset shape:  (1110, 10) (1110,)
Test period: 2011-02-21 00:00:00 - 2011-02-27 23:00:00
X_test (current)) dataset shape:  (134, 10) (134,)
Experiment id: 903011176581290144
Run id: 013d7ab31a894ceebbacc2138762c061
Run name: skillful-wasp-852
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/903011176581290144/013d7ab31a894ceebbacc2138762c061/artifacts
🏃 View run skillful-wasp-852 at: http://localhost:5001/#/experiments/903011176581290144/

2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:50 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


Experiment id: 903011176581290144
Run id: d5505bf4f27e47d398fbb41597e675b6
Run name: nimble-cub-510
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/903011176581290144/d5505bf4f27e47d398fbb41597e675b6/artifacts
🏃 View run nimble-cub-510 at: http://localhost:5001/#/experiments/903011176581290144/runs/d5505bf4f27e47d398fbb41597e675b6
🧪 View experiment at: http://localhost:5001/#/experiments/903011176581290144


# Nested Runs

In [12]:
# Set up MLFlow Client
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")

Client tracking uri: http://localhost:5001


In [13]:

# Set experiment name
mlflow.set_experiment('2-Nested-Runs')

2025/06/17 09:35:51 INFO mlflow.tracking.fluent: Experiment with name '2-Nested-Runs' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/364566909229737918', creation_time=1750127751599, experiment_id='364566909229737918', last_update_time=1750127751599, lifecycle_stage='active', name='2-Nested-Runs', tags={}>

In [14]:

# Set experiment variables
model_path = Path('../models/model.joblib')
ref_end_data = end_date_0

# Start a new Run (Parent Run)
with mlflow.start_run() as run: 
    
    # Update metrics with metrics for each Fold
    metrics = {}

    # Run model train for each batch (K-Fold)
    for k, date in enumerate(experiment_batches):
            
        print(f"Train period: {start_date_0} - {ref_end_data}") 
        X_train = raw_data.loc[start_date_0:ref_end_data, numerical_features + categorical_features]
        y_train = raw_data.loc[start_date_0:ref_end_data, target]
        print("X_train (reference) dataset shape: ", X_train.shape, y_train.shape)
        
        print(f"Test period: {date[0]} - {date[1]}") 
        current = raw_data.loc[date[0]:date[1]]
        X_test = current.loc[:, numerical_features + categorical_features]
        y_test = current[target]
        print("X_test (current)) dataset shape: ",  X_test.shape, y_test.shape)
        
        # Update reference end date
        ref_end_data = date[1]

        # Train model
        regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
        regressor.fit(X_train, y_train)

        # Calculate metrucs
        preds = regressor.predict(X_test)
        me = mean_squared_error(y_test, preds)
        mae = mean_absolute_error(y_test, preds)
        metrics.update({date[1]: {'me': me, 'mae': mae}})
        
        # Run a Child Run for each Fold 
        with mlflow.start_run(run_name=date[1], nested=True) as child_run:
            
            # Show newly created run metadata info
            print("Experiment id: {}".format(run.info.experiment_id))
            print("Run id: {}".format(run.info.run_id))
            print("Run name: {}".format(run.info.run_name))
            print('MLFlow tracking uri:', mlflow.get_tracking_uri())
            print('MLFlow artifact uri:', mlflow.get_artifact_uri())
            
            # Log parameters
            mlflow.log_param("begin", date[0])
            mlflow.log_param("end", date[1])
            
            # Log metrics
            mlflow.log_metric('me', round(me, 3))
            mlflow.log_metric('mae', round(mae, 3))
        
    # Save model
    joblib.dump(regressor, model_path)

    # Log the last batch model as the parent Run model
    mlflow.log_artifact(model_path)
    
    # Log metrics
    average_run_merics = pd.DataFrame.from_dict(metrics).T.mean().round(3).to_dict()
    mlflow.log_metrics(average_run_merics)

2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Train period: 2011-01-02 00:00:00 - 2011-01-30 23:00:00
X_train (reference) dataset shape:  (617, 10) (617,)
Test period: 2011-01-31 00:00:00 - 2011-02-06 23:00:00
X_test (current)) dataset shape:  (141, 10) (141,)
Experiment id: 364566909229737918
Run id: 190a2c7c829548b38db86bffe60a6fcd
Run name: debonair-penguin-841
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/364566909229737918/231a10fcb1404faaaf84222ae2a28ce0/artifacts
🏃 View run 2011-02-06 23:00:00 at: http://localhost:5001/#/experiments/364566909229737918/runs/231a10fcb1404faaaf84222ae2a28ce0
🧪 View experiment at: http://localhost:5001/#/experiments/364566909229737918
Train period: 2011-01-02 00:00:00 - 2011-02-06 23:00:00
X_train (reference) dataset shape:  (782, 10) (782,)
Test period: 2011-02-07 23:00:00 - 2011-02-13 23:00:00
X_test (current)) dataset shape:  (139, 10) (139,)
Experiment id: 364566909229737918
Run id: 190a2c7c829548b38db86bffe60a6fcd
Run name: debonair-penguin-841
MLFlow tr

2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run 2011-02-13 23:00:00 at: http://localhost:5001/#/experiments/364566909229737918/runs/30060a461af14ff0985a1ad6e027b1e1
🧪 View experiment at: http://localhost:5001/#/experiments/364566909229737918
Train period: 2011-01-02 00:00:00 - 2011-02-13 23:00:00
X_train (reference) dataset shape:  (945, 10) (945,)
Test period: 2011-02-14 23:00:00 - 2011-02-20 23:00:00
X_test (current)) dataset shape:  (141, 10) (141,)
Experiment id: 364566909229737918
Run id: 190a2c7c829548b38db86bffe60a6fcd
Run name: debonair-penguin-841
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/364566909229737918/28949a371e264420887e2a047b4cc78d/artifacts
🏃 View run 2011-02-20 23:00:00 at: http://localhost:5001/#/experiments/364566909229737918/runs/28949a371e264420887e2a047b4cc78d
🧪 View experiment at: http://localhost:5001/#/experiments/364566909229737918
Train period: 2011-01-02 00:00:00 - 2011-02-20 23:00:00
X_train (reference) dataset shape:  (1110, 10) (1110,)
Test period: 2

2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


Experiment id: 364566909229737918
Run id: 190a2c7c829548b38db86bffe60a6fcd
Run name: debonair-penguin-841
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/364566909229737918/70f87d7070664a55a3bbb65c41c012e7/artifacts
🏃 View run 2011-02-27 23:00:00 at: http://localhost:5001/#/experiments/364566909229737918/runs/70f87d7070664a55a3bbb65c41c012e7
🧪 View experiment at: http://localhost:5001/#/experiments/364566909229737918
Train period: 2011-01-02 00:00:00 - 2011-02-27 23:00:00
X_train (reference) dataset shape:  (1268, 10) (1268,)
Test period: 2011-02-28 00:00:00 - 2011-03-06 23:00:00
X_test (current)) dataset shape:  (143, 10) (143,)
Experiment id: 364566909229737918
Run id: 190a2c7c829548b38db86bffe60a6fcd
Run name: debonair-penguin-841
MLFlow tracking uri: http://localhost:5001
MLFlow artifact uri: mlflow-artifacts:/364566909229737918/59320baf97bb43dd842e20486bb3b8ae/artifacts
🏃 View run 2011-03-06 23:00:00 at: http://localhost:5001/#/experiments/3645669

2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run debonair-penguin-841 at: http://localhost:5001/#/experiments/364566909229737918/runs/190a2c7c829548b38db86bffe60a6fcd
🧪 View experiment at: http://localhost:5001/#/experiments/364566909229737918


# Log metrics by steps or timestamps

In [15]:
import time
import datetime

# Set up MLFlow Client
MLFLOW_TRACKING_URI = "http://localhost:5001"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()
print(f"Client tracking uri: {client.tracking_uri}")

# Set experiment name
mlflow.set_experiment('3-Metrics-by-steps')

# Set experiment variables
model_path = Path('../models/model.joblib')
ref_end_data = end_date_0

# Start a new MLflow Run
with mlflow.start_run() as run: 

    # Run model train for each batch (K-Fold)
    for k, date in enumerate(experiment_batches):

        # Calculate timestamp
        timestamp = time.mktime(datetime.datetime.strptime(date[1], "%Y-%m-%d %H:%M:%S").timetuple())
    
        print(f"Train period: {start_date_0} - {ref_end_data}") 
        X_train = raw_data.loc[start_date_0:ref_end_data, numerical_features + categorical_features]
        y_train = raw_data.loc[start_date_0:ref_end_data, target]
        print("X_train (reference) dataset shape: ", X_train.shape, y_train.shape)
        
        print(f"Test period: {date[0]} - {date[1]}") 
        current = raw_data.loc[date[0]:date[1]]
        X_test = current.loc[:, numerical_features + categorical_features]
        y_test = current[target]
        print("X_test (current)) dataset shape: ",  X_test.shape, y_test.shape)
        
        # Update reference end date
        ref_end_data = date[1]
        
        # Train model
        regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)
        regressor.fit(X_train, y_train)
    
        # Calculate metrics
        preds = regressor.predict(X_test)
        me = mean_squared_error(y_test, preds)
        mae = mean_absolute_error(y_test, preds)
        # print(me, mae)
        
        # Log metrics (use Client)
        # >>> 'timestamp' - Time when this metric was calculated. Defaults to the current system time
        # >>> 'step' -  Integer training step (iteration) at which was the metric calculated. Defaults to 0.
        client.log_metric(run.info.run_id, 'me', round(me, 3), timestamp=int(timestamp)*1000)
        client.log_metric(run.info.run_id, 'mae', round(mae, 3), step=k)

    # Log model 
    mlflow.log_artifact(model_path)

    # Log parameters
    mlflow.log_param("begin", date[0])
    mlflow.log_param("end", date[1])
    

2025/06/17 09:35:52 INFO mlflow.tracking.fluent: Experiment with name '3-Metrics-by-steps' does not exist. Creating a new experiment.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:52 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Client tracking uri: http://localhost:5001
Train period: 2011-01-02 00:00:00 - 2011-01-30 23:00:00
X_train (reference) dataset shape:  (617, 10) (617,)
Test period: 2011-01-31 00:00:00 - 2011-02-06 23:00:00
X_test (current)) dataset shape:  (141, 10) (141,)
Train period: 2011-01-02 00:00:00 - 2011-02-06 23:00:00
X_train (reference) dataset shape:  (782, 10) (782,)
Test period: 2011-02-07 23:00:00 - 2011-02-13 23:00:00
X_test (current)) dataset shape:  (139, 10) (139,)
Train period: 2011-01-02 00:00:00 - 2011-02-13 23:00:00
X_train (reference) dataset shape:  (945, 10) (945,)
Test period: 2011-02-14 23:00:00 - 2011-02-20 23:00:00
X_test (current)) dataset shape:  (141, 10) (141,)
Train period: 2011-01-02 00:00:00 - 2011-02-20 23:00:00
X_train (reference) dataset shape:  (1110, 10) (1110,)
Test period: 2011-02-21 00:00:00 - 2011-02-27 23:00:00
X_test (current)) dataset shape:  (134, 10) (134,)
Train period: 2011-01-02 00:00:00 - 2011-02-27 23:00:00
X_train (reference) dataset shape:  (12

2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run hilarious-shrew-687 at: http://localhost:5001/#/experiments/958827083967108750/runs/e59d64f8d86d418c859ae30b3305e6df
🧪 View experiment at: http://localhost:5001/#/experiments/958827083967108750


## Use Run ID to add a nested run


In [16]:
# Train script

# Start a new Run (Parent Run)
with mlflow.start_run() as run: 
    print(run)
    

2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/06/17 09:35:53 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


<ActiveRun: >
🏃 View run invincible-kite-975 at: http://localhost:5001/#/experiments/958827083967108750/runs/16b056dc7bad45f4b526bea7128a5e62
🧪 View experiment at: http://localhost:5001/#/experiments/958827083967108750
