In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mlflow
from mlflow.models.signature import infer_signature
from typing import Tuple, Union, List, Dict, Mapping, Any, Literal, Optional
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt
import joblib
import torch
import optuna
from pathlib import Path
from utils import plot_errors, eval_model
import utils
import warnings
%load_ext autoreload
%autoreload 2
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

# Table of Contents
1. [Setting up MLflow](#Setting-up-MLflow)
2. [First experiment: RF baseline](#First-experiment:-RF-baseline)
3. [Cleaned data: Random Forest](#Cleaned-data:-Random-Forest)
4. [Cleaned data: CatBoost](#Cleaned-data:-CatBoost)
5. [Cleaned Data: MLP Neural Network](#Cleaned-Data:-MLP-Neural-Network)
6. [Cleaned Data: LSTM](#Cleaned-Data:-LSTM)
7. [Child runs intro](#Child-runs-intro)
8. [CatBoost Bayesian Hyperparameter Tuning with Child Runs](#CatBoost-Bayesian-Hyperparameter-Tuning-with-Child-Runs)
9. [Logging important artifacts](#Logging-important-artifacts)
10. [Logging and loading the best CatBoost Optuna model with X_scaler](#Logging-and-loading-the-best-CatBoost-Optuna-model-with-X_scaler)
11. [Testing the best model on Production Data](#Testing-the-best-model-on-Production-Data)

In [10]:
# Global random seed reused by the model configurations below
# (passed as `random_state` / `random_seed`, and to `np.random.seed`).
SEED = 42

In [11]:
def remove_diff_outliers(df, column, diff_threshold):
    """
    Blank out samples whose jump from the previous sample is too large, then fill them.

    A sample is flagged when the absolute first-order difference to the
    preceding row exceeds ``diff_threshold``. Flagged values are replaced by
    NaN and the gaps are closed with a forward fill followed by a backward
    fill (the backward fill only matters for gaps at the very start).
    No rows are dropped. Because the diff is absolute, the sample right after
    an isolated spike can be flagged as well (the jump back is also large).

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe. It is not modified; a copy is returned.
    column : str
        Column to clean.
    diff_threshold : float
        Absolute diff threshold; strictly larger diffs are treated as outliers.

    Returns
    -------
    df_clean : pd.DataFrame
        Copy of ``df`` with the cleaned column.
    outlier_idx : pd.Index
        Index labels of the samples that were blanked out.
    """
    result = df.copy()

    # Size of the step from the previous row (NaN for the first row, never flagged).
    jumps = result[column].diff(1).abs()
    flagged = result.index[jumps > diff_threshold]

    # Blank the flagged samples, then propagate the last valid value into the gaps.
    result.loc[flagged, column] = np.nan
    forward_filled = result[column].ffill()
    result[column] = forward_filled.bfill()

    return result, flagged

In [12]:
def smooth_signal(df, column, window, method="median"):
    """
    Return a copy of ``df`` with one column replaced by its trailing rolling average.

    The window is trailing (``center=False``) and ``min_periods=1``, so the
    first rows are smoothed over however many samples are available and no
    NaNs are introduced by the window itself.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe. It is not modified.
    column : str
        Column to smooth.
    window : int
        Rolling window size.
    method : str
        "mean"   -> rolling mean filter
        "median" -> rolling median filter (robust smoothing)

    Returns
    -------
    df_smoothed : pd.DataFrame
        DataFrame with smoothed column.

    Raises
    ------
    ValueError
        If ``method`` is neither "mean" nor "median".
    """
    if method not in ("mean", "median"):
        raise ValueError("method must be 'mean' or 'median'")

    smoothed = df.copy()
    roller = smoothed[column].rolling(window=window, min_periods=1, center=False)
    smoothed[column] = roller.mean() if method == "mean" else roller.median()
    return smoothed

In [13]:
def add_lag_features(
    df: pd.DataFrame,
    columns: Optional[List[str]] = None,
    lags: Union[int, List[int]] = [1],
    drop_na=True,
):
    """
    Add lagged copies of selected columns to a DataFrame.

    For every column ``c`` and lag ``k`` a new column ``"{c}_lag{k}"`` holding
    ``df[c].shift(k)`` is added. The input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list of str, str or None
        Columns to lag. ``None`` (default) means all numeric columns of ``df``;
        a single column name is also accepted.
    lags : int or list of int
        Lag period(s) in rows (default: 1). The default list is never mutated.
    drop_na : bool
        If True (default), drop every row that contains a NaN in any column.
        If False, backward-fill NaNs instead; this fills *all* NaNs in the
        frame, not only the ones created by shifting.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` extended with the lag columns.
    """
    # Honour the documented defaults: previously columns=None crashed with a
    # TypeError and an int `lags` was not iterable.
    if columns is None:
        columns = df.select_dtypes(include="number").columns.tolist()
    elif isinstance(columns, str):
        columns = [columns]
    if isinstance(lags, int):
        lags = [lags]

    df_result = df.copy()

    # Create lag features
    for col in columns:
        for lag in lags:
            df_result[f"{col}_lag{lag}"] = df_result[col].shift(lag)

    if drop_na:
        return df_result.dropna()
    return df_result.bfill()  # Backward fill NaNs

In [14]:
def _smooth_and_add_features(frame, smooth_cols, smooth_window):
    """Run the shared denoising, lag and rolling-max steps on one split (train or test)."""
    lag_columns = ["GenRPM", "GenPh1Temp", "WindSpeed"]
    rolling_columns = ["GenRPM", "GenPh1Temp", "WindSpeed",
                       "WindDirAbs", "WindDirRel", "Pitch", "RotorRPM"]

    # Denoising: trailing rolling mean on every listed column.
    for col in smooth_cols:
        frame = smooth_signal(frame, col, window=smooth_window, method="mean")

    # Lag features (rows are kept; NaNs are backward-filled).
    frame = add_lag_features(
        frame,
        columns=lag_columns,
        lags=[1, 2, 3],
        drop_na=False,
    )

    # Rolling statistical features (rows are kept; NaNs are backward-filled).
    return add_rolling_features(
        frame,
        window_sizes=[3, 6],  # e.g. [5, 10, 15]
        columns=rolling_columns,
        stats=["max"],
        drop_na=False,
    )


def get_clean_data(
    df_train: pd.DataFrame,
    df_test: pd.DataFrame,
    abs_diff_thresholds: Dict[str, float],
    smooth_window: int
) -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
    """
    Clean and enrich train/test turbine datasets:
    - replace outliers using absolute-difference thresholds,
    - denoise every feature column with a trailing rolling mean,
    - add lag features (lags 1, 2, 3),
    - add rolling-max features (windows 3 and 6).

    Assumptions
    ----------
    - `df_train` and `df_test` contain a target column named 'Power'.
    - No step drops rows: `remove_diff_outliers` fills the values it blanks
      out, and the lag / rolling helpers are called with ``drop_na=False``.
    - Columns required for feature engineering are present:
      ['GenRPM', 'GenPh1Temp', 'WindSpeed', 'WindDirAbs',
       'WindDirRel', 'Pitch', 'RotorRPM'].
    - The columns to smooth are taken from the *train* frame (everything
      except 'Power'), so the test frame must contain the same columns.
    - Helper functions `remove_diff_outliers`, `smooth_signal`,
      `add_lag_features`, and `add_rolling_features` are defined in this
      notebook before this function is called.

    Parameters
    ----------
    df_train : pd.DataFrame
        Training dataset with features and target ('Power').
    df_test : pd.DataFrame
        Test dataset with features and target ('Power').
    abs_diff_thresholds : Dict[str, float]
        Mapping from column name to absolute-difference threshold used by
        `remove_diff_outliers`. On the train set every listed column is
        cleaned (including 'Power'); on the test set 'Power' is skipped.
    smooth_window : int
        Window size of the rolling-mean denoising step.

    Returns
    -------
    X_train : pd.DataFrame
        Cleaned and feature-engineered training features (without 'Power').
    y_train : pd.Series
        Cleaned training target ('Power').
    X_test : pd.DataFrame
        Cleaned and feature-engineered test features (without 'Power').
    y_test : pd.Series
        Original test target ('Power'), not filtered by outlier logic.
    """
    # Outlier removal on the train set (features + target).
    df_filtered_train = df_train.copy()
    for col, thr in abs_diff_thresholds.items():
        df_filtered_train, _ = remove_diff_outliers(df_filtered_train, col, thr)

    # Outlier removal on the test set (features only; the target stays untouched).
    filt_cols = [col for col in df_test.columns if col != "Power"]
    x_filtered_test = df_test[filt_cols].copy()
    for col, thr in abs_diff_thresholds.items():
        if col != "Power":
            x_filtered_test, _ = remove_diff_outliers(x_filtered_test, col, thr)

    # Identical denoising + feature engineering for both splits.
    smooth_cols = [col for col in df_filtered_train.columns if col != "Power"]
    df_filtered_train = _smooth_and_add_features(df_filtered_train, smooth_cols, smooth_window)
    x_filtered_test = _smooth_and_add_features(x_filtered_test, smooth_cols, smooth_window)

    X_train = df_filtered_train.drop(columns="Power")
    y_train = df_filtered_train["Power"]
    X_test = x_filtered_test
    y_test = df_test["Power"]

    return X_train, y_train, X_test, y_test

In [15]:
def add_rolling_features(df, columns, window_sizes=7, stats=['mean', 'median'], drop_na=True):
    """
    Add rolling-window statistics of selected columns to a DataFrame.

    For every column ``c``, window ``w`` and statistic ``s`` a new column
    ``"{c}_roll{w}_{s}"`` is added. The input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : str or list of str
        Column name(s) to compute rolling statistics for.
    window_sizes : int or list of int
        Rolling window size(s) in rows (default: 7).
    stats : str or list of str
        Statistic(s) to compute; any of
        'mean', 'median', 'std', 'min', 'max', 'skew', 'kurt'.
        The default list is never mutated.
    drop_na : bool
        If True (default), drop every row that contains a NaN in any column.
        If False, backward-fill NaNs instead (this fills all NaNs in the frame).

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` extended with the rolling features.

    Raises
    ------
    ValueError
        If ``stats`` contains a name that is not supported. Previously such
        names were skipped silently, so a typo produced no feature and no error.
    """
    supported_stats = ("mean", "median", "std", "min", "max", "skew", "kurt")

    # Convert single values to lists
    if isinstance(window_sizes, int):
        window_sizes = [window_sizes]
    if isinstance(columns, str):
        columns = [columns]
    if isinstance(stats, str):
        # A bare string would otherwise be iterated character by character.
        stats = [stats]

    unknown = [stat for stat in stats if stat not in supported_stats]
    if unknown:
        raise ValueError(
            f"Unsupported rolling statistic(s): {unknown}; "
            f"choose from {list(supported_stats)}"
        )

    df_result = df.copy()

    # Create rolling features; every supported name is a method on the Rolling object.
    for col in columns:
        for window in window_sizes:
            rolling = df_result[col].rolling(window)
            for stat in stats:
                df_result[f"{col}_roll{window}_{stat}"] = getattr(rolling, stat)()

    if drop_na:
        return df_result.dropna()
    return df_result.bfill()  # Backward fill NaNs

**In a terminal (command line), navigate to the project root folder and run:**
1. mkdir mlflow
2. cd mlflow
3. mlflow server --host 127.0.0.1 --port 8080

**This is what it does:**

1. **mkdir mlflow** creates mlflow directory where we will store the models and experiments.

2. **cd mlflow** changes the directory to mlflow

3. Starts the MLflow Tracking Server
A dedicated process that manages and serves your MLflow experiments.

4. Provides a Web UI
Accessible at http://127.0.0.1:8080 (or localhost:8080), where you can browse experiments, runs, parameters, metrics, and artifacts.

5. Exposes a Tracking API Endpoint
Other scripts or notebooks can log directly to this server if you set - **mlflow.set_tracking_uri("http://127.0.0.1:8080")**

6. MLflow automatically creates a folder mlruns/ in your working directory the first time you log something.
Inside mlruns/, it creates subfolders for:

- each experiment (default is 0)

- each run within that experiment

# Setting up MLflow

### Setting URI

In [None]:
# Point the MLflow client at the local tracking server
# (the one started with `mlflow server --host 127.0.0.1 --port 8080`).
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

What it does:

1. It tells your MLflow client (your script/notebook) where to send all logging data (experiments, runs, params, metrics, artifacts).

2. Since our host is local, it will still write to mlruns, but here you could configure a remote host URI.

In [None]:
# Sanity check: show which tracking URI the client is currently using.
print("Current Tracking URI:", mlflow.get_tracking_uri())

# First experiment: RF baseline

To create an experiment, we need to run the command:

mlflow.set_experiment("Baseline Anomaly Model")

If we do so, in the UI (http://127.0.0.1:8080), we will see that the experiment is created.

In [None]:
# Select the active experiment by name; MLflow creates it if it doesn't exist yet.
mlflow.set_experiment("Baseline Anomaly Model")

Let's log the very first model that we got on the raw data and log our first experiment.

In [20]:
# Read again for reproducibility
df = pd.read_parquet('../data/01_raw/df_train_test.parquet')
# Keep only the rows where Power is above 20.
df = df[df['Power'] > 20].copy()
# Use the parsed timestamps as the index.
df.index = pd.to_datetime(df['Timestamps'])
# NOTE(review): 'Timestamps' is left in place as a regular column here (unlike the
# cleaned-data section, which drops it), so any feature list built from
# df.columns has to account for it -- confirm this is intended.

# Split the data: first 30,000 rows for training, the remainder for testing.
df_train = df[:30_000]
df_test = df[30_000:]

In [21]:
df_train

Unnamed: 0_level_0,Timestamps,WindSpeed,WindDirAbs,WindDirRel,Power,Pitch,GenRPM,RotorRPM,EnvirTemp,NacelTemp,GearOilTemp,GearBearTemp,GenPh1Temp,GenBearTemp
Timestamps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2007-07-29 03:10:00,2007-07-29 03:10:00,4.790142,307.5,1.8,520.300000,-1.483813,1078.376427,16.229237,17.866438,59.882623,58.059329,49.079118,79.193312,51.0
2007-07-29 03:20:00,2007-07-29 03:20:00,4.285207,312.5,2.2,564.700000,1.831067,841.497573,16.686210,21.404675,15.233595,46.598905,45.249571,95.670331,52.0
2007-07-29 03:30:00,2007-07-29 03:30:00,7.143066,304.4,-4.9,1632.407112,-4.402055,1022.534164,15.149165,16.315175,30.932959,49.956838,55.738552,102.969851,53.0
2007-07-29 03:40:00,2007-07-29 03:40:00,9.469090,299.5,-3.1,571.600000,-8.720774,867.724340,12.674883,17.626694,17.668583,45.713568,46.011476,91.759158,53.0
2007-07-29 03:50:00,2007-07-29 03:50:00,3.997540,313.8,6.7,553.000000,-2.014152,1048.110983,18.668083,17.984032,31.084279,50.049534,67.344350,90.274146,53.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008-02-22 10:20:00,2008-02-22 10:20:00,2.168136,301.2,-0.3,524.900000,-2.657893,929.297145,13.759142,8.362282,38.902156,49.957331,49.694407,76.873304,76.0
2008-02-22 10:30:00,2008-02-22 10:30:00,8.533812,301.5,0.2,531.300000,1.474806,904.893779,18.096816,13.002330,15.161773,50.281284,64.106411,81.280686,76.0
2008-02-22 10:40:00,2008-02-22 10:40:00,3.595898,299.7,-1.1,558.200000,-4.092066,1002.941924,29.516623,18.171560,29.842036,43.268336,49.106053,85.158141,76.0
2008-02-22 10:50:00,2008-02-22 10:50:00,-1.077070,290.6,-3.1,540.300000,-0.432372,955.865234,16.403157,12.481199,32.796415,56.342692,71.150101,92.161368,76.0


In [None]:
params = {
    'n_estimators': 100,
    'random_state': SEED,
    'n_jobs': -1,
}

# Feature columns: everything except the target. 'Timestamps' is still a regular
# (datetime) column in this section, so it is excluded as well -- it is not a
# sensor reading, and the cleaned-data runs below do not use it either.
feats = [col for col in df_train.columns if col not in ('Power', 'Timestamps')]

# Start an MLflow run
with mlflow.start_run(run_name='rf_baseline'):
    # Train the model and compute the metrics
    eval_results = eval_model(
        df_train[feats],
        df_train['Power'],
        df_test[feats],
        df_test['Power'],
        3,
        'RF',
        params
    )

    # Log (store) the hyperparameters
    mlflow.log_params(params)

    # Log the cross-validation and test metrics in a single call
    mlflow.log_metrics({
        "cv_mae": eval_results['cv_mae'],
        "cv_rmse": eval_results['cv_rmse'],
        "cv_mape": eval_results['cv_mape'],
        "test_mae": eval_results['test_mae'],
        "test_rmse": eval_results['test_rmse'],
        "test_mape": eval_results['test_mape'],
    })

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("model_version", "baseline")

    model_name = "baseline_rf"

    # Log (store) the model
    model_info = mlflow.sklearn.log_model(
        sk_model=eval_results['model'],
        name=model_name,
        input_example=None,  # We will add the input example later
    )

In the cell above, we logged the parameters, the metrics, and the model within a single MLflow run.

We can go to http://127.0.0.1:8080/ and see the run and experiment in the Tracking Server UI.

We can also find the stored data in our file system in the ./mlflow directory.

If we check the model size, it's about 250 MB. Random Forest models are quite heavy, so be careful when storing many experimental models locally.

# Cleaned data: Random Forest

Now, let's make a function that logs the most important artifacts.

In [None]:
# Per-column absolute first-difference thresholds passed to `get_clean_data`.
# A sample whose jump from the previous row exceeds the threshold is treated
# as an outlier for that column.
abs_diff_thresholds = {
    'WindSpeed': 12,
    'WindDirAbs': 70,
    'Power': 200,
    'Pitch': 12,
    'GenRPM': 450,
    'WindDirRel': 9,
    'NacelTemp': 40,
    # This key used to appear twice (40 here, 35 at the end of the dict).
    # Python keeps the last value, so 35 is what was actually in effect.
    'GenPh1Temp': 35,
    'RotorRPM': 20,
    'EnvirTemp': 17,
    'GearOilTemp': 20,
    'GearBearTemp': 35,
    'GenBearTemp': 8,
}

In [None]:
# Reload the raw data so this section can be run independently of the baseline section.
df = pd.read_parquet('../data/01_raw/df_train_test.parquet')
df = df.loc[df['Power'] > 20].copy()

# Index by parsed timestamp; the original column is no longer needed afterwards.
df.index = pd.to_datetime(df['Timestamps'])
df = df.drop(columns=['Timestamps'])

# Positional split: first 30,000 rows for training, the rest for testing.
df_train = df.iloc[:30_000]
df_test = df.iloc[30_000:]

In [None]:
# Clean both splits and build the lag / rolling features.
# smooth_window=3 is the window of the rolling-mean denoising step.
x_train, y_train, x_test, y_test = get_clean_data(df_train, df_test, abs_diff_thresholds, smooth_window=3)

To log the models in a similar manner and avoid code repetition, let's create a function that logs the run parameters, models and other meta data.

In [None]:
def log_run(
    params: Mapping[str, Any],
    metrics: Mapping[str, float],
    tags: Mapping[str, Any],
    trained_model: Any,
    model_type: Literal["RF", "CatBoost", "MLP", "LSTM"] = "RF",
    input_example: Optional[Any] = None,
    registered_model_name: Optional[str] = None,
    model_name: str = "model",
) -> Any:
    """
    Record params, metrics, tags and the fitted model in the active MLflow run.

    The caller is responsible for opening the run (e.g. with
    ``with mlflow.start_run(...):``); this function only logs into it.
    Metadata is logged first, so it is recorded even when ``model_type``
    turns out to be unsupported.

    Parameters
    ----------
    params : Mapping[str, Any]
        Hyperparameters / configuration, logged via ``mlflow.log_params``.
    metrics : Mapping[str, float]
        Metric values (e.g., RMSE, MAE), logged via ``mlflow.log_metrics``.
    tags : Mapping[str, Any]
        Metadata tags, logged via ``mlflow.set_tags``.
    trained_model : Any
        Fitted model instance. Must be compatible with the flavor selected
        by ``model_type``.
    model_type : {"RF", "CatBoost", "MLP", "LSTM"}, default="RF"
        Selects the MLflow flavor used to store the model:
        - "RF"       → ``mlflow.sklearn.log_model``
        - "CatBoost" → ``mlflow.catboost.log_model``
        - "MLP"      → ``mlflow.pytorch.log_model``
        - "LSTM"     → ``mlflow.pytorch.log_model``
    input_example : Any, optional
        Example input forwarded to ``log_model``.
    registered_model_name : str, optional
        Forwarded to ``log_model``; if given, the model is also registered
        under this name.
    model_name : str, default="model"
        Name under which the model is logged within the run.

    Returns
    -------
    Any
        Whatever the selected ``mlflow.<flavor>.log_model`` call returns
        (the MLflow ``ModelInfo`` object).

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported values.
    """
    # ---- metadata logging ----
    mlflow.log_params(dict(params))
    mlflow.log_metrics(dict(metrics))
    mlflow.set_tags(dict(tags))

    # ---- pick the flavor and the keyword that carries the model object ----
    if model_type == "RF":
        flavor, model_kwarg = mlflow.sklearn, "sk_model"
    elif model_type == "CatBoost":
        flavor, model_kwarg = mlflow.catboost, "cb_model"
    elif model_type in ("MLP", "LSTM"):
        flavor, model_kwarg = mlflow.pytorch, "pytorch_model"
    else:
        raise ValueError(f"Unsupported model type: {model_type}")

    # ---- model logging (one call shared by all flavors) ----
    return flavor.log_model(
        **{model_kwarg: trained_model},
        name=model_name,
        input_example=input_example,
        registered_model_name=registered_model_name,
    )

In [None]:
# Same Random Forest configuration as the baseline, now trained on the cleaned data.
params = {
    'n_estimators': 100,
    'random_state': SEED,
    'n_jobs': -1,
}

eval_results = eval_model(x_train, y_train, x_test, y_test, 3, 'RF', params)

# Keep only the scalar scores; eval_results also carries the fitted model.
metrics = {
    key: eval_results[key]
    for key in ("cv_mae", "cv_rmse", "cv_mape", "test_mae", "test_rmse", "test_mape")
}

run_name = 'rf_cleaned_data'

# Start an MLflow run and log everything through the helper
with mlflow.start_run(run_name=run_name):
    tags = {'model_version': 'cleaned_data'}

    model_info = log_run(
        params=params,
        metrics=metrics,
        tags=tags,
        trained_model=eval_results['model'],
        input_example=None,
        model_type="RF",
        model_name="rf_cleaned_data",
    )

# Cleaned data: CatBoost

Now, let's run CatBoost.

In [None]:
# CatBoost configuration for the cleaned-data run.
params = {
    "iterations": 200,       # number of trees
    "learning_rate": 0.05,
    "depth": 6,
    "l2_leaf_reg": 1.0,
    "random_seed": SEED,
    "loss_function": "RMSE",
    "verbose": False,
}

eval_results = eval_model(x_train, y_train, x_test, y_test, 3, 'CatBoost', params)

# Keep only the scalar scores; eval_results also carries the fitted model.
metrics = {
    key: eval_results[key]
    for key in ("cv_mae", "cv_rmse", "cv_mape", "test_mae", "test_rmse", "test_mape")
}

run_name = 'catboost_cleaned_data'

# Start an MLflow run and log everything through the helper
with mlflow.start_run(run_name=run_name):
    tags = {'model_version': 'cleaned_data'}

    model_info = log_run(
        params=params,
        metrics=metrics,
        tags=tags,
        trained_model=eval_results['model'],
        input_example=None,
        model_type="CatBoost",
        model_name="catboost_cleaned_data",
    )

# Cleaned Data: MLP Neural Network

In [None]:
mlp_params = {
    "hidden_sizes": [128, 128, 128],
    "dropout": 0.2,
    "lr": 1e-3,
    "batch_size": 128,
    "epochs": 100,
    "verbose": True,
}

eval_results = eval_model(
    x_train,
    y_train,
    x_test,
    y_test,
    n_splits=3,
    model_name="MLP",
    model_params=mlp_params,
)

metrics = {
    "cv_mae": eval_results["cv_mae"],
    "cv_rmse": eval_results["cv_rmse"],
    "cv_mape": eval_results["cv_mape"],
    "test_mae": eval_results["test_mae"],
    "test_rmse": eval_results["test_rmse"],
    "test_mape": eval_results["test_mape"],
}

run_name = 'mlp_cleaned_data'

# Start an MLflow run
with mlflow.start_run(run_name=run_name):
    tags = {
        'model_version': 'cleaned_data',
    }

    # Log everything using our function.
    # The hyperparameters logged must be the ones the MLP was trained with
    # (`mlp_params`), not the `params` dict left over from the CatBoost cell.
    model_info = log_run(
        params=mlp_params,
        metrics=metrics,
        tags=tags,
        trained_model=eval_results['model'],
        input_example=None,
        model_type="MLP",
        model_name="mlp_cleaned_data",
    )

# Cleaned Data: LSTM

In [None]:
# NOTE(review): `device` is not passed to eval_model in this cell -- presumably
# device selection happens inside `utils` or the variable is used later; confirm.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

lstm_params = {
    "seq_len": 48,
    "hidden_size": 128,
    "num_layers": 2,
    "dropout": 0.0,
    "lr": 1e-3,
    "batch_size": 128,
    "epochs": 2,
    "verbose": True,
}

eval_results = eval_model(
    x_train,
    y_train,
    x_test,
    y_test,
    n_splits=3,
    model_name="LSTM",
    model_params=lstm_params,
)

metrics = {
    "cv_mae": eval_results["cv_mae"],
    "cv_rmse": eval_results["cv_rmse"],
    "cv_mape": eval_results["cv_mape"],
    "test_mae": eval_results["test_mae"],
    "test_rmse": eval_results["test_rmse"],
    "test_mape": eval_results["test_mape"],
}

run_name = 'lstm_cleaned_data'

# Start an MLflow run
with mlflow.start_run(run_name=run_name):
    tags = {
        'model_version': 'cleaned_data',
    }

    # Log everything using our function.
    # The hyperparameters logged must be the ones the LSTM was trained with
    # (`lstm_params`), not the `params` dict left over from the CatBoost cell.
    model_info = log_run(
        params=lstm_params,
        metrics=metrics,
        tags=tags,
        trained_model=eval_results['model'],
        input_example=None,  # not provided here; an LSTM example would have to be a sliced (sequence) input
        model_type="LSTM",
        model_name="LSTM_cleaned_data",
    )

# Child runs intro

MLflow tracks experiments as named groups where all your related runs live. 

As we have seen up to now, each "run" is one training session where you log parameters, metrics, and artifacts. 

Parent and Child Runs add a hierarchical layer to this setup.

**How does it help?**

With a parent-child structure, related runs are automatically grouped together. When you're running a hyperparameter search using a Bayesian approach on a particular model architecture, every iteration gets logged as a child run under a single parent run.

As your experiments grow in number and complexity, having a nested structure ensures your tracking remains manageable. 

Navigating through a structured hierarchy is much more efficient than scrolling through a flat list of hundreds or thousands of runs. 

This becomes particularly valuable as projects scale up.

Now, as the concept of Parent and Child runs is clear, let's see how we can implement this.

**Let's see how this works with CatBoost as an example.**

Say you're testing a CatBoost model with several different tree depths (4, 6, 8 and 10 in the code below).

To do that, we create a function log_run_child.

This is a very similar function to log_run, but here we change the names of the run at each run and also specify parameter nested=True. This parameter indicates to MLflow that the runs are child runs.

Then we specify the parent run, and in a loop we run the child runs.

In [None]:
def log_run_child(
    run_name: str,
    params: Mapping[str, Any],
    metrics: Mapping[str, float],
    tags: Mapping[str, Any],
    trained_model: Any,
    model_type: Literal["RF", "CatBoost", "MLP", "LSTM"] = "RF",
    input_example: Optional[Any] = None,
    registered_model_name: Optional[str] = None,
    iteration: Optional[int] = None,
) -> Any:
    """
    Log a nested (child) MLflow run representing one iteration of an experiment.

    Intended to be called while a parent run is active (hyperparameter
    tuning loops, model-selection experiments, ...). A nested run named
    ``"{run_name}_iteration_{iteration}"`` is opened, and params, metrics,
    tags and the model are logged into it through ``log_run``, so the
    flavor dispatch lives in exactly one place. The model is stored under
    the name ``"{run_name}_iteration_{iteration}_{model_type}"``.

    Parameters
    ----------
    run_name : str
        Base name of the parent run. Used to derive the child run name and
        also written to the child's ``model_version`` tag.
    params : Mapping[str, Any]
        Hyperparameters for this specific iteration.
    metrics : Mapping[str, float]
        Evaluation metrics for this iteration.
    tags : Mapping[str, Any]
        Additional metadata to attach to the child run. A ``model_version``
        entry, if present, is overridden by ``run_name``.
    trained_model : Any
        Fitted model instance to be logged. Must match the chosen MLflow flavor.
    model_type : {"RF", "CatBoost", "MLP", "LSTM"}, default="RF"
        Determines which MLflow flavor is used for model logging.
        - RF       → ``mlflow.sklearn.log_model``
        - CatBoost → ``mlflow.catboost.log_model``
        - MLP      → ``mlflow.pytorch.log_model``
        - LSTM     → ``mlflow.pytorch.log_model``
    input_example : Any, optional
        Example input forwarded to the model logger.
    registered_model_name : str, optional
        If provided, registers the logged model under this name.
    iteration : int, optional
        Iteration index used to generate a unique child run name. If None,
        the name ends in ``_iteration_None`` and uniqueness is not guaranteed.

    Returns
    -------
    Any
        The MLflow ``ModelInfo`` object returned by ``log_run``.

    Raises
    ------
    ValueError
        If the ``model_type`` is unknown or unsupported.
    """
    child_run_name = f"{run_name}_iteration_{iteration}"
    model_name = f"{child_run_name}_{model_type}"

    # `model_version` is applied last so that it always equals the parent run
    # name, exactly as when it was set with a separate set_tag call.
    child_tags = {**dict(tags), "model_version": run_name}

    with mlflow.start_run(run_name=child_run_name, nested=True):
        return log_run(
            params=params,
            metrics=metrics,
            tags=child_tags,
            trained_model=trained_model,
            model_type=model_type,
            input_example=input_example,
            registered_model_name=registered_model_name,
            model_name=model_name,
        )

Let's try to create parent-child runs over a range of CatBoost `depth` values.

In [None]:
# Tree depths to compare; each one becomes a child run of the parent run below.
depth_values = [4, 6, 8, 10]

run_name = "catboost_depth_tuning"

# Tags shared by every child run
tags = {
    "model_family": "CatBoost",
    "dataset_version": "cleaned_data",
}

with mlflow.start_run(run_name=run_name):
    for idx, depth in enumerate(depth_values):
        # 1) Hyperparameters for this iteration (only `depth` varies)
        params = {
            "iterations": 200,
            "learning_rate": 0.03,
            "depth": depth,
            "l2_leaf_reg": 3.0,
            "random_seed": SEED,
            "loss_function": "RMSE",
            "verbose": False,
        }

        # 2) Train & evaluate CatBoost with these hyperparameters
        eval_results = eval_model(x_train, y_train, x_test, y_test, 3, "CatBoost", params)

        # 3) Scalar scores, plus the depth itself so it can be plotted as a metric
        metrics = {
            key: eval_results[key]
            for key in ("cv_mae", "cv_rmse", "cv_mape", "test_mae", "test_rmse", "test_mape")
        }
        metrics["depth"] = depth

        # 4) Log this iteration as a child of the parent run
        model_info = log_run_child(
            run_name=run_name,
            params=params,
            metrics=metrics,
            tags=tags,
            trained_model=eval_results["model"],
            model_type="CatBoost",
            input_example=None,
            registered_model_name=None,
            iteration=idx,
        )

Now, we can go to http://127.0.0.1:8080/ and see the child runs inside the parent run. We can also easily compare the metrics right in the UI.

# CatBoost Bayesian Hyperparameter Tuning with Child Runs

One of the best use cases for child runs is hyperparameter optimization.

Often, to select hyperparameters, we might need to run hundreds of runs and if we log every run as a separate "parent-like" run, the UI will become messy very quickly.

Let's see how we can use child runs and Bayesian Hyperparameter Tuning together.

In [None]:
def objective(
    trial: optuna.Trial,
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_test: np.ndarray,
    y_test: np.ndarray
) -> float:
    """
    Optuna objective for CatBoost using eval_model and MLflow child runs.
    Minimizes cross-validated MAE (cv_mae).

    Each call samples one CatBoost configuration, evaluates it with
    ``eval_model`` and logs the trial through ``log_run_child``.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample ``learning_rate``, ``depth`` and ``l2_leaf_reg``.
    x_train, y_train, x_test, y_test
        Forwarded unchanged to ``eval_model``. The notebook passes the same
        objects it later indexes with ``.iloc``, so they may be pandas objects
        rather than plain arrays.

    Returns
    -------
    float
        ``eval_results["cv_mae"]`` - the value the study minimizes.

    Notes
    -----
    Reads the notebook-level ``run_name`` (not a parameter), so it must be
    defined before the study is optimized.
    """
    np.random.seed(SEED)

    # ----- 1. Sample CatBoost hyperparameters -----
    # "iterations" is fixed here; only the three suggested values are tuned.
    params: Dict[str, Union[int, float, bool]] = {
        "iterations": 100,
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 0.1, 10.0, log=True),
        "random_seed": SEED,
        "loss_function": "RMSE",
        "verbose": False,
    }

    # ----- 2. Evaluate using your CV evaluator -----
    eval_results = eval_model(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        n_splits=3,
        model_name="CatBoost",
        model_params=params,
    )

    # ----- 3. Log as child MLflow run -----
    metrics = {
        "cv_mae": eval_results["cv_mae"],
        "cv_rmse": eval_results["cv_rmse"],
        "cv_mape": eval_results["cv_mape"],
        "test_mae": eval_results["test_mae"],
        "test_rmse": eval_results["test_rmse"],
        "test_mape": eval_results["test_mape"],
        "trial_number": trial.number,
    }

    tags = {
        "model_version": "catboost_optuna",
        "trial": trial.number,
    }

    # Log the run (run_name comes from the enclosing notebook scope)
    log_run_child(
        run_name=run_name,
        params=params,
        metrics=metrics,
        tags=tags,
        trained_model=eval_results["model"],
        model_type="CatBoost",
        input_example=None,
        iteration=trial.number,
    )
    # ----- 4. Optuna minimizes this -----
    # Return the metric the docstring (and the calling cells) promise: CV MAE.
    return eval_results["cv_mae"]

**Note that after the best set of hyperparameters is selected, we re-fit the model and log the parameters in the parent run.**

In [None]:
run_name = "Catboost_optuna"

with mlflow.start_run(run_name=run_name):
    np.random.seed(SEED)

    # 1) Create the study and minimize the value returned by objective();
    #    objective() logs each trial through log_run_child.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=SEED),
    )
    study.optimize(
        lambda trial: objective(trial, x_train, y_train, x_test, y_test),
        n_trials=5,
    )

    # 2) Rebuild CatBoost params from best Optuna params
    #    (the refit uses more iterations than the trials did).
    best_params = study.best_params.copy()
    params = {
        "iterations": 500,
        "learning_rate": best_params["learning_rate"],
        "depth": best_params["depth"],
        "l2_leaf_reg": best_params["l2_leaf_reg"],
        # Use the shared SEED constant instead of a duplicated literal so the
        # refit stays in sync with the trials if the seed is ever changed.
        "random_seed": SEED,
        "loss_function": "RMSE",
        "verbose": False,
    }

    # 3) Refit the best model params
    eval_results = eval_model(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        n_splits=3,
        model_name="CatBoost",
        model_params=params,
    )

    # 4) Combine metrics directly into a dict
    metrics = {
        "cv_mae": eval_results["cv_mae"],
        "cv_rmse": eval_results["cv_rmse"],
        "cv_mape": eval_results["cv_mape"],
        "test_mae": eval_results["test_mae"],
        "test_rmse": eval_results["test_rmse"],
        "test_mape": eval_results["test_mape"],
    }

    # 5) Log metrics & params in parent run
    mlflow.log_metrics(metrics)
    mlflow.log_params(params)
    mlflow.set_tag("best_model", "true")

    # 6) Log the final best model
    model_info = mlflow.catboost.log_model(
        cb_model=eval_results["model"],
        name="best_catboost_model",
        input_example=x_train[:5],
        tags={"best_model": "true"},
    )

# Logging important artifacts

Often, especially for the best selected models, we want to reproduce the results.

To do that, we need to make sure:
- We know what kind of input the model requires
- How to create this input (aka how raw data is preprocessed)

Let's first learn how to save the input examples.

To do that, we define the model signature.

A model signature is MLflow’s way of recording the input and output schema of your model when you log it.

It defines:
- input column names
- input types (string, double, integer, tensor shapes, etc.)
- output types
- shapes (fixed or variable)

Note that we define the signature and the input example for the final (best) model only.

We can also save utils.py to save the preprocessing steps.

In [None]:
run_name = "Catboost_optuna"

MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

with mlflow.start_run(run_name=run_name):
    np.random.seed(SEED)

    # 1) Create the study and minimize the value returned by objective()
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=SEED),
    )
    study.optimize(
        lambda trial: objective(trial, x_train, y_train, x_test, y_test),
        n_trials=10,
    )

    # 2) Rebuild CatBoost params from best Optuna params
    best_params = study.best_params.copy()
    params = {
        "iterations": 500,
        "learning_rate": best_params["learning_rate"],
        "depth": best_params["depth"],
        "l2_leaf_reg": best_params["l2_leaf_reg"],
        "random_seed": SEED,
        "loss_function": "RMSE",
        "verbose": False,
    }

    # 3) Refit using ONLY eval_model
    eval_results = eval_model(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        n_splits=3,
        model_name="CatBoost",
        model_params=params,
    )

    print("METRICS----------")
    print(eval_results["cv_mae"])

    # 4) Metrics
    metrics = {
        "cv_mae": eval_results["cv_mae"],
        "cv_rmse": eval_results["cv_rmse"],
        "cv_mape": eval_results["cv_mape"],
        "test_mae": eval_results["test_mae"],
        "test_rmse": eval_results["test_rmse"],
        "test_mape": eval_results["test_mape"],
        "best_trial_number": study.best_trial.number,
    }

    # 5) Log metrics & params
    mlflow.log_metrics(metrics)
    mlflow.log_params(params)
    mlflow.set_tag("best_model", "true")

    # 6) Best model
    best_model = eval_results["model"]

    # Save preprocessing code snapshot as plain .py
    utils_path = Path(utils.__file__).resolve()
    preprocess_path = MODELS_DIR / "preprocessing.py"
    preprocess_path.write_text(
        utils_path.read_text(encoding="utf-8"),
        encoding="utf-8",
    )

    artifacts = {
        "preprocessing_code": str(preprocess_path),
    }

    # Actually attach the snapshot to the active run; previously the file was
    # only written locally and the `artifacts` dict was never used.
    mlflow.log_artifact(
        artifacts["preprocessing_code"],
        artifact_path="preprocessing",
    )

    # 7) Input example + signature
    #    The signature is inferred from the example rows and the model's
    #    predictions for those same rows.
    input_example = x_train.iloc[:5].copy()
    signature = infer_signature(
        input_example,
        best_model.predict(input_example),
    )

    # 8) Log CatBoost model
    model_info = mlflow.catboost.log_model(
        cb_model=best_model,
        name="best_catboost_model",
        input_example=input_example,
        signature=signature,
        registered_model_name="Catboost_model_candidate",
    )

In [None]:
# Compare the first 100 true Power values with the raw CatBoost predictions.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df_test['Power'].values[:100], label='True value')
ax.plot(best_model.predict(x_test)[:100], label='Predicted value')
ax.legend(fontsize=16)

This happened because we have not stored the X_scaler (StandardScaler) for the best model.

Let's fix it.

The easiest and most convenient way is to create a model wrapper on top of the model.

This allows us to create any custom predict method.

In this case, we will transform the features using the saved scaler object.

In [None]:
class CatBoostCustom(mlflow.pyfunc.PythonModel):
    """
    MLflow PyFunc wrapper bundling a CatBoost regressor with its X-scaler.

    The input to ``predict`` is expected to be a feature-engineered but
    unscaled DataFrame; scaling is applied here before the model is called.
    """
    def load_context(self, context: mlflow.pyfunc.PythonModelContext) -> None:
        """
        Restore the scaler and the CatBoost model from the logged artifacts.

        Reads the ``"x_scaler"`` artifact with joblib and loads the
        ``"catboost_model"`` artifact into a fresh ``CatBoostRegressor``.
        """
        artifact_paths = context.artifacts
        self.x_scaler = joblib.load(artifact_paths["x_scaler"])
        self.model = CatBoostRegressor()
        self.model.load_model(artifact_paths["catboost_model"])

    def predict(
        self,
        context: mlflow.pyfunc.PythonModelContext,
        model_input: pd.DataFrame,
    ) -> np.ndarray:
        """Scale ``model_input`` with the stored scaler and return the model's predictions."""
        return self.model.predict(self.x_scaler.transform(model_input))

In [None]:
run_name = "Catboost_optuna_with_scaler"

MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

with mlflow.start_run(run_name=run_name):
    np.random.seed(SEED)

    # --- Hyperparameter search (each trial is logged by objective()) ---
    study = optuna.create_study(
        sampler=optuna.samplers.TPESampler(seed=SEED),
        direction="minimize",
    )
    study.optimize(
        lambda trial: objective(trial, x_train, y_train, x_test, y_test),
        n_trials=10,
    )

    # --- Final CatBoost configuration: tuned values plus fixed settings ---
    best_params = study.best_params.copy()
    params = {
        "iterations": 500,
        **{key: best_params[key] for key in ("learning_rate", "depth", "l2_leaf_reg")},
        "random_seed": SEED,
        "loss_function": "RMSE",
        "verbose": False,
    }

    # --- Refit and evaluate with the selected configuration ---
    eval_results = eval_model(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        n_splits=3,
        model_name="CatBoost",
        model_params=params,
    )

    # Keep the fitted scaler next to the model so both can be bundled below.
    best_model = eval_results["model"]
    x_scaler = eval_results["x_scaler"]

    # --- Metrics, params and tag on the parent run ---
    metrics = {
        metric_name: eval_results[metric_name]
        for metric_name in (
            "cv_mae",
            "cv_rmse",
            "cv_mape",
            "test_mae",
            "test_rmse",
            "test_mape",
        )
    }
    metrics["best_trial_number"] = study.best_trial.number

    mlflow.log_metrics(metrics)
    mlflow.log_params(params)
    mlflow.set_tag("best_model", "true")

    # --- Signature: unscaled example rows in, predictions on scaled rows out ---
    input_example = x_train.iloc[:5].copy()
    y_example = best_model.predict(x_scaler.transform(input_example))
    signature = infer_signature(input_example, y_example)

    # --- Persist model, scaler and a snapshot of utils.py locally ---
    cb_path = MODELS_DIR / "catboost_model.cbm"
    scaler_path = MODELS_DIR / "x_scaler.joblib"
    preprocess_path = MODELS_DIR / "preprocessing.py"

    best_model.save_model(cb_path)
    joblib.dump(x_scaler, scaler_path)

    utils_path = Path(utils.__file__).resolve()
    preprocess_path.write_text(
        utils_path.read_text(encoding="utf-8"),
        encoding="utf-8",
    )

    # Keys "catboost_model" and "x_scaler" are the ones CatBoostCustom.load_context reads.
    artifacts = {
        artifact_key: str(artifact_path)
        for artifact_key, artifact_path in (
            ("catboost_model", cb_path),
            ("x_scaler", scaler_path),
            ("preprocessing_code", preprocess_path),
        )
    }

    # --- Log the PyFunc wrapper together with its artifacts ---
    model_info = mlflow.pyfunc.log_model(
        name="best_catboost_pyfunc",
        python_model=CatBoostCustom(),
        artifacts=artifacts,
        signature=signature,
        input_example=input_example,
        registered_model_name="Catboost_model_candidate",
        tags={
            "best_model": "true",
            "logged_model_name": "best_catboost_pyfunc",
            "run_name": run_name,
        },
    )

    print("Logged model:", model_info.model_uri)

# Logging and loading the best CatBoost Optuna model with X_scaler

Now, note that when we run the parent run, we re-train the best optuna model on the entire training set and log a tag - best_model: true. This allows us to easily load the best model, use it for predictions and consider moving the model to production.

In [None]:
# Find the most recently updated logged model carrying the tags that the
# "Catboost_optuna_with_scaler" cell attaches to its PyFunc model.
top_models = mlflow.search_logged_models(
    filter_string=(
        "tag.best_model = 'true' "
        "AND tag.logged_model_name = 'best_catboost_pyfunc' "
        "AND tag.run_name = 'Catboost_optuna_with_scaler'"
    ),
    order_by=[{"field_name": "last_updated_timestamp", "ascending": False}],
    max_results=1,
)

# Fail with an actionable message instead of an opaque IndexError from .iloc[0]
# when nothing has been logged yet (e.g. fresh tracking server).
if top_models.empty:
    raise RuntimeError(
        "No logged model matches the 'best_catboost_pyfunc' tags; "
        "run the 'Catboost_optuna_with_scaler' cell first."
    )

# Use a dedicated name for the result row so the fitted CatBoost model held in
# `best_model` by earlier cells is not overwritten with a pandas Series.
best_model_row = top_models.iloc[0]

model_id = best_model_row["model_id"]
name = best_model_row["name"]

# Load model
loaded_model = mlflow.pyfunc.load_model(f"models:/{model_id}")

In [None]:
# Display the loaded model object as the cell output.
loaded_model

In [None]:
# Predict on the test features with the model loaded from MLflow.
# NOTE(review): assumes loaded_model is the PyFunc wrapper that scales its
# input internally, so x_test is passed unscaled - confirm.
y_pred = loaded_model.predict(x_test)

In [None]:
# Compare the first 100 true Power values with the loaded model's predictions.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df_test['Power'].values[:100], label='True value')
ax.plot(y_pred[:100], label='Predicted value')
ax.legend(fontsize=16)

In [None]:
# Visualise test-set errors with the plot_errors helper imported from utils.
# NOTE(review): the meaning of error_threshold / rolling_window is defined in
# utils.plot_errors and is not visible here - see that function for details.
plot_errors(x_test, df_test['Power'], y_pred, error='mae', error_threshold=50, rolling_window=151)

# Testing the best model on Production Data

Even though we are NOT supposed to be able to test the model on production data, since we are doing an educational project for a portfolio, it's useful to know what kind of performance we can expect the model to have on production data.

Let's test the best selected model on prod data.

In [None]:
# Re-read the raw production data so this section does not depend on earlier state:
# keep rows with Power above 20, index them by the parsed 'Timestamps' column,
# then drop that column from the frame.
df_prod = (
    pd.read_parquet('../data/01_raw/df_prod.parquet')
    .loc[lambda frame: frame['Power'] > 20]
    .pipe(lambda frame: frame.set_index(pd.to_datetime(frame['Timestamps'])))
    .drop(columns=['Timestamps'])
)

In [None]:
# Run the production frame through get_clean_data (defined elsewhere in the
# notebook): every row but the last is passed as the first frame and only the
# final row as the second; the last two return values are discarded.
# NOTE(review): presumably the single-row second frame is just a placeholder
# to satisfy the two-frame signature - confirm against get_clean_data.
x_prod, y_prod, _, _ = get_clean_data(df_prod[:-1], df_prod[-1:], abs_diff_thresholds, smooth_window=3)

In [None]:
# Predict on the cleaned production features with the model loaded from MLflow.
y_pred_prod = loaded_model.predict(x_prod)

In [None]:
# Mean of |y_true - y_pred| / y_true, expressed in percent, over the prod set.
mape_test_prod = np.mean(
    np.abs(y_prod.values.ravel() - y_pred_prod.ravel())
    / y_prod.values.ravel()
    * 100
)
print(f"MAPE on prod set: {mape_test_prod}")

In [None]:
# Visualise production-set errors with the plot_errors helper imported from utils,
# this time using the 'mape' error mode.
# NOTE(review): threshold and window semantics live in utils.plot_errors - see there.
plot_errors(x_prod, y_prod, y_pred_prod, error='mape', error_threshold=8.5, rolling_window=288)