In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib # Để lưu/load preprocessor, model
import json   # Để lưu thông tin features

from IPython.display import display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Cấu hình
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)

# Định nghĩa đường dẫn
project_root = os.path.dirname(os.getcwd())
processed_data_folder = os.path.join(project_root, 'data', 'bts_processed')
models_folder = os.path.join(project_root, 'saved_models') # Thư mục lưu model và scaler

if not os.path.exists(models_folder):
    os.makedirs(models_folder)
    print('Đã tạo thư mục saved_models:', models_folder)

# Load dữ liệu đã làm giàu từ EDA (02)
input_filename_eda = 'flight_delay_2024_eda_enriched.csv'
eda_enriched_file_path = os.path.join(processed_data_folder, input_filename_eda)

df_model_input = None # Khởi tạo
if os.path.exists(eda_enriched_file_path):
    df_model_input = pd.read_csv(eda_enriched_file_path)
    print('Đã load dữ liệu từ:', eda_enriched_file_path)
    print('Shape của df_model_input:', df_model_input.shape)
    display(df_model_input.head())
else:
    print('LỖI: Không tìm thấy file {}.'.format(eda_enriched_file_path))
    print('Vui lòng chạy notebook EDA (Phần 2) và lưu file đúng cách trước.')

Đã load dữ liệu từ: C:\Users\hoiti\PycharmProjects\flight_delay_bts_project\data\bts_processed\flight_delay_2024_eda_enriched.csv
Shape của df_model_input: (5677, 18)


Unnamed: 0,airport,arr_cancelled,arr_diverted,arr_flights,carrier,carrier_ct,delay_rate,late_aircraft_ct,month,nas_ct,security_ct,weather_ct,year,carrier_ct_rate,weather_ct_rate,nas_ct_rate,security_ct_rate,late_aircraft_ct_rate
0,SGF,0.0,0.0,119.0,OH,5.5,0.134454,6.9,10,3.6,0.0,0.0,2024,0.046218,0.0,0.030252,0.0,0.057983
1,SHV,0.0,0.0,92.0,OH,4.56,0.130435,1.52,10,5.92,0.0,0.0,2024,0.049565,0.0,0.064348,0.0,0.016522
2,SRQ,7.0,0.0,31.0,OH,0.4,0.096774,2.57,10,0.03,0.0,0.0,2024,0.012903,0.0,0.000968,0.0,0.082903
3,STL,0.0,0.0,115.0,OH,6.87,0.130435,7.79,10,0.33,0.0,0.0,2024,0.059739,0.0,0.00287,0.0,0.067739
4,SYR,0.0,0.0,15.0,OH,1.42,0.266667,1.53,10,1.06,0.0,0.0,2024,0.094667,0.0,0.070667,0.0,0.102


In [56]:
# --- 3.1. Feature Selection & Target Definition ---
X = None
y = None
selected_features = []
categorical_cols = [] # Đổi tên cho ngắn gọn
numeric_cols = []     # Đổi tên cho ngắn gọn

if df_model_input is not None and not df_model_input.empty:
    target_variable_name = 'delay_rate'

    # Define potential features based on EDA (Part 2)
    potential_cats = ['month', 'carrier', 'airport']
    potential_nums = [
        'arr_flights', 'arr_cancelled', 'arr_diverted',
        'carrier_ct_rate', 'weather_ct_rate', 'nas_ct_rate',
        'security_ct_rate', 'late_aircraft_ct_rate'
    ]

    # Filter to existing columns only
    categorical_cols = [col for col in potential_cats if col in df_model_input.columns]
    numeric_cols = [col for col in potential_nums if col in df_model_input.columns]

    selected_features = sorted(list(set(categorical_cols + numeric_cols))) # Ensure unique and sorted

    # Critical check: Ensure target is NOT in features
    if target_variable_name in selected_features:
        selected_features.remove(target_variable_name)
        print('WARNING: Target "{}" was in features, removed.'.format(target_variable_name))

    # Final check for missing selected features or target in df_model_input
    missing_selected_features = [f for f in selected_features if f not in df_model_input.columns]
    if missing_selected_features:
        print('CRITICAL: Selected features NOT FOUND in df_model_input: {}'.format(missing_selected_features))
        # Potentially raise error or stop execution if this happens
    elif target_variable_name not in df_model_input.columns:
        print('CRITICAL: Target column "{}" NOT FOUND in df_model_input.'.format(target_variable_name))
    else:
        X = df_model_input[selected_features].copy()
        y = df_model_input[target_variable_name].copy()
        print('--- Features & Target Created ---')
        print('  Selected Features ({}): {}'.format(len(selected_features), selected_features))
        print('  Categorical Features for Preprocessing:', categorical_cols)
        print('  Numeric Features for Preprocessing:', numeric_cols)
        print('  X shape: {}, y shape: {}'.format(X.shape, y.shape))
else:
    print("Input DataFrame 'df_model_input' is empty or not loaded. Skipping feature selection.")

--- Features & Target Created ---
  Selected Features (11): ['airport', 'arr_cancelled', 'arr_diverted', 'arr_flights', 'carrier', 'carrier_ct_rate', 'late_aircraft_ct_rate', 'month', 'nas_ct_rate', 'security_ct_rate', 'weather_ct_rate']
  Categorical Features for Preprocessing: ['month', 'carrier', 'airport']
  Numeric Features for Preprocessing: ['arr_flights', 'arr_cancelled', 'arr_diverted', 'carrier_ct_rate', 'weather_ct_rate', 'nas_ct_rate', 'security_ct_rate', 'late_aircraft_ct_rate']
  X shape: (5677, 11), y shape: (5677,)


In [57]:
# --- 3.2. Train-Test Split ---
X_train, X_test, y_train, y_test = (None, None, None, None)

if X is not None and y is not None:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)
    # shuffle=True (mặc định) là tốt để đảm bảo tính ngẫu nhiên
    print('--- Train/Test Data Split ---')
    print('  X_train: {}, y_train: {}'.format(X_train.shape, y_train.shape))
    print('  X_test: {}, y_test: {}'.format(X_test.shape, y_test.shape))
else:
    print("X or y is None. Skipping train-test split.")

--- Train/Test Data Split ---
  X_train: (4541, 11), y_train: (4541,)
  X_test: (1136, 11), y_test: (1136,)


In [58]:
# --- 3.3. Feature Preprocessing (Encoding & Scaling) ---
X_train_processed = None
X_test_processed = None
preprocessor_obj = None # Đổi tên biến cho nhất quán
final_feature_names = None # List of feature names AFTER preprocessing

if X_train is not None and X_test is not None:
    # These lists (categorical_cols, numeric_cols) should be defined and filtered in Cell 2
    # based on df_model_input and then further filtered based on X_train.columns if needed,
    # but if X comes directly from df_model_input[selected_features], they should match.

    # Double check: use columns present in X_train for pipeline
    cats_for_pipe = [col for col in categorical_cols if col in X_train.columns]
    nums_for_pipe = [col for col in numeric_cols if col in X_train.columns]

    # Define transformers
    numerical_transformer = StandardScaler()
    categorical_transformer = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

    # Create ColumnTransformer
    preprocessor_obj = ColumnTransformer(
        transformers=[
            ('num_transform', numerical_transformer, nums_for_pipe),
            ('cat_transform', categorical_transformer, cats_for_pipe)
        ],
        remainder='drop' # Explicitly drop other columns (should be none if selected_features is comprehensive)
    )

    # Fit preprocessor on X_train and transform X_train
    print('Fitting preprocessor on X_train...')
    X_train_processed = preprocessor_obj.fit_transform(X_train)

    # Transform X_test using the fitted preprocessor
    print('Transforming X_test...')
    X_test_processed = preprocessor_obj.transform(X_test)
    print('Preprocessing complete.')

    # Attempt to get feature names after transformation
    try:
        ohe_cols = list(preprocessor_obj.named_transformers_['cat_transform']
                        .get_feature_names_out(cats_for_pipe))
        final_feature_names = nums_for_pipe + ohe_cols
        print('  Total features after preprocessing: {}'.format(len(final_feature_names)))
        # print('  Example processed feature names (first 5):', final_feature_names[:5]) # Optional log
    except Exception as e:
        print('  Note: Could not retrieve feature names after OHE: {}.'.format(e))
        print('        X_train_processed and X_test_processed will be NumPy arrays without column names attached.')
        final_feature_names = None # Ensure it's None if failed

    print('  X_train_processed shape: {}'.format(X_train_processed.shape))
    print('  X_test_processed shape: {}'.format(X_test_processed.shape))
else:
    print("Train/Test data not available. Skipping preprocessing.")

Fitting preprocessor on X_train...
Transforming X_test...
Preprocessing complete.
  Total features after preprocessing: 385
  X_train_processed shape: (4541, 385)
  X_test_processed shape: (1136, 385)


In [59]:
# --- 3.4. Save Preprocessor and Feature Info ---
if preprocessor_obj is not None and X_train is not None: # Ensure preprocessor was created and X_train exists
    preprocessor_file = 'flight_data_preprocessor.joblib'
    preprocessor_save_path = os.path.join(models_folder, preprocessor_file)
    joblib.dump(preprocessor_obj, preprocessor_save_path)
    print('Preprocessor saved to:', preprocessor_save_path)

    # Save feature information used for THIS preprocessor
    # 'cats_for_pipe' and 'nums_for_pipe' are what ColumnTransformer actually saw.
    feature_info_dict = {
        'numeric_features_in_pipe': nums_for_pipe,       # Columns fed to numerical_transformer
        'categorical_features_in_pipe': cats_for_pipe, # Columns fed to categorical_transformer
        'original_X_train_columns_order': list(X_train.columns), # Order of columns in X_train when preprocessor was fit
        'processed_feature_names_order': final_feature_names if final_feature_names else 'Not available'
    }
    feature_info_file = 'preprocessor_feature_config.json' # More descriptive name
    feature_info_save_path = os.path.join(models_folder, feature_info_file)
    try:
        with open(feature_info_save_path, 'w') as f:
            json.dump(feature_info_dict, f, indent=4)
        print('Preprocessor feature configuration saved to:', feature_info_save_path)
    except Exception as e:
        print('Error saving feature_info.json: {}'.format(e))
else:
    if preprocessor_obj is None: print("Preprocessor not created. Skipping save.")
    if X_train is None: print("X_train not available. Cannot save feature info related to it.")

print('\n--- END OF PART 3: DATA PREPARATION FOR MODELING COMPLETE ---')

Preprocessor saved to: C:\Users\hoiti\PycharmProjects\flight_delay_bts_project\saved_models\flight_data_preprocessor.joblib
Preprocessor feature configuration saved to: C:\Users\hoiti\PycharmProjects\flight_delay_bts_project\saved_models\preprocessor_feature_config.json

--- END OF PART 3: DATA PREPARATION FOR MODELING COMPLETE ---


In [60]:
# --- Modeling & MLflow ---
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import mlflow
import mlflow.sklearn
import numpy as np # Dùng cho np.sqrt

# MLflow: set experiment
# Sẽ tạo thư mục 'mlruns' ở root project nếu chưa có
experiment_name = 'Flight_Delay_BTS'
try:
    mlflow.set_experiment(experiment_name)
    print('MLflow experiment set to: {}'.format(experiment_name))
except Exception as e:
    print('MLflow set_experiment error: {}. Ensure MLflow server is configured if not using local.'.format(e))

MLflow experiment set to: Flight_Delay_BTS


In [61]:
# 7.1. Verify data from Part 3 is ready and valid
data_is_truly_ready = False
mlflow_input_example_final = None

if ('X_train_processed' in locals() and isinstance(X_train_processed, np.ndarray) and
    'y_train' in locals() and isinstance(y_train, pd.Series) and
    'X_test_processed' in locals() and isinstance(X_test_processed, np.ndarray) and
    'y_test' in locals() and isinstance(y_test, pd.Series) and
    X_train_processed.shape[0] == y_train.shape[0] and X_test_processed.shape[0] == y_test.shape[0] and
    X_train_processed.shape[1] == X_test_processed.shape[1] and X_train_processed.shape[1] > 0):

    data_is_truly_ready = True
    print('VERIFIED: Data (X_processed, y) looks structurally ready for modeling.')
    print('  Shape X_train_processed: {} (Features: {})'.format(X_train_processed.shape, X_train_processed.shape[1]))

    # 7.2. Prepare input_example for MLflow (prioritize NumPy if X_train_processed is NumPy)
    num_ex_rows = min(5, X_train_processed.shape[0])
    mlflow_input_example_final = X_train_processed[:num_ex_rows] # Default to NumPy array
    # print('  mlflow_input_example_final created as NumPy array.') # Optional log

    # Optionally, if feature names are reliable AND X_train_processed was meant to be DataFrame when fitted
    # This part is tricky because preprocessor usually outputs NumPy. For now, stick to NumPy for input_example
    # if 'processed_feature_names_list' in locals() and processed_feature_names_list and len(processed_feature_names_list) == X_train_processed.shape[1]:
    #     mlflow_input_example_final = pd.DataFrame(X_train_processed[:num_ex_rows], columns=processed_feature_names_list)
    #     print('  mlflow_input_example_final updated to DataFrame (with column names).')

else:
    print('CRITICAL FAILURE: Input data from Part 3 is NOT valid or missing.')
    print('  >>> CHECK PART 3 CELLS (Split, Preprocessing) THOROUGHLY. <<<')

VERIFIED: Data (X_processed, y) looks structurally ready for modeling.
  Shape X_train_processed: (4541, 385) (Features: 385)


In [62]:
import os # Needed for path joining later, e.g. saving models/plots
import joblib
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
from IPython.display import display

print('Phase 4 Init (Extreme Simplicity): Modeling & MLflow libs ready.')

# MLflow Experiment
mlflow_experiment_extreme_run = 'Flight_Delay_BTS_V1' # New distinct name
try:
    current_experiment_info = mlflow.set_experiment(mlflow_experiment_extreme_run)
    print(f'MLflow experiment for Extreme Simplicity Run: "{current_experiment_info.name}"')
except Exception as e:
    print(f'MLflow set_experiment error: {e}. Ensure MLflow server is running and configured.')

# 6.3. Suppress specific warnings globally for cleaner output (use with caution)
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.utils.validation")
warnings.filterwarnings("ignore", category=UserWarning, module="mlflow.sklearn")
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn") # For potential seaborn plot warnings
print("Global UserWarning (sklearn, mlflow) and FutureWarning (seaborn) suppression enabled for cleaner P4 output.")

Phase 4 Init (Extreme Simplicity): Modeling & MLflow libs ready.
MLflow experiment for Extreme Simplicity Run: "Flight_Delay_BTS_V1"


In [63]:
# 8.1. Define the training, evaluation, and logging function (Extreme Simplicity)
def execute_model_pipeline_extreme(
    model_instance,
    run_name_suffix,
    xtr, ytr, xte, yte, # Assumed to be valid and existing
    hyperparameters=None,
    mlflow_signature_input_ex=None # Renamed for clarity in extreme version
):
    """
    Trains, evaluates, and logs a model to MLflow. Assumes data is ready.
    """
    full_run_name = f"{model_instance.__class__.__name__}_{run_name_suffix}"
    print(f"\nAttempting to train: {full_run_name}")

    with mlflow.start_run(run_name=full_run_name) as active_run:
        if hyperparameters:
            try:
                if hasattr(model_instance, 'set_params'): # Check before calling
                    model_instance.set_params(**hyperparameters)
                mlflow.log_params(hyperparameters)
            except Exception as e_param:
                print(f"  Warning: Param issue for {full_run_name}. Error: {e_param}")

        model_instance.fit(xtr, ytr)
        predictions = model_instance.predict(xte)

        mae = mean_absolute_error(yte, predictions)
        rmse = np.sqrt(mean_squared_error(yte, predictions))
        r2 = r2_score(yte, predictions)

        print(f"  RESULTS for {full_run_name}: MAE={mae:.4f}, RMSE={rmse:.4f}, R2={r2:.4f}")

        mlflow.log_metric("test_mae", mae)
        mlflow.log_metric("test_rmse", rmse)
        mlflow.log_metric("test_r2", r2)

        model_artifact_name = f"{model_instance.__class__.__name__.lower()}_model_extreme"
        if mlflow_signature_input_ex is not None:
            mlflow.sklearn.log_model(
                sk_model=model_instance,
                artifact_path=model_artifact_name,
                input_example=mlflow_signature_input_ex
            )
        else:
            mlflow.sklearn.log_model(
                sk_model=model_instance,
                artifact_path=model_artifact_name
            )
        return model_instance

In [64]:
# 9.1. Prepare MLflow input example locally (as Cell 7 was removed)
# This MUST succeed if X_train_processed is valid from Part 3.
mlflow_input_example_for_signature = None
try:
    # Crucial: X_train_processed, final_feature_names must exist from Part 3
    # If X_train_processed is None or empty, this will be skipped.
    if 'X_train_processed' in locals() and X_train_processed is not None and X_train_processed.shape[0] > 0:
        num_rows_example = min(5, X_train_processed.shape[0])
        sample_data_example = X_train_processed[:num_rows_example]

        if ('final_feature_names' in locals() and
            isinstance(final_feature_names, list) and
            len(final_feature_names) == X_train_processed.shape[1]):
            mlflow_input_example_for_signature = pd.DataFrame(sample_data_example, columns=final_feature_names)
            print('Cell 9: Created DataFrame input_example for MLflow signature.')
        else:
            mlflow_input_example_for_signature = sample_data_example
            print('Cell 9: Created NumPy array input_example for MLflow (final_feature_names not suitable/available).')
    else:
        print("Cell 9 WARNING: X_train_processed not found or empty. Models will be logged without input_example.")
except NameError as ne: # Handles cases where X_train_processed or final_feature_names might not be defined
    print(f"Cell 9 WARNING: Could not create input_example due to missing variable ({ne}). Models logged without it.")
except Exception as e_ex_create:
    print(f"Cell 9 WARNING: Error creating input_example: {e_ex_create}. Models logged without it.")


# 9.2. Train various models (Extreme Simplicity)
# ASSUMPTION: X_train_processed, y_train, X_test_processed, y_test are valid and exist from Part 3.
# If not, this cell will raise an error directly.
print("\n---=== BATCH MODEL TRAINING (EXTREME SIMPLICITY RUN) ===---")
extreme_run_trained_models = {}

# Common arguments for model execution
# These will cause NameError if Part 3 variables are not set.
common_args_extreme = {
    "xtr": X_train_processed, "ytr": y_train,
    "xte": X_test_processed, "yte": y_test,
    "mlflow_signature_input_ex": mlflow_input_example_for_signature
}

# Model 1: Linear Regression
lr_extreme_model = LinearRegression()
extreme_run_trained_models['LinearRegression'] = execute_model_pipeline_extreme(
    model_instance=lr_extreme_model, run_name_suffix="Baseline_Extreme", **common_args_extreme
)

# Model 2: Ridge Regression
ridge_extreme_params = {'alpha': 1.0}
ridge_extreme_model = Ridge() # Params set by execute_model_pipeline_extreme
extreme_run_trained_models['Ridge'] = execute_model_pipeline_extreme(
    model_instance=ridge_extreme_model, run_name_suffix=f"Alpha{ridge_extreme_params['alpha']}_Extreme",
    hyperparameters=ridge_extreme_params, **common_args_extreme
)

# Model 3: Lasso Regression
lasso_extreme_params = {'alpha': 0.001, 'max_iter': 10000}
lasso_extreme_model = Lasso()
extreme_run_trained_models['Lasso'] = execute_model_pipeline_extreme(
    model_instance=lasso_extreme_model, run_name_suffix=f"Alpha{lasso_extreme_params['alpha']}_Extreme",
    hyperparameters=lasso_extreme_params, **common_args_extreme
)

# Model 4: Random Forest Regressor (Simplified params for speed)
rf_extreme_params = {'n_estimators': 50, 'max_depth': 8, 'random_state': 42, 'n_jobs': -1}
rf_extreme_model = RandomForestRegressor()
extreme_run_trained_models['RandomForest'] = execute_model_pipeline_extreme(
    model_instance=rf_extreme_model, run_name_suffix="QuickRF_Extreme",
    hyperparameters=rf_extreme_params, **common_args_extreme
)

# Model 5: Gradient Boosting Regressor (Simplified params for speed)
gbr_extreme_params = {'n_estimators': 50, 'learning_rate': 0.1, 'max_depth': 4, 'random_state': 42}
gbr_extreme_model = GradientBoostingRegressor()
extreme_run_trained_models['GradientBoosting'] = execute_model_pipeline_extreme(
    model_instance=gbr_extreme_model, run_name_suffix="QuickGBR_Extreme",
    hyperparameters=gbr_extreme_params, **common_args_extreme
)

print("\n---=== FINISHED BATCH MODEL TRAINING (EXTREME SIMPLICITY RUN) ===---")
extreme_run_trained_models = {k: v for k, v in extreme_run_trained_models.items() if v is not None} # Filter out failed trainings
if not extreme_run_trained_models:
    print("CRITICAL WARNING: No models were successfully trained in the extreme run. Check Part 3 outputs and data.")
else:
    print(f"Successfully trained models (extreme run): {list(extreme_run_trained_models.keys())}")

Cell 9: Created DataFrame input_example for MLflow signature.

---=== BATCH MODEL TRAINING (EXTREME SIMPLICITY RUN) ===---

Attempting to train: LinearRegression_Baseline_Extreme
  RESULTS for LinearRegression_Baseline_Extreme: MAE=0.0001, RMSE=0.0001, R2=1.0000

Attempting to train: Ridge_Alpha1.0_Extreme
  RESULTS for Ridge_Alpha1.0_Extreme: MAE=0.0001, RMSE=0.0001, R2=1.0000

Attempting to train: Lasso_Alpha0.001_Extreme
  RESULTS for Lasso_Alpha0.001_Extreme: MAE=0.0015, RMSE=0.0027, R2=0.9993

Attempting to train: RandomForestRegressor_QuickRF_Extreme
  RESULTS for RandomForestRegressor_QuickRF_Extreme: MAE=0.0124, RMSE=0.0330, R2=0.8907

Attempting to train: GradientBoostingRegressor_QuickGBR_Extreme
  RESULTS for GradientBoostingRegressor_QuickGBR_Extreme: MAE=0.0084, RMSE=0.0286, R2=0.9180

---=== FINISHED BATCH MODEL TRAINING (EXTREME SIMPLICITY RUN) ===---
Successfully trained models (extreme run): ['LinearRegression', 'Ridge', 'Lasso', 'RandomForest', 'GradientBoosting']


In [65]:
CHOSEN_MODEL_KEY = 'GradientBoosting'
chosen_gbr_r2 = 0.9180
chosen_gbr_rmse = 0.0286

if CHOSEN_MODEL_KEY in extreme_run_trained_models and \
   'model' in extreme_run_trained_models[CHOSEN_MODEL_KEY]:

    gbr_model_to_save = extreme_run_trained_models[CHOSEN_MODEL_KEY]['model']
    save_path = Path(models_folder) / f"best_model_gbr.joblib"

    try:
        joblib.dump(gbr_model_to_save, save_path)
    except Exception as e_joblib:
        print(f"JOB LIB SAVE ERROR: {e_joblib}")

    try:
        exp = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
        if exp:
            with mlflow.start_run(
                run_name=f"Best_{CHOSEN_MODEL_KEY}",
                experiment_id=exp.experiment_id,
                log_system_metrics=False
            ) as final_run:
                mlflow.set_tag("is_best", "true")
                mlflow.log_params({"final_model_type": CHOSEN_MODEL_KEY})
                mlflow.log_metrics({"r2": chosen_gbr_r2, "rmse": chosen_gbr_rmse})
                mlflow.sklearn.log_model(
                    sk_model=gbr_model_to_save,
                    artifact_path="final_model_package",
                    input_example=mlflow_input_example_for_signature,
                )
    except Exception as e_mlflow:
        print(f"MLFLOW LOGGING ERROR: {e_mlflow}")