In [1]:
import xgboost as xgb
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

In [3]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
data = pd.read_excel(url)

print(data.head())
print(data.info())
print(data.describe())

     X1     X2     X3      X4   X5  X6   X7  X8     Y1     Y2
0  0.98  514.5  294.0  110.25  7.0   2  0.0   0  15.55  21.33
1  0.98  514.5  294.0  110.25  7.0   3  0.0   0  15.55  21.33
2  0.98  514.5  294.0  110.25  7.0   4  0.0   0  15.55  21.33
3  0.98  514.5  294.0  110.25  7.0   5  0.0   0  15.55  21.33
4  0.90  563.5  318.5  122.50  7.0   2  0.0   0  20.84  28.28
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X1      768 non-null    float64
 1   X2      768 non-null    float64
 2   X3      768 non-null    float64
 3   X4      768 non-null    float64
 4   X5      768 non-null    float64
 5   X6      768 non-null    int64  
 6   X7      768 non-null    float64
 7   X8      768 non-null    int64  
 8   Y1      768 non-null    float64
 9   Y2      768 non-null    float64
dtypes: float64(8), int64(2)
memory usage: 60.1 KB
None
               X1      

In [4]:
X = data.iloc[:, :-2]
y = data['Y1']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost Regressor": xgb.XGBRegressor(n_estimators=100, random_state=42)
}

In [6]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("heat_load_prediction")

for name, model in models.items():
    with mlflow.start_run():

        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mlflow.log_param("model_name", name)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("r2", r2)
        mlflow.sklearn.log_model(model, name = name, input_example = X_train_scaled)

2025/12/06 21:22:25 INFO mlflow.tracking.fluent: Experiment with name 'heat_load_prediction' does not exist. Creating a new experiment.


üèÉ View run melodic-sloth-196 at: http://127.0.0.1:5000/#/experiments/164497386802624848/runs/806485277f0d4f9390e22e991d3e587d
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/164497386802624848




üèÉ View run painted-ram-18 at: http://127.0.0.1:5000/#/experiments/164497386802624848/runs/caf5787c2db5435cb54883a683e4452d
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/164497386802624848




üèÉ View run chill-robin-17 at: http://127.0.0.1:5000/#/experiments/164497386802624848/runs/8bbd270f9f5f477395b282f38a2ee673
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/164497386802624848


In [7]:
model_name = "XGBoost Regressor"
run_id = input('Run ID:')
model_uri = f'runs:/{run_id}/{model_name}'
with mlflow.start_run(run_id=run_id):
    mlflow.register_model(model_uri=model_uri, name=model_name)

Run ID: 8bbd270f9f5f477395b282f38a2ee673


Successfully registered model 'XGBoost Regressor'.
2025/12/06 21:23:12 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGBoost Regressor, version 1
Created version '1' of model 'XGBoost Regressor'.


üèÉ View run chill-robin-17 at: http://127.0.0.1:5000/#/experiments/164497386802624848/runs/8bbd270f9f5f477395b282f38a2ee673
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/164497386802624848


In [12]:
model_version = 1
model_name = "XGBoost Regressor"
model_uri = f"models:/{model_name}/{model_version}"
loaded_model = mlflow.sklearn.load_model(model_uri)
y_pred = loaded_model.predict(X_test_scaled)
print(y_pred[:4])
y_test[:4]

[15.09619  13.24421  32.464813 41.37811 ]


668    16.47
324    13.17
624    32.82
690    41.32
Name: Y1, dtype: float64

In [13]:
model_version = 1
model_name = "XGBoost Regressor"
current_model_uri = f"models:/{model_name}/{model_version}"
production_model_name = "heat-load-prod"

client = mlflow.MlflowClient()
client.copy_model_version(src_model_uri=current_model_uri, dst_name=production_model_name)

Successfully registered model 'heat-load-prod'.
Copied version '1' of model 'XGBoost Regressor' to version '1' of model 'heat-load-prod'.


<ModelVersion: aliases=[], creation_timestamp=1765054316020, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1765054316020, metrics=None, model_id=None, name='heat-load-prod', params=None, run_id='8bbd270f9f5f477395b282f38a2ee673', run_link='', source='models:/XGBoost Regressor/1', status='READY', status_message=None, tags={}, user_id='', version='1'>

In [14]:
production_model_name = "heat-load-prod"
prod_model_uri = f"models:/{production_model_name}@champion"

loaded_model = mlflow.sklearn.load_model(prod_model_uri)
y_pred = loaded_model.predict(X_test_scaled)
y_pred[:4]

array([15.09619 , 13.24421 , 32.464813, 41.37811 ], dtype=float32)