In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
import seaborn as sns
sns.set()
# ML Models
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, ElasticNet
import xgboost as xgb
# tune
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import mlflow.xgboost
import mlflow.sklearn
import mlflow
from mlflow.models.signature import infer_signature
from urllib.parse import urlparse

import logging
import sys
import warnings
# logging.basicConfig()
# logger = logging.getLogger(__name__)

## Loading data

In [None]:
# Read the full dataset; the first CSV column becomes the index.
df = pd.read_csv('data/php_data_all.csv', index_col=0)
# Keep only rows whose Te[K] lies strictly between 300 and 355
# (rows at exactly 300 or 355 are dropped).
# NOTE(review): the earlier note described the range as [300, 355];
# confirm whether the bounds were meant to be inclusive.
df = df[df['Te[K]'].gt(300) & df['Te[K]'].lt(355)]
display(df)

## 1. Predicting TR

In [None]:
# Model inputs (x) and the target column TR[K/W] (y).
x = df.loc[:, ['Te[K]', 'dT[K]', 'P[bar]', 'Q[W]', 'Fluid', 'FR']]
y = df.loc[:, 'TR[K/W]']

# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Preprocessing: column groups and the transformer applied to each group.
categorical_features = ['Fluid']
numeric_features = ['Te[K]', 'dT[K]', 'P[bar]', 'Q[W]', 'FR']

# Categorical columns are one-hot encoded into a dense array.
categorical_tranformer = make_pipeline(OneHotEncoder(sparse_output=False))
# Numeric columns are standardised.
numeric_transformer = make_pipeline(StandardScaler())

# Route each column group through its transformer.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_tranformer, categorical_features),
])

In [None]:
def evaluate(y_test, y_pred, k=6):
    """Compute regression metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_test``.
    k : int, default=6
        Number of predictors used in the adjusted-R2 correction.

    Returns
    -------
    tuple
        ``(rmse, mae, r2, r2_adj)``. ``r2_adj`` is ``nan`` when
        ``n - k - 1 <= 0``, because the adjusted R2 is undefined without
        positive residual degrees of freedom.
    """
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # len() works for Series, ndarrays and plain sequences alike.
    n = len(y_test)
    residual_dof = n - k - 1
    if residual_dof > 0:
        r2_adj = 1 - ((1 - r2) * (n - 1)) / residual_dof
    else:
        # Avoid a ZeroDivisionError (or a meaningless value) on tiny samples.
        r2_adj = float('nan')
    return rmse, mae, r2, r2_adj

In [None]:
# Enable MLflow's scikit-learn autologging before any model is fitted.
mlflow.sklearn.autolog()

def objective(params):
    """Hyperopt objective: fit an XGBoost pipeline and return its test RMSE.

    Each call opens its own MLflow run, tags it, logs ``params``, fits
    ``preprocessor`` + ``XGBRegressor`` on the module-level ``x_train`` /
    ``y_train``, scores the predictions for ``x_test`` with ``evaluate`` and
    logs the fitted pipeline as a scikit-learn model.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``xgb.XGBRegressor``.

    Returns
    -------
    dict
        ``{'loss': rmse, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        mlflow.set_tag('model', 'xgb')
        mlflow.log_params(params=params)

        model_xgb = xgb.XGBRegressor(**params)
        data_pipeline_xgb = Pipeline(steps=[('Preprocessing', preprocessor),
                                            ('xgb_model', model_xgb)])

        data_pipeline_xgb.fit(x_train, y_train)

        pred = data_pipeline_xgb.predict(x_test)

        # Only the RMSE is needed here; it is the loss handed back to hyperopt.
        # TODO (from original author): need to check with one target variable.
        rmse, _, _, _ = evaluate(y_test=y_test, y_pred=pred)

        # The signature describes the raw feature frame, so the artifact that
        # is logged must be the whole pipeline (preprocessing + regressor),
        # not the bare regressor, which only accepts preprocessed inputs.
        signature = infer_signature(x_train, pred)

        # NOTE(review): passing registered_model_name here registers a model
        # version on every trial; consider registering only the best run.
        mlflow.sklearn.log_model(
            sk_model=data_pipeline_xgb,
            artifact_path="sklearn-model",
            signature=signature,
            registered_model_name="xgb-regressor",
        )
    return {'loss': rmse, 'status': STATUS_OK}

In [18]:
# mlflow manual logging of metrics and model

def objective_xgb(params):
    """Hyperopt objective for XGBoost with manual MLflow metric logging.

    Each call opens its own MLflow run, tags it, logs ``params``, fits
    ``preprocessor`` + ``XGBRegressor`` on the module-level ``x_train`` /
    ``y_train``, scores the predictions for ``x_test`` with ``evaluate``,
    logs the four metrics and logs the fitted pipeline as a scikit-learn model.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``xgb.XGBRegressor``.

    Returns
    -------
    dict
        ``{'loss': rmse, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        mlflow.set_tag('model', 'xgb')
        mlflow.log_params(params=params)

        model_xgb = xgb.XGBRegressor(**params)
        data_pipeline_xgb = Pipeline(steps=[('Preprocessing', preprocessor),
                                            ('xgb_model', model_xgb)])

        data_pipeline_xgb.fit(x_train, y_train)

        pred = data_pipeline_xgb.predict(x_test)

        # TODO (from original author): need to check with one target variable.
        rmse, mae, r2, r2_adj = evaluate(y_test=y_test, y_pred=pred)

        # The signature describes the raw feature frame, so the artifact that
        # is logged must be the whole pipeline (preprocessing + regressor),
        # not the bare regressor, which only accepts preprocessed inputs.
        signature = infer_signature(x_train, pred)

        mlflow.log_metric('rmse', rmse)
        # Metric key kept as 'ame' so new runs stay comparable with runs
        # that were already logged under that name.
        mlflow.log_metric('ame', mae)
        mlflow.log_metric('r2', r2)
        mlflow.log_metric('r2_adj', r2_adj)

        # NOTE(review): passing registered_model_name here registers a model
        # version on every trial; consider registering only the best run.
        mlflow.sklearn.log_model(
            sk_model=data_pipeline_xgb,
            artifact_path="sklearn-model",
            signature=signature,
            registered_model_name="xgb-regressor",
        )
    return {'loss': rmse, 'status': STATUS_OK}

In [19]:
# Hyperparameter search space for the XGBoost objective:
#   eta       - continuous, uniform between 0.1 and 1
#   max_depth - integer drawn by hp.randint with bounds 2 and 5
#               (per the hyperopt docs the upper bound is exclusive)
search_space_xgb = dict(
    eta=hp.uniform('eta', 0.1, 1),
    max_depth=hp.randint('max_depth', 2, 5),
)

In [20]:
# Hyperparameter tuning with hyperopt: 15 TPE-suggested trials of
# objective_xgb over search_space_xgb, recorded in a fresh Trials object.
best_results_xgb = fmin(
    fn=objective_xgb,
    space=search_space_xgb,
    algo=tpe.suggest,
    max_evals=15,
    trials=Trials(),
)

  0%|          | 0/15 [00:00<?, ?trial/s, best loss=?]

  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:39:55 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 71



  7%|▋         | 1/15 [00:01<00:19,  1.40s/trial, best loss: 0.006541170877881907]

Created version '71' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:39:56 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 72



 13%|█▎        | 2/15 [00:02<00:16,  1.31s/trial, best loss: 0.006449847085942419]

Created version '72' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:39:57 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 73



 20%|██        | 3/15 [00:03<00:15,  1.30s/trial, best loss: 0.006449847085942419]

Created version '73' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:39:58 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 74



 27%|██▋       | 4/15 [00:05<00:13,  1.23s/trial, best loss: 0.003490002834326971]

Created version '74' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:00 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 75



 33%|███▎      | 5/15 [00:06<00:12,  1.23s/trial, best loss: 0.003490002834326971]

Created version '75' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:01 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 76



 40%|████      | 6/15 [00:07<00:10,  1.22s/trial, best loss: 0.003490002834326971]

Created version '76' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:02 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 77



 47%|████▋     | 7/15 [00:08<00:09,  1.22s/trial, best loss: 0.0027031219690585597]

Created version '77' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:03 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 78



 53%|█████▎    | 8/15 [00:09<00:08,  1.21s/trial, best loss: 0.0027031219690585597]

Created version '78' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 79



 60%|██████    | 9/15 [00:11<00:07,  1.20s/trial, best loss: 0.0027031219690585597]

Created version '79' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:06 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 80



 67%|██████▋   | 10/15 [00:12<00:06,  1.22s/trial, best loss: 0.0027031219690585597]

Created version '80' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:07 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 81



 73%|███████▎  | 11/15 [00:13<00:04,  1.21s/trial, best loss: 0.0027031219690585597]

Created version '81' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:08 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 82



 80%|████████  | 12/15 [00:14<00:03,  1.19s/trial, best loss: 0.0027031219690585597]

Created version '82' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:09 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 83



 87%|████████▋ | 13/15 [00:15<00:02,  1.21s/trial, best loss: 0.0027031219690585597]

Created version '83' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:11 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 84



 93%|█████████▎| 14/15 [00:17<00:01,  1.21s/trial, best loss: 0.0027031219690585597]

Created version '84' of model 'xgb-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'xgb-regressor' already exists. Creating a new version of this model...
2023/11/04 18:40:12 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-regressor, version 85



100%|██████████| 15/15 [00:18<00:00,  1.23s/trial, best loss: 0.0027031219690585597]


Created version '85' of model 'xgb-regressor'.


### Random Forest Regressor (RFR)

In [23]:
# mlflow manual logging of metrics and model

def objective_rfr(params):
    """Hyperopt objective for a random forest with manual MLflow metric logging.

    Each call opens its own MLflow run, tags it, logs ``params``, fits
    ``preprocessor`` + ``RandomForestRegressor`` on the module-level
    ``x_train`` / ``y_train``, scores the predictions for ``x_test`` with
    ``evaluate``, logs the four metrics and logs the fitted pipeline as a
    scikit-learn model.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``RandomForestRegressor``.

    Returns
    -------
    dict
        ``{'loss': rmse, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        # Tag the run with the model family actually trained here
        # (this was previously 'xgb', copied from the XGBoost objective).
        mlflow.set_tag('model', 'rfr')
        mlflow.log_params(params=params)

        model_rfr = RandomForestRegressor(**params)
        data_pipeline_rfr = Pipeline(steps=[('Preprocessing', preprocessor),
                                            ('RFR_model', model_rfr)])

        data_pipeline_rfr.fit(x_train, y_train)

        pred = data_pipeline_rfr.predict(x_test)

        # TODO (from original author): need to check with one target variable.
        rmse, mae, r2, r2_adj = evaluate(y_test=y_test, y_pred=pred)

        # The signature describes the raw feature frame, so the artifact that
        # is logged must be the whole pipeline (preprocessing + regressor),
        # not the bare regressor, which only accepts preprocessed inputs.
        signature = infer_signature(x_train, pred)

        mlflow.log_metric('rmse', rmse)
        # Metric key kept as 'ame' to match the key used by the XGBoost runs.
        mlflow.log_metric('ame', mae)
        mlflow.log_metric('r2', r2)
        mlflow.log_metric('r2_adj', r2_adj)

        # NOTE(review): passing registered_model_name here registers a model
        # version on every trial; consider registering only the best run.
        mlflow.sklearn.log_model(
            sk_model=data_pipeline_rfr,
            artifact_path="sklearn-model",
            signature=signature,
            registered_model_name="rfr-regressor",
        )
    return {'loss': rmse, 'status': STATUS_OK}

In [26]:
# Hyperparameter search space for the random-forest objective:
#   n_estimators - integer drawn by hp.randint with bounds 10 and 100
#   max_depth    - integer drawn by hp.randint with bounds 2 and 5
# (per the hyperopt docs the upper bound of hp.randint is exclusive)
search_space_rfr = dict(
    n_estimators=hp.randint('n_estimators', 10, 100),
    max_depth=hp.randint('max_depth', 2, 5),
)

In [27]:
# Hyperparameter tuning with hyperopt: 15 TPE-suggested trials of
# objective_rfr over search_space_rfr, recorded in a fresh Trials object.
best_results_rfr = fmin(
    fn=objective_rfr,
    space=search_space_rfr,
    algo=tpe.suggest,
    max_evals=15,
    trials=Trials(),
)

  0%|          | 0/15 [00:00<?, ?trial/s, best loss=?]

  inputs = _infer_schema(model_input) if model_input is not None else None



Successfully registered model 'rfr-regressor'.
2023/11/04 18:46:53 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 1



  7%|▋         | 1/15 [00:01<00:24,  1.74s/trial, best loss: 0.0619340402056451]

Created version '1' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:46:54 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 2



 13%|█▎        | 2/15 [00:03<00:19,  1.47s/trial, best loss: 0.0619340402056451]

Created version '2' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:46:55 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 3



 20%|██        | 3/15 [00:04<00:16,  1.39s/trial, best loss: 0.027315659933019994]

Created version '3' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:46:56 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 4



 27%|██▋       | 4/15 [00:05<00:14,  1.30s/trial, best loss: 0.02690422375364724] 

Created version '4' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:46:58 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 5



 33%|███▎      | 5/15 [00:06<00:12,  1.26s/trial, best loss: 0.02690422375364724]

Created version '5' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:46:59 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 6



 40%|████      | 6/15 [00:07<00:11,  1.26s/trial, best loss: 0.02690422375364724]

Created version '6' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:00 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 7



 47%|████▋     | 7/15 [00:09<00:09,  1.23s/trial, best loss: 0.02690422375364724]

Created version '7' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:01 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 8



 53%|█████▎    | 8/15 [00:10<00:08,  1.25s/trial, best loss: 0.02690422375364724]

Created version '8' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:03 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 9



 60%|██████    | 9/15 [00:11<00:07,  1.27s/trial, best loss: 0.02690422375364724]

Created version '9' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 10



 67%|██████▋   | 10/15 [00:12<00:06,  1.26s/trial, best loss: 0.02690422375364724]

Created version '10' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:05 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 11



 73%|███████▎  | 11/15 [00:14<00:04,  1.23s/trial, best loss: 0.02690422375364724]

Created version '11' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:06 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 12



 80%|████████  | 12/15 [00:15<00:03,  1.21s/trial, best loss: 0.02690422375364724]

Created version '12' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:07 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 13



 87%|████████▋ | 13/15 [00:16<00:02,  1.20s/trial, best loss: 0.02690422375364724]

Created version '13' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:09 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 14



 93%|█████████▎| 14/15 [00:17<00:01,  1.21s/trial, best loss: 0.02690422375364724]

Created version '14' of model 'rfr-regressor'.
  inputs = _infer_schema(model_input) if model_input is not None else None



Registered model 'rfr-regressor' already exists. Creating a new version of this model...
2023/11/04 18:47:10 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: rfr-regressor, version 15



100%|██████████| 15/15 [00:18<00:00,  1.26s/trial, best loss: 0.02690422375364724]


Created version '15' of model 'rfr-regressor'.
