# Production des modèles

# Import des librairies nécessaires

In [1]:
# Librairie générales

import commentjson
import os
import sys
import importlib
import numpy as np



# Librairies personnelles
# Make the project packages importable: append `<parent of cwd>/src` and its
# `modelisation` sub-folder to the end of the module search path.
path = os.getcwd()
path_src = os.path.abspath(os.path.join(path, os.pardir, "src"))
path_mode = os.path.join(path_src, "modelisation")
sys.path.extend([path_src, path_mode])

from casestudy import set_exp_study
importlib.reload(set_exp_study)

from importdata import import_from_influxdb
importlib.reload(import_from_influxdb)
from analysdesc import analyse_descriptive
from utilitaires import utilitaires
from modelisation import mlflow_functions
from modelisation import lgbm_functions
from modelisation import build_run_models
importlib.reload(analyse_descriptive)


#from modelisation import build_run_models, lgbm_functions, mlflow_functions

# Librairies ML
import mlflow
from mlflow.tracking import MlflowClient
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.pipeline import Pipeline
import optuna


# Set up de l'expérience

In [2]:
# Client whose experiment configuration is loaded below.
client = "Demo"

# Re-import the project module so recent edits are picked up.
importlib.reload(set_exp_study)
with open(f"configs/{client}/experiment_config.json", encoding="utf-8") as file:
    exp_config = commentjson.load(file)

# Experiment parameters built from the config and the mlflow module.
dico_exp = set_exp_study.Experiment_Params(exp_config, mlflow)

# Dictionary in which the figures are stored.
dico_figure = {}

# Import des données

In [None]:
# Load the data for the experiment's reference period. Thresholding and
# cleaning are switched off here; the call also returns a cleaning report and
# an error message.
data, clean_report, message_error = import_from_influxdb.Charger_Preparer_Data(
    ref_periode_debut=dico_exp['ref_periode_debut'],
    ref_periode_fin=dico_exp['ref_periode_fin'],
    ipe_tag=dico_exp['dico_model']['tag_modelise'],
    dico_du_model=dico_exp['dico_model'],
    use_seuil_min=False,
    use_seuil_max=False,
    clean_data=False,
    concat_after=True,
    load_unused_feature=True,
    client=exp_config['client'],
    zscore=3,
)

In [5]:
# Reference (training) period bounds, inclusive.
debut_ref = "2024-02-05 18:08:00"
fin_ref   = "2024-03-11 05:00:00"

# Monitoring period bounds, inclusive (note: it precedes the reference period).
debut_suivi = "2023-09-02 13:31:00"
fin_suivi   = "2024-01-31 16:17:00"

# Explicit copies: `data_ref` is modified by later cells (rows dropped,
# prediction columns added), which on a filtered slice of `data` can trigger
# pandas' SettingWithCopyWarning.
data_ref = data[(data.index >= debut_ref) & (data.index <= fin_ref)].copy()
data_suivi = data[(data.index >= debut_suivi) & (data.index <= fin_suivi)].copy()

# Modélisation

#### Lancement d'Optuna

In [None]:
from lightgbm import LGBMRegressor



In [None]:
import lightgbm

# Drop incomplete rows by rebinding instead of `dropna(inplace=True)`:
# `data_ref` comes from boolean filtering of `data`, and mutating such a
# slice in place can raise SettingWithCopyWarning.
data_ref = data_ref.dropna()
# The first column is used as the target, every other column as a feature.
Y_ref = data_ref[data_ref.columns[0]]
X_ref = data_ref.drop(columns=data_ref.columns[0])

# Early stopping after 100 rounds without improvement; evaluation logging off.
callbacks = [lightgbm.early_stopping(100, verbose=0), lightgbm.log_evaluation(period=0)]

# Hyper-parameters that are not searched: they are merged into every trial
# and into the final model.
fixed_hp = {
    'metric': 'rmse',
    'random_state': 48,
}

def objective(trial, data=X_ref, target=Y_ref):
    """Optuna objective: fit one LightGBM regressor and score its overfitting.

    The data is split 80/20 with a fixed seed. The model is trained on the
    80% part, with the 20% part used as the early-stopping evaluation set.

    Parameters
    ----------
    trial : optuna trial
        Used to sample the hyper-parameters of this run.
    data, target
        Features and target. The defaults are bound to ``X_ref`` / ``Y_ref``
        as they exist when this function is defined.

    Returns
    -------
    float
        ``0.4 * rmse_train + 0.6 * |rmse_train - rmse_test|`` (to minimise).
    """
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.2, random_state=42)

    callbacks = [lightgbm.early_stopping(100, verbose=0),
                 lightgbm.log_evaluation(period=0)]

    # `step` is passed by keyword: Optuna deprecates passing it positionally.
    # NOTE(review): with max_depth <= 6 a tree has at most 2**6 = 64 leaves,
    # so num_leaves in [100, 200] probably never binds; confirm the range.
    param = {
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.5, 0.6, 0.7]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.02, 0.04, 0.08, 0.12]),
        'max_depth': trial.suggest_categorical('max_depth', [4, 5, 6]),
        'n_estimators': trial.suggest_int('n_estimators', 10, 500, step=10),
        'num_leaves': trial.suggest_int('num_leaves', 100, 200, step=20),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'subsample': trial.suggest_categorical('subsample', [0.7, 0.8, 0.9]),
    }
    # Fixed (non-searched) hyper-parameters override/complete the sampled ones.
    param.update(fixed_hp)

    model = LGBMRegressor(**param)
    model.fit(train_x, train_y, eval_set=[(test_x, test_y)], callbacks=callbacks)

    # RMSE as sqrt(MSE): the `squared=False` argument of mean_squared_error
    # is deprecated and absent from recent scikit-learn releases.
    rmse_train = np.sqrt(mean_squared_error(train_y, model.predict(train_x)))
    rmse_test = np.sqrt(mean_squared_error(test_y, model.predict(test_x)))

    # Weighted sum of the train error and the train/test gap.
    alpha_overfit = 0.4
    score_final = (alpha_overfit * rmse_train
                   + (1 - alpha_overfit) * np.abs(rmse_train - rmse_test))

    return score_final

# Seed the sampler so the search is reproducible from one run to the next
# (TPE is Optuna's default sampler; only the seed is new). The seed reuses
# the fixed model random_state to avoid another magic number.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=fixed_hp['random_state']),
)
study.optimize(objective, n_trials=200)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

In [None]:
# Uncomment to inspect the trials as a table instead:
#study.trials_dataframe()
# Plot the study's optimization history.
optuna.visualization.plot_optimization_history(study)

#### Relance du modèle avec les meilleurs HP

In [8]:
# Best sampled hyper-parameters completed with the fixed ones. A dict merge
# is used instead of a module-level loop so no loop variables (`p`, `pv`)
# leak into the notebook namespace.
best_params = {**study.best_params, **fixed_hp}

model = LGBMRegressor(**best_params)

In [None]:
# Fit on the whole reference set (no hold-out at this stage).
model.fit(X_ref,Y_ref)


In [10]:
# In-sample predictions: the model is applied to the data it was fitted on.
Y_ref_pred = model.predict(X_ref)

In [None]:
from sklearn.metrics import r2_score

# In-sample R² (computed on the training data, so an optimistic figure).
r2_score(Y_ref,Y_ref_pred)

In [12]:
# Lower bound: quantile regression at alpha = 1 - 0.95 (the 5% quantile),
# reusing the tuned hyper-parameters. `fit` returns the estimator itself.
lower = LGBMRegressor(objective='quantile', alpha=1 - 0.95, **best_params)
lower_pred_ref = lower.fit(X_ref, Y_ref).predict(X_ref)

In [13]:
# Upper bound: quantile regression at alpha = 0.95 (the 95% quantile),
# reusing the tuned hyper-parameters. `fit` returns the estimator itself.
upper = LGBMRegressor(objective='quantile', alpha=0.95, **best_params)
upper_pred_ref = upper.fit(X_ref, Y_ref).predict(X_ref)

In [None]:
# Attach the point prediction and both quantile bounds to the reference data.
# `assign` builds a new frame instead of item-assigning on `data_ref`, which
# was obtained by filtering `data` (such assignments can raise
# SettingWithCopyWarning); re-running the cell simply overwrites the columns.
data_ref = data_ref.assign(
    model=Y_ref_pred,
    lower_pred=lower_pred_ref,
    upper_pred=upper_pred_ref,
)



In [18]:
# Remove incomplete rows from the full dataset (in place), then split it into
# target and features using the same first column as the reference set.
data.dropna(inplace=True)
Y = data.loc[:, data_ref.columns[0]]
X = data.drop(data_ref.columns[0], axis=1)

In [19]:
# Predict both quantile bounds over the whole cleaned dataset.
upper_pred, lower_pred = upper.predict(X), lower.predict(X)

In [None]:
# No cell adds a 'lower' column to `data`, so `data['lower']` raised KeyError;
# the lower-bound predictions are held in `lower_pred`.
lower_pred

In [20]:
import pandas as pd

# Gather both quantile bounds in one frame aligned on the data's index.
data_upper_lower = pd.DataFrame(
    {'upper': upper_pred, 'lower': lower_pred},
    index=data.index,
)

In [None]:
# Quick line plot of the upper and lower bounds over the data's index.
data_upper_lower.plot()

In [None]:
import plotly.graph_objects as go


# Overlay the measurement, the fitted model and both quantile bounds over the
# reference period: one (column, legend name) pair per line trace.
fig = go.Figure(data=[
    go.Scatter(x=data_ref.index, y=data_ref[column], mode='lines', name=legend)
    for column, legend in (
        ('DEB_VAP_TOTAL', 'mesure'),
        ('model', 'modele'),
        ('lower_pred', 'lower_pred'),
        ('upper_pred', 'upper_pred'),
    )
])
fig.show()

In [14]:
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer
import onnxruntime as rt
from onnxmltools import convert_lightgbm
from skl2onnx import to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes  # noqa
from onnxmltools import __version__ as oml_version
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm
from onnxruntime.capi.onnxruntime_pybind11_state import Fail as OrtFail
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes  # noqa
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm  # noqa
import onnxmltools.convert.common.data_types
from skl2onnx.common.data_types import FloatTensorType, StringTensorType
from skl2onnx.common.data_types import Int64TensorType
from onnx.onnx_pb import StringStringEntryProto
from lightgbm import LGBMRegressor


In [15]:
def skl2onnx_convert_lightgbm(scope, operator, container):
    """skl2onnx converter hook for LightGBM models.

    Copies the optional ``split`` conversion option onto ``operator``
    (``None`` when the option is absent), then delegates the conversion to
    onnxmltools' ``convert_lightgbm``.

    Parameters
    ----------
    scope, operator, container
        Objects handed over by skl2onnx during conversion; they are passed
        unchanged to ``convert_lightgbm``.
    """
    # Local imports: the visible notebook never imports `warnings`, nor any
    # module under the name `pv` that the original version check relied on.
    import re
    import warnings

    def _release(version):
        # Leading dotted-number run of a version string as an int tuple,
        # e.g. '1.11.2.dev0' -> (1, 11, 2); empty tuple if there is none.
        match = re.match(r'\d+(?:\.\d+)*', version)
        return tuple(int(part) for part in match.group().split('.')) if match else ()

    options = scope.get_options(operator.raw_operator)
    if 'split' in options:
        if _release(oml_version) < (1, 9, 2):
            warnings.warn(
                "Option split was released in version 1.9.2 but %s is "
                "installed. It will be ignored." % oml_version)
        operator.split = options['split']
    else:
        operator.split = None
    convert_lightgbm(scope, operator, container)

def convert_to_onnx(X, model, target_opset=None):
    """Convert a fitted LGBMRegressor to ONNX, one graph input per column of X.

    Parameters
    ----------
    X : DataFrame
        Only its column names and dtypes are read, to declare the inputs.
    model : LGBMRegressor
        The model to convert.
    target_opset : dict, optional
        Opsets passed to ``to_onnx``. Defaults to
        ``{'': 13, 'ai.onnx.ml': 2}``, the value previously hard-coded.

    Returns
    -------
    The ONNX model returned by ``to_onnx``, with a single float output
    named ``'target'``.
    """
    if target_opset is None:
        target_opset = {'': 13, 'ai.onnx.ml': 2}

    # Register the LightGBM converter for LGBMRegressor with skl2onnx.
    update_registered_converter(
        LGBMRegressor, 'LightGbmLGBMRegressor',
        calculate_linear_regressor_output_shapes,
        skl2onnx_convert_lightgbm,
        options={'split': None})

    # Map each column to a tensor type from its dtype kind, so any integer or
    # float width is handled (previously only exact 'int64' / 'float64' were
    # recognised and e.g. float32 columns were declared as strings).
    # NOTE(review): every column is declared as its own [None, 1] input;
    # confirm the LightGBM converter accepts that for a bare regressor.
    inputs = []
    for name, dtype in zip(X.columns, X.dtypes):
        kind = getattr(dtype, 'kind', 'O')
        if kind in 'iu':
            tensor_type = Int64TensorType([None, 1])
        elif kind == 'f':
            tensor_type = FloatTensorType([None, 1])
        else:
            tensor_type = StringTensorType([None, 1])
        inputs.append((name, tensor_type))

    output = [('target', FloatTensorType([None, 1]))]

    model_onnx = to_onnx(model, initial_types=inputs, final_types=output,
                         target_opset=target_opset)

    return model_onnx

In [16]:
# Display the reference feature matrix.
X_ref

Unnamed: 0_level_0,Prod_moy,DEBIT_VAPEUR_SECH,Grammage,Temp_Ext
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-02-05 18:08:00,41.010971,56.825127,81.890221,7.973333
2024-02-05 18:09:00,40.945065,57.151169,81.766258,7.970000
2024-02-05 18:10:00,40.980160,56.611277,81.784515,7.966667
2024-02-05 18:11:00,41.364578,57.379623,82.530579,7.963333
2024-02-05 18:12:00,41.019264,58.561216,81.824287,7.960000
...,...,...,...,...
2024-03-11 04:56:00,37.699162,53.619308,84.705276,5.306667
2024-03-11 04:57:00,37.889694,53.735004,85.124123,5.305000
2024-03-11 04:58:00,37.784264,54.478428,84.951111,5.303333
2024-03-11 04:59:00,37.777130,54.844524,84.920113,5.301667


# Tutoriel : intervalles de prédiction par régression quantile (California Housing)

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

def sklearn_to_df(data_loader):
    """Turn a scikit-learn style dataset object into pandas objects.

    Parameters
    ----------
    data_loader
        Object exposing ``data``, ``feature_names`` and ``target``.

    Returns
    -------
    tuple
        ``(features, target)``: a DataFrame whose columns are the feature
        names, and a Series named ``'target'``.
    """
    features = pd.DataFrame(data_loader.data, columns=data_loader.feature_names)
    target = pd.Series(data_loader.target, name='target')
    return features, target


In [20]:
# California Housing as (features, target), then a seeded 80/20 split.
x, y = sklearn_to_df(fetch_california_housing())

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.2)

In [23]:
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
#from data_loader import x_train, x_test, y_train, y_test

In [24]:
# Baseline point-prediction model with default hyper-parameters
# (`fit` returns the estimator itself).
regressor = lgb.LGBMRegressor().fit(x_train, y_train)
regressor_pred = regressor.predict(x_test)

In [25]:
# Lower bound: quantile regression at alpha = 1 - 0.95.
# Note: this rebinds `lower` / `lower_pred`, names also used earlier in the
# notebook for the production model.
lower = lgb.LGBMRegressor(alpha=1 - 0.95, objective='quantile').fit(x_train, y_train)
lower_pred = lower.predict(x_test)

In [26]:
# Upper bound: quantile regression at alpha = 0.95.
# Note: this rebinds `upper` / `upper_pred`, names also used earlier in the
# notebook for the production model.
upper = lgb.LGBMRegressor(alpha=0.95, objective='quantile').fit(x_train, y_train)
upper_pred = upper.predict(x_test)