# Librairies nécessaires

In [2]:
import commentjson
import os
#os.chdir('/mnt/batch/tasks/shared/LS_root/mounts/clusters/pythonnb/code/Users/david.mouquet/modeling')


import missingno as msno
import sys
import importlib
import pandas as pd
import math
import datetime
import numpy as np

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers


from sklearn.metrics import r2_score
from onnx.onnx_pb import StringStringEntryProto

# Project-local libraries: make ../src and ../src/modelisation importable
# (both resolved relative to the notebook's current working directory).
path = os.getcwd()
path_src, path_mode = (
    os.path.abspath(os.path.join(path, os.pardir, *sub_dirs))
    for sub_dirs in (("src",), ("src", "modelisation"))
)
sys.path.extend([path_src, path_mode])

from casestudy import set_exp_study
importlib.reload(set_exp_study)

import importlib
from importdata import import_from_influxdb
importlib.reload(import_from_influxdb)
from analysdesc import analyse_descriptive
from utilitaires import utilitaires

import keras_tuner as kt
from keras.layers import Input, Embedding, LSTM, Dense, concatenate

# Fonctions annexes

In [3]:
import matplotlib.pyplot as plt

def plot_loss(history, ax=None):
  """Plot the training loss per epoch, plus the validation loss when recorded.

  Args:
    history: object exposing a ``history`` mapping with a ``'loss'`` sequence
      and, optionally, a ``'val_loss'`` sequence (e.g. the value returned by
      ``model.fit``).
    ax: optional Matplotlib axes to draw on. When omitted, the current pyplot
      axes are used, which matches the previous pyplot-based behaviour.

  Returns:
    None. The curves are drawn on the target axes.
  """
  if ax is None:
    ax = plt.gca()

  losses = history.history
  ax.plot(losses['loss'], label='loss')
  # 'val_loss' only exists when validation data was supplied to the fit;
  # skip it instead of failing with a KeyError.
  if 'val_loss' in losses:
    ax.plot(losses['val_loss'], label='val_loss')
  ax.set_xlabel('Epoch')
  ax.set_ylabel('Error')
  ax.legend()
  ax.grid(True)


def plot_model_mesure(mesure,model):
  """Show an interactive Plotly line chart comparing measurements with predictions.

  Args:
    mesure: series of measured values; its ``index`` is used as the x axis.
    model: series of model outputs; its ``index`` is used as the x axis.

  Returns:
    None. The figure is displayed through ``fig.show()``.
  """
  import plotly.graph_objects as go

  figure = go.Figure()

  # One line trace per series, measurement first so it is drawn below the model.
  for series, trace_name in ((mesure, "Mesure"), (model, "Model")):
    trace = go.Scatter(x=series.index, y=series, mode='lines', name=trace_name)
    figure.add_trace(trace)

  layout_options = dict(
      title='Comparaison Modele/Mesure',
      width=800,
      height=600,
      xaxis_title='Date',
      yaxis_title='IPE',
  )
  figure.update_layout(**layout_options)

  figure.show()











# Lecture des données

In [8]:
# Pick up any edits made to the local set_exp_study module since it was imported.
importlib.reload(set_exp_study)

# Experiment-level configuration, read from the working directory
# (commentjson is used so the file may contain comments).
with open("experiment_config.json", mode="r", encoding='utf-8') as config_file:
    exp_config = commentjson.load(config_file)


In [9]:
# Location of the model configuration:
#   <dir_models>CONFIGS/<client>/<site>/<depart>/<case_study>/model_config.json
# `dir_models` is concatenated as-is, so it is expected to end with a separator.
models_root = exp_config['dir_models']
config_path_parts = (
    ['CONFIGS']
    + [exp_config[key] for key in ('client', 'site', 'depart', 'case_study')]
    + ['model_config.json']
)
file_model_param = models_root + '/'.join(config_path_parts)

with open(file_model_param, mode="r", encoding='utf-8') as model_config_file:
    dico_model = commentjson.load(model_config_file)


In [None]:
# Model-level settings taken from the model configuration.
freq, uv_mangling, nom_data_store = (
    dico_model[key] for key in ('freq', 'mangling', 'data_store')
)

# NOTE(review): `onnx_model_name`, used by the ONNX export cell, is only defined in the
# commented-out lines below (which also rely on `site` and `model_id`) — confirm where
# it is meant to come from before running the export on a fresh kernel.
#onnx_rep = "C:/Users/33623/Dropbox (Ultiwatt)/D - ULTIVISION INDUSTRIES/4-ARCELORMITTAL/2- DEPLOIEMENT AMF/2-Projet LOT 2/10-Models/onnx"
#onnx_model_name     = onnx_rep + "/" + site + "/" + uv_mangling+"."+model_id+".onnx"

# Reference period bounds: parsed from 'dd/mm/YYYY HH:MM:SS' and re-emitted as ISO 8601.
reference_period_format = '%d/%m/%Y %H:%M:%S'
ref_periode_debut, ref_periode_fin = (
    datetime.datetime.strptime(dico_model[key], reference_period_format).isoformat()
    for key in ('ref_periode_debut', 'ref_periode_fin')
)

# Load the modelled tag and its factors over the reference period.
data, clean_report, message_error = import_from_influxdb.Charger_Preparer_Data(
    ref_periode_debut=ref_periode_debut,
    ref_periode_fin=ref_periode_fin,
    ipe_tag=dico_model["tag_modelise"],
    dico_du_model=dico_model,
    use_seuil_min=True,
    use_seuil_max=True,
    clean_data=False,
    concat_after=True,
    load_unused_feature=True,
    zscore=3,
)

## Recherche valeurs manquantes dans les colonnes

### Pourcentage de valeurs manquantes

In [None]:
def num_missing(x):
    """Return the percentage of missing (null) entries in ``x``, rounded to one decimal.

    Args:
        x: a pandas Series (typically one DataFrame column passed by ``DataFrame.apply``).

    Returns:
        The share of null values expressed in percent (0-100), rounded to 0.1.
        An empty series has no missing value and yields ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    total = len(x)
    if total == 0:
        return 0.0
    # Vectorised count of nulls; the builtin sum() would iterate element by element.
    return round(100 * x.isnull().sum() / total, 1)

# Percentage of missing values, computed column by column.
df_missing = data.apply(num_missing, axis=0)
print("Valeurs manquantes par colonne:", df_missing, sep="\n")

## Suppression des features non utilisées

In [24]:

# Names of the factors flagged as used in the model configuration.
features_kept = [
    facteur['nom']
    for facteur in dico_model['facteurs'].values()
    if facteur['used']
]

# Keep the target column first, followed by the retained factors.
colonnes_retenues = [dico_model['tag_name'], *features_kept]
data = data[colonnes_retenues]

# Preprocessing

### Découpage des données en train et test

In [12]:
# Rows with any missing value are discarded before splitting.
data = data.dropna()

# Reproducible 80/20 random split: test rows are those whose index label was not sampled.
train_dataset = data.sample(frac=0.8, random_state=0)
test_dataset = data.drop(index=train_dataset.index)

# Work on copies so that popping the target leaves the *_dataset frames intact.
train_features, test_features = train_dataset.copy(), test_dataset.copy()

target_column = dico_model["tag_name"]
train_labels = train_features.pop(target_column)
test_labels = test_features.pop(target_column)

### Normalisation des facteurs numériques

In [13]:
# One scalar Keras input per feature column: object-typed columns are fed as
# strings, every other column as float32.
inputs = [
    tf.keras.Input(
        shape=(1,),
        name=name,
        dtype=tf.string if column.dtype == object else tf.float32,
    )
    for name, column in train_features.items()
]


In [14]:
# Float inputs, keyed by feature name.
numeric_inputs = {
    tensor.name: tensor
    for tensor in inputs
    if tensor.dtype == tf.float32
}

# Concatenate the numeric inputs and normalise them with statistics
# learned from the training features only.
x = layers.Concatenate()(list(numeric_inputs.values()))
norm = layers.Normalization()
norm.adapt(np.array(train_features[list(numeric_inputs)]))
all_numeric_inputs = norm(x)

# The categorical branches are appended to this list by the following cell.
preprocessed_inputs = [all_numeric_inputs]


In [16]:
# One-hot encode every non-float (string) input and add it to the preprocessed branches.
# NOTE: this cell appends to `preprocessed_inputs`; re-running it without re-running the
# cell that creates the list adds the same branches a second time.
# The loop variable is deliberately not called `input`, which would shadow the builtin
# for the rest of the notebook.
for categorical_input in inputs:
  if categorical_input.dtype == tf.float32:
    continue

  # Vocabulary = distinct values observed in the training features for this column.
  lookup = layers.StringLookup(vocabulary=np.unique(train_features[categorical_input.name]))
  # `vocabulary_size()` / `num_tokens` are the supported names; `vocab_size()` and
  # `max_tokens` are deprecated aliases of the same values.
  one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())

  x = lookup(categorical_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)

In [None]:
# Display the list of preprocessed branches (normalised numerics, then one-hot categoricals).
preprocessed_inputs

# Modèle

In [18]:
# Baseline regressor: all preprocessed branches concatenated, one 64-unit ReLU
# hidden layer, and a single linear output named 'target'.
preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)
first_layer = layers.Dense(units=64, activation='relu')(preprocessed_inputs_cat)
terminal_layer = layers.Dense(units=1, name='target')(first_layer)

main_model = tf.keras.Model(inputs=inputs, outputs=terminal_layer, name='model')
main_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
)


In [None]:
# Print the layer-by-layer description of the baseline model.
main_model.summary()

In [19]:
def _columns_as_arrays(frame):
  """Map each column name of ``frame`` to a NumPy array holding that column's values."""
  return {name: np.array(value) for name, value in frame.items()}


# The model has one named input per feature, so it is fed a dict keyed by column name.
features_dict = _columns_as_arrays(train_features)
features_dict_test = _columns_as_arrays(test_features)

In [None]:
# Train the baseline model for 600 epochs.
# NOTE(review): the test split is used as validation data here — confirm this is
# intended, since the same split is later used to report the test R².
history = main_model.fit(
    x=features_dict,
    y=train_labels,
    epochs=600,
    validation_data=(features_dict_test, test_labels),
)

## Analyse des résultats

In [None]:
# Training vs validation loss per epoch for the baseline fit.
plot_loss(history)

In [None]:


# Predictions of the baseline model on both splits, re-indexed like their source rows.
df_train_predictions = pd.Series(
    data=main_model.predict(features_dict).flatten(),
    index=train_features.index,
)
df_test_predictions = pd.Series(
    data=main_model.predict(features_dict_test).flatten(),
    index=test_features.index,
)

# Full prediction / measurement series, put back in index order.
df_pred = pd.concat([df_train_predictions, df_test_predictions]).sort_index()
df_mesure = pd.concat([train_labels, test_labels]).sort_index()

# R² on the training split, then on the test split.
r2_train = r2_score(train_labels.values, df_train_predictions)
r2_test = r2_score(test_labels.values, df_test_predictions)
print(r2_train, r2_test)



## Réglage Hyperparamètres

In [39]:
def model_builder(hp):
  """Build and compile one candidate model for the Keras Tuner search.

  The candidate reuses the notebook-level ``inputs`` and ``preprocessed_inputs``
  and tunes three hyperparameters: hidden-layer width ('units'), Adam learning
  rate ('learning_rate') and hidden-layer activation ('activation').

  Args:
    hp: hyperparameter container supplied by the tuner.

  Returns:
    The compiled Keras model (mean squared error as loss and as metric).
  """
  n_units = hp.Int('units', min_value=32, max_value=64, step=10)
  learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  activation = hp.Choice("activation", ["relu", "tanh"])

  merged = layers.Concatenate(name="concate_input")(preprocessed_inputs)
  hidden = layers.Dense(units=n_units, activation=activation, name="first_layer")(merged)
  output = layers.Dense(1, name='target')(hidden)

  candidate = tf.keras.models.Model(inputs=inputs, outputs=output)
  candidate.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
      loss="mean_squared_error",
      metrics=["mean_squared_error"],
  )
  return candidate

In [None]:
# Hyperband search minimising the validation mean squared error.
# Trial state is written under ./my_dir/intro_to_kt.
tuner = kt.Hyperband(
    model_builder,
    objective="val_mean_squared_error",
    max_epochs=400,
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt',
)

# Abort a trial once the validation loss has not improved for 5 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(
    x=features_dict,
    y=train_labels,
    epochs=400,
    validation_data=(features_dict_test, test_labels),
    callbacks=[stop_early],
)

In [None]:
# Print the summary of the hyperparameter search results.
tuner.results_summary()

In [None]:
# Best model found by the tuner.
best_model = tuner.get_best_models(num_models=1)[0]

# Predictions of the tuned model on both splits, re-indexed like their source rows.
df_train_predictions = pd.Series(
    data=best_model.predict(features_dict).flatten(),
    index=train_features.index,
)
df_test_predictions = pd.Series(
    data=best_model.predict(features_dict_test).flatten(),
    index=test_features.index,
)

# Full prediction / measurement series, put back in index order.
df_pred = pd.concat([df_train_predictions, df_test_predictions]).sort_index()
df_mesure = pd.concat([train_labels, test_labels]).sort_index()

# R² on the training split, then on the test split.
r2_train = r2_score(train_labels.values, df_train_predictions)
r2_test = r2_score(test_labels.values, df_test_predictions)
print(r2_train, r2_test)

plot_model_mesure(df_mesure, df_pred)

# Export vers ONNX

In [None]:
# NOTE(review): neither `modelreport` nor `df_num_corr` is imported or defined in any
# visible cell — confirm both exist before running this cell on a fresh kernel.
importlib.reload(modelreport)

model_type = "réseau de neuronnes"

# The report period is taken from the first and last index entries of `data`.
report_date_format = '%Y-%m-%d %H:%M:%S'
report_period_start = datetime.datetime.strftime(data.index[0], report_date_format)
report_period_end = datetime.datetime.strftime(data.index[-1], report_date_format)

modelreport_json = modelreport.BuildModelReport(
    model_type=model_type,
    ref_periode_debut=report_period_start,
    ref_periode_fin=report_period_end,
    clean_report=clean_report,
    description='',
    test_data_set=test_dataset,
    train_data_set=train_dataset,
    fitted_model=best_model,
    df_num_corr=df_num_corr,
    dico_model=dico_model,
    data=data,
)

In [32]:
import tf2onnx

# Convert the tuned Keras model to ONNX and embed the model report in its metadata.
model_onnx, storage = tf2onnx.convert.from_keras(best_model)
report_entry = StringStringEntryProto(key="ReportModel", value=modelreport_json)
model_onnx.metadata_props.append(report_entry)

# NOTE(review): `onnx_model_name` is only assigned in commented-out code earlier in
# the notebook — confirm it is defined before running this cell on a fresh kernel.
with open(onnx_model_name, mode="wb") as onnx_file:
    onnx_file.write(model_onnx.SerializeToString())

In [34]:
# Parse the JSON report string back into Python objects for inspection.
# (The bare `modelreport_json` expression that used to open this cell was not the
# cell's last statement, so it displayed nothing; it has been dropped.)
import json
json_rep = json.loads(modelreport_json)

In [35]:
# Display the 'uv_formula' entry of the parsed model report.
json_rep['uv_formula']

{'formula': '[model]  .Arg("Humidite_PAC", [tag_33643], 7.467802861111114, 12.0081104109589) .Arg("Perc_Matiere_volatile_PAC", [tag_33644], 22.106071805555555, 24.78209244755245) .Arg("Cokerie_Production_de_Coke", [tag_33562], 1823.76, 4344.84) .Arg("Taux_Cendre", [tag_34371], 7.4, 11.1) .Arg("Temps_Cuisson", [tag_37912], 19.239522988505744, 38.66184444444445) .Outputs("variable_out1")'}