# Librairies nécessaires

In [10]:
import commentjson
import os
#os.chdir('/mnt/batch/tasks/shared/LS_root/mounts/clusters/pythonnb/code/Users/david.mouquet/modeling')


import missingno as msno
import sys
import importlib
import pandas as pd
import math
import datetime
import numpy as np

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column

sys.path.append("../SRC/")
import Utilitaires as utils
importlib.reload(utils)
import RapportModelisation as modelreport
import importFromUV as preprocdata
importlib.reload(preprocdata)
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split


# Lecture des données

In [2]:
file_model_param   = "models_param/TCC-DK-Elec.json"

model_id           = "R1V1_Conso_electricite_En_Charge_ref_2019_V2"
site               = "DK-TCC"

###############################################################

with open(file_model_param, encoding='utf-8') as file:
    dico_model_all = commentjson.load(file)

dico_model = dico_model_all[model_id]



#pkl_model_name      = "resu/models/"+model_id+"_"+ dico_model['type_model'] + "_" +dico_model['freq']+".pkl"

onnx_model_name     = dico_model['mangling']+"."+model_id+".onnx"
nom_model_registre  = dico_model['nom_model_registre']
freq                = dico_model['freq']
uv_mangling         = dico_model['mangling']
nom_data_store      = dico_model['data_store']


ref_periode_debut = datetime.datetime.strptime(dico_model['ref_periode_debut'], '%d/%m/%Y %H:%M:%S').isoformat()
ref_periode_fin   = datetime.datetime.strptime(dico_model['ref_periode_fin'], '%d/%m/%Y %H:%M:%S').isoformat()


data, clean_report = preprocdata.Charger_Preparer_Data(ref_periode_debut = ref_periode_debut, 
                                         ref_periode_fin   = ref_periode_fin,
                                         ipe_tag           = dico_model["tag_modelise"],
                                         dico_du_model     = dico_model,
                                         use_seuil_min     = True,
                                         use_seuil_max     = True,
                                         clean_data        = False,
                                         concat_after      = True,
                                         load_unused_feature = True,
                                         zscore            = 3)

import du tag: tag_4278
import du tag: tag_10260
import du tag: tag_10261
import du tag: tag_35633
import du tag: tag_3931
import du tag: tag_9323
import du tag: tag_3928
import du tag: tag_3359


In [3]:
data.describe()

Unnamed: 0,DK_TCC_Laminoir_R1V1_IPE_ELEC_EN_CHARGE_kWh_BOB,R1V1_Taux_reduction_epaisseur,R1V1_Refoulement,Tps_Entree_TCC_Sortie_R1V1,LARG_BRAME_THEOR,teta_moy_v1r1,Longueur_Brame
count,157161.0,157161.0,157161.0,157161.0,157161.0,157161.0,157161.0
mean,45.84704,23.413113,101.671245,71.524917,1348.975099,1198.351839,8.52407
std,14.98127,2.165151,72.970507,15.172197,196.331746,17.829024,1.18223
min,6.385385,8.496719,0.0,22.248928,750.0,1100.061279,5.21
25%,34.392022,21.004028,34.971924,72.882462,1190.0,1186.693481,7.885
50%,45.430984,24.316536,99.221436,76.252317,1340.0,1198.140381,8.885
75%,56.563919,24.808295,163.442993,78.468355,1490.0,1209.923096,9.505
max,105.832936,28.977343,301.874573,130.71038,1922.447144,1293.523804,9.85


# Coefficient de corrélation

In [4]:
df_num_corr = utils.Compute_Corr_Coef(data=data, dico_model =dico_model)

# Preprocessing

### Découpage des données en train et test

In [5]:
data = data.dropna()

In [48]:

train_dataset, test_dataset = train_test_split(data, test_size=0.25)

train_labels = train_dataset.pop(dico_model["tag_name"])
test_labels  = test_dataset.pop(dico_model["tag_name"])

### transformation en tensors

In [55]:
def df_to_dataset(df_features, df_label, shuffle=True, batch_size=32):
  ds = tf.data.Dataset.from_tensor_slices((dict(df_features), df_label))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(df_features))
  ds = ds.batch(batch_size)
  return ds

In [56]:
train_dataset_ts = df_to_dataset(train_dataset,train_labels)
test_dataset_ts  = df_to_dataset(test_dataset,test_labels)

In [75]:
train_features = tf.convert_to_tensor(train_dataset, dtype=tf.float32)


In [76]:
train_features

<tf.Tensor: shape=(117870, 7), dtype=float32, numpy=
array([[  20.932,    0.   ,   31.236, ..., 1201.834,    9.185,   61.   ],
       [  20.461,  170.647,   75.365, ..., 1206.751,    8.535,    1.   ],
       [  26.383,  105.998,   74.992, ..., 1174.347,    6.005,   33.   ],
       ...,
       [  25.557,   85.338,   79.094, ..., 1190.798,    9.775,    4.   ],
       [  24.268,  145.757,   77.817, ..., 1180.856,    8.865,   40.   ],
       [  24.373,  178.483,   76.34 , ..., 1171.52 ,    9.085,   33.   ]],
      dtype=float32)>

### Normalisation des facteurs numériques

In [20]:
def get_scal(feature):
  def minmax(x):
    mini = train_dataset[feature].min()
    maxi = train_dataset[feature].max()
    return (x - mini)/(maxi-mini)
  return(minmax)

In [57]:
num_c = [name for name, column in train_dataset.items() if  column.dtype == np.float64]
cat_c = [name for name, column in train_dataset.items() if  column.dtype == object]

In [58]:
feature_columns = []
for header in num_c:
  scal_input_fn = get_scal(header)
  feature_columns.append(feature_column.numeric_column(header, normalizer_fn=scal_input_fn))


In [59]:
feature_columns

[NumericColumn(key='R1V1_Taux_reduction_epaisseur', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A5E0>),
 NumericColumn(key='R1V1_Refoulement', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A790>),
 NumericColumn(key='Tps_Entree_TCC_Sortie_R1V1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A550>),
 NumericColumn(key='LARG_BRAME_THEOR', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A820>),
 NumericColumn(key='teta_moy_v1r1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A8B0>),
 NumericColumn(key='Longueur_Brame', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=<function get_scal.<locals>.minmax at 0x000001F9E246A940>)]

### Encodage des variables catégorielles

In [60]:
for feature_name in cat_c:
  vocabulary = data[feature_name].unique()
  col_cat = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
  one_hot = feature_column.indicator_column(col_cat)
  feature_columns.append(one_hot)

In [61]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [62]:
model= tf.keras.Sequential([feature_layer,layers.Dense(64, activation='relu'),layers.Dense(1)],name = 'target')


model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(0.001))


In [34]:
features_dict = {name: np.array(value) 
                         for name, value in train_dataset.items()}
features_dict_test = {name: np.array(value) 
                         for name, value in test_features.items()}



In [72]:
train_dataset_ts

<BatchDataset element_spec=({'R1V1_Taux_reduction_epaisseur': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'R1V1_Refoulement': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'Tps_Entree_TCC_Sortie_R1V1': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'LARG_BRAME_THEOR': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'teta_moy_v1r1': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'Longueur_Brame': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'Code_Metal': TensorSpec(shape=(None,), dtype=tf.string, name=None)}, TensorSpec(shape=(None,), dtype=tf.float64, name=None))>

In [63]:
#history = model.fit(x=(train_dataset_ts,train_labels), epochs=20,validation_data=(test_dataset_ts,test_labels))

#history = model.fit(x=features_dict, y=train_labels, epochs=20,validation_data=(features_dict_test,test_labels))


history = model.fit(train_dataset_ts, epochs=20,validation_data=test_dataset_ts)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Apprentissage

In [60]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [62]:
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
%tensorboard --logdir logs/fit

# Performance globale

In [67]:
features_dict = {name: np.array(value) 
                         for name, value in train_dataset.items()}
features_dict_test = {name: np.array(value) 
                         for name, value in test_dataset.items()}


train_predictions = model.predict(features_dict).flatten()




In [68]:
test_predictions = model.predict(features_dict_test).flatten()                         



In [70]:
r2_score(test_labels.values,test_predictions)

0.9135498748543753

In [71]:
print(r2_score(train_labels.values,train_predictions),r2_score(test_labels.values,test_predictions))

0.9136158416204834 0.9135498748543753


In [53]:
df_prediction = pd.Series(index=data.index,data=pred_label)

In [None]:
import plotly.graph_objects as go


# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_prediction.index, y=df_prediction,
                    mode='lines',
                    name='Modele'))
fig.add_trace(go.Scatter(x=true_labels.index, y=true_labels,
                    mode='lines',
                    name='Mesure'))

#fig.update_layout(title='Four n° ' + N_Four + ' Durée entre défournement et prochain enfournement',
#                   xaxis_title='Date',
#                   yaxis_title='minutes')
fig.show()

# Export vers ONNX

In [127]:
importlib.reload(modelreport)

model_type = "réseau de neuronnes"

modelreport_json = modelreport.BuildModelReport(model_type  = model_type,
                                                ref_periode_debut  = datetime.datetime.strftime(data.index[0], '%Y-%m-%d %H:%M:%S')  ,
                                                ref_periode_fin= datetime.datetime.strftime(data.index[-1], '%Y-%m-%d %H:%M:%S'),
                                                clean_report = clean_report,
                                                description = '',
                                                test_data_set = test_data_set,
                                                train_data_set = train_data_set,
                                                fitted_model = fitted_model,
                                                df_num_corr = df_num_corr,
                                                dico_model = dico_model,
                                                data = data)

'target'

In [47]:
import tf2onnx
(onnx_model_proto, storage) = tf2onnx.convert.from_keras(model)



Cannot infer shape for target/dense_features_2/Code_Metal_embedding/None_Lookup/LookupTableFindV2: target/dense_features_2/Code_Metal_embedding/None_Lookup/LookupTableFindV2:0
Cannot infer shape for target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GreaterEqual: target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GreaterEqual:0
Cannot infer shape for target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GatherV2_2: target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GatherV2_2:0


In [139]:
onnx_model_proto.metadata_props

[]

In [48]:
import tensorflow as tf
import tf2onnx
#model.layers[-1]._name = 'target'
# model.save('model_ts')
# model = tf.keras.models.load_model('model_ts')

tf2onnx.convert.from_keras(model, output_path='model_R1V1_ts_test.onnx')



Cannot infer shape for target/dense_features_2/Code_Metal_embedding/None_Lookup/LookupTableFindV2: target/dense_features_2/Code_Metal_embedding/None_Lookup/LookupTableFindV2:0
Cannot infer shape for target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GreaterEqual: target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GreaterEqual:0
Cannot infer shape for target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GatherV2_2: target/dense_features_2/Code_Metal_embedding/Code_Metal_embedding_weights/GatherV2_2:0


(ir_version: 7
 producer_name: "tf2onnx"
 producer_version: "1.13.0 2c1db5"
 graph {
   node {
     input: "teta_moy_v1r1"
     input: "const_fold_opt__672"
     output: "target/dense_features_2/teta_moy_v1r1/ExpandDims:0"
     name: "target/dense_features_2/teta_moy_v1r1/ExpandDims"
     op_type: "Unsqueeze"
   }
   node {
     input: "target/dense_features_2/teta_moy_v1r1/ExpandDims:0"
     input: "target/dense_features_2/teta_moy_v1r1/sub/y:0"
     output: "target/dense_features_2/teta_moy_v1r1/sub:0"
     name: "target/dense_features_2/teta_moy_v1r1/sub"
     op_type: "Sub"
   }
   node {
     input: "Tps_Entree_TCC_Sortie_R1V1"
     input: "const_fold_opt__672"
     output: "target/dense_features_2/Tps_Entree_TCC_Sortie_R1V1/ExpandDims:0"
     name: "target/dense_features_2/Tps_Entree_TCC_Sortie_R1V1/ExpandDims"
     op_type: "Unsqueeze"
   }
   node {
     input: "target/dense_features_2/Tps_Entree_TCC_Sortie_R1V1/ExpandDims:0"
     input: "target/dense_features_2/Tps_Entree_TCC_

In [145]:

formula = '[model] '

for tag in dico_model['facteurs'].keys():
    if dico_model['facteurs'][tag]['used']:
        if dico_model['facteurs'][tag]['type'] == 'num':
            nom_feat = dico_model['facteurs'][tag]['nom']
            min_val = str(data[nom_feat].min())
            max_val = str(data[nom_feat].max())
            formula = formula + " .Arg(" + '"' + nom_feat +'"'+ ", [" + tag + "]"
            formula = formula + ", " + min_val + ", " + max_val +  ")"

        elif dico_model['facteurs'][tag]['type'] == 'cat':
            nom_feat = dico_model['facteurs'][tag]['nom']
            mod_liste = list(data[nom_feat].unique())
            mod_liste = '","'.join(map(str,mod_liste))
            mod_liste = '"'+mod_liste+'"'
            formula = formula + " .Arg(" + '"' + nom_feat +'"'+ ", [" + tag + "]"
            formula = formula + ", " +mod_liste +  ")"


formula = formula + " .Outputs(" + '"' + 'target' + '"' + ")"


In [146]:
formula

'[model]  .Arg("R1V1_Taux_reduction_epaisseur", [tag_10260], 8.496718988185847, 28.977342970827657) .Arg("R1V1_Refoulement", [tag_10261], 0.0, 301.874572753906) .Arg("Tps_Entree_TCC_Sortie_R1V1", [tag_35633], 22.24892807006837, 130.7103796005249) .Arg("LARG_BRAME_THEOR", [tag_3931], 750.0, 1922.44714355469) .Arg("teta_moy_v1r1", [tag_9323], 1100.06127929688, 1293.52380371094) .Arg("Longueur_Brame", [tag_3928], 5.21, 9.85) .Arg("Code_Metal", [tag_3359], "32","31","1","4","40","41","47","48","45","43","33","5","62","61","11","14","63","42","22","2","13","8","6","30","35","36","10","34","3","71","12","53","50","65","81","90","9","70","21","51","52","20") .Outputs("target")'