# Bibliothèques nécessaires

In [1]:
import commentjson
import os
#os.chdir('/mnt/batch/tasks/shared/LS_root/mounts/clusters/pythonnb/code/Users/david.mouquet/modeling')


import missingno as msno
import sys
import importlib
import pandas as pd
import math
import datetime
import numpy as np

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers

sys.path.append("../SRC/")
import Utilitaires as utils
importlib.reload(utils)
import RapportModelisation as modelreport
import importFromUV as preprocdata
importlib.reload(preprocdata)
from sklearn.metrics import r2_score

# Lecture des données

In [2]:
# Path of the JSON-with-comments file that stores the parameters of every model.
file_model_param   = "models_param/TCC-DK-Elec.json"

# Key of the model configuration to use inside that file, and a site label.
# NOTE(review): `site` is not referenced again in the visible notebook.
model_id           = "R1V1_Conso_electricite_En_Charge_ref_2019_V2"
site               = "DK-TCC"

###############################################################

# Load the whole parameter file, then keep only the entry for `model_id`.
with open(file_model_param, encoding='utf-8') as file:
    dico_model_all = commentjson.load(file)

dico_model = dico_model_all[model_id]



#pkl_model_name      = "resu/models/"+model_id+"_"+ dico_model['type_model'] + "_" +dico_model['freq']+".pkl"

# Convenience values derived from the model configuration.
# NOTE(review): none of these five names is used again in the visible notebook
# (the ONNX export cell writes to a hard-coded file name instead).
onnx_model_name     = dico_model['mangling']+"."+model_id+".onnx"
nom_model_registre  = dico_model['nom_model_registre']
freq                = dico_model['freq']
uv_mangling         = dico_model['mangling']
nom_data_store      = dico_model['data_store']


# The configuration stores the reference period as 'dd/mm/YYYY HH:MM:SS' text;
# both bounds are converted to ISO 8601 strings before being passed on.
ref_periode_debut = datetime.datetime.strptime(dico_model['ref_periode_debut'], '%d/%m/%Y %H:%M:%S').isoformat()
ref_periode_fin   = datetime.datetime.strptime(dico_model['ref_periode_fin'], '%d/%m/%Y %H:%M:%S').isoformat()


# Project helper that loads and prepares the data for the reference period.
# It returns the dataset (used below as a DataFrame) and a cleaning report.
# NOTE(review): the exact meaning of the flags (thresholds, `clean_data`,
# `zscore`, ...) is defined in importFromUV and is not visible here - confirm
# there before changing them.
data, clean_report = preprocdata.Charger_Preparer_Data(ref_periode_debut = ref_periode_debut, 
                                         ref_periode_fin   = ref_periode_fin,
                                         ipe_tag           = dico_model["tag_modelise"],
                                         dico_du_model     = dico_model,
                                         use_seuil_min     = True,
                                         use_seuil_max     = True,
                                         clean_data        = False,
                                         concat_after      = True,
                                         load_unused_feature = True,
                                         zscore            = 3)

import du tag: tag_4278
import du tag: tag_10260
import du tag: tag_10261
import du tag: tag_35633
import du tag: tag_3931
import du tag: tag_9323
import du tag: tag_3928
import du tag: tag_3359


In [3]:
# Summary statistics of the loaded dataset (sanity check of ranges and counts).
data.describe()

Unnamed: 0,DK_TCC_Laminoir_R1V1_IPE_ELEC_EN_CHARGE_kWh_BOB,R1V1_Taux_reduction_epaisseur,R1V1_Refoulement,Tps_Entree_TCC_Sortie_R1V1,LARG_BRAME_THEOR,teta_moy_v1r1,Longueur_Brame
count,157161.0,157161.0,157161.0,157161.0,157161.0,157161.0,157161.0
mean,45.84704,23.413113,101.671245,71.524917,1348.975099,1198.351839,8.52407
std,14.98127,2.165151,72.970507,15.172197,196.331746,17.829024,1.18223
min,6.385385,8.496719,0.0,22.248928,750.0,1100.061279,5.21
25%,34.392022,21.004028,34.971924,72.882462,1190.0,1186.693481,7.885
50%,45.430984,24.316536,99.221436,76.252317,1340.0,1198.140381,8.885
75%,56.563919,24.808295,163.442993,78.468355,1490.0,1209.923096,9.505
max,105.832936,28.977343,301.874573,130.71038,1922.447144,1293.523804,9.85


# Coefficient de corrélation

In [4]:
# Correlation coefficients computed by the project helper; the result is kept
# for the model report built near the end of the notebook.
df_num_corr = utils.Compute_Corr_Coef(data=data, dico_model =dico_model)

# Preprocessing

### Découpage des données en train et test

In [5]:
# Drop every row containing at least one missing value.
# NOTE(review): this rebinds `data`, so cells above this one were computed on
# the frame before the rows were dropped.
data = data.dropna()

In [6]:
# Name of the column to predict, as declared in the model configuration.
target_column = dico_model["tag_name"]

# Reproducible 80 % random sample for training; the remaining rows form the test set.
train_dataset = data.sample(frac=0.8, random_state=0)
test_dataset = data.drop(index=train_dataset.index)

# Work on copies so that popping the target leaves the full datasets untouched.
train_features, test_features = train_dataset.copy(), test_dataset.copy()
train_labels = train_features.pop(target_column)
test_labels = test_features.pop(target_column)

In [7]:
# (rows, columns) of the test feature matrix.
test_features.values.shape

(31432, 7)

In [23]:
# Peek at the test target values as a NumPy array.
test_labels.values

array([34.994, 44.188, 44.723, ..., 41.763, 40.002, 38.725])

### Normalisation des facteurs numériques

In [1]:
# Declare one symbolic Keras input per feature column: a string input for
# object-typed columns, a float32 input for every other column.
inputs = {}

for name, column in train_features.items():
  dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)

NameError: name 'train_features' is not defined

In [9]:
# Keep only the float32 (numeric) inputs.
numeric_inputs = {
    name: keras_input
    for name, keras_input in inputs.items()
    if keras_input.dtype == tf.float32
}

# Concatenate the numeric inputs into one tensor and normalise it with
# statistics adapted on the training features only.
numeric_tensors = list(numeric_inputs.values())
x = layers.Concatenate()(numeric_tensors)
norm = layers.Normalization()
numeric_train_values = np.array(train_features[numeric_inputs.keys()])
norm.adapt(numeric_train_values)
all_numeric_inputs = norm(x)

In [10]:
# Start the list of preprocessed tensors with the normalised numeric block;
# the categorical encodings are appended to it in the next section.
preprocessed_inputs = [all_numeric_inputs]

### Encodage des variables catégorielles

In [11]:
# Encode every string (categorical) input; float32 inputs are skipped because
# they are already handled by the numeric normalisation block.
# The loop variable is deliberately not called `input`: at notebook top level
# that name would overwrite the builtin `input()` for the rest of the session.
for name, keras_input in inputs.items():
  if keras_input.dtype == tf.float32:
    continue

  # The vocabulary is built from the distinct values seen in the training features.
  lookup = layers.StringLookup(vocabulary=np.unique(train_features[name]))
  # `num_tokens` / `vocabulary_size()` are the current names of the deprecated
  # `max_tokens` / `vocab_size()` pair.
  one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())

  x = lookup(keras_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)



### Preprocesseur

In [12]:
# Concatenate all preprocessed blocks into a single tensor and wrap the whole
# preprocessing graph as a Keras model: dict of raw inputs -> one feature tensor.
preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)
model_preprocessing = tf.keras.Model(inputs, preprocessed_inputs_cat)

In [13]:
# Convert each feature column to a NumPy array keyed by column name, for the
# training split and for the test split.
features_dict = {
    column_name: np.array(column_values)
    for column_name, column_values in train_features.items()
}
features_dict_test = {
    column_name: np.array(column_values)
    for column_name, column_values in test_features.items()
}

# Construction du modèle

In [14]:
def build_model(preprocessing_head, inputs):
  """Assemble and compile the regression network.

  Args:
    preprocessing_head: callable (here a Keras model) applied to `inputs`
      to produce the tensor fed to the dense layers.
    inputs: symbolic Keras inputs of the final model.

  Returns:
    A compiled `tf.keras.Model` trained with mean absolute error and
    Adam (learning rate 0.001).
  """
  hidden_layer = layers.Dense(64, activation='relu')
  output_layer = layers.Dense(1)
  body = tf.keras.Sequential([hidden_layer, output_layer], name = 'target')

  # Chain preprocessing and dense body on the symbolic inputs.
  result = body(preprocessing_head(inputs))
  model = tf.keras.Model(inputs=inputs, outputs=result)

  optimizer = tf.keras.optimizers.Adam(0.001)
  model.compile(loss='mean_absolute_error', optimizer=optimizer)
  return model

In [15]:
# Build the end-to-end model: raw feature dict -> preprocessing -> dense regressor.
model = build_model(model_preprocessing, inputs)

In [16]:
# Print the layer-by-layer structure of the assembled model.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Code_Metal (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 LARG_BRAME_THEOR (InputLayer)  [(None, 1)]          0           []                               
                                                                                                  
 Longueur_Brame (InputLayer)    [(None, 1)]          0           []                               
                                                                                                  
 R1V1_Refoulement (InputLayer)  [(None, 1)]          0           []                               
                                                                                            

## Réglage des hyperparamètres

In [40]:
import keras_tuner as kt



In [64]:
# Leftover experiment: this call raises the TypeError shown below, because
# `inputs` is a dict of symbolic Keras input tensors rather than concrete values.
tf.convert_to_tensor(inputs)

TypeError: Exception encountered when calling layer "tf.convert_to_tensor" (type TFOpLambda).

Expected any non-tensor type, but got a tensor instead.

Call arguments received by layer "tf.convert_to_tensor" (type TFOpLambda):
  • value={'R1V1_Taux_reduction_epaisseur': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'R1V1_Refoulement': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'Tps_Entree_TCC_Sortie_R1V1': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'LARG_BRAME_THEOR': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'teta_moy_v1r1': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'Longueur_Brame': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'Code_Metal': 'tf.Tensor(shape=(None, 1), dtype=string)'}
  • dtype=None
  • dtype_hint=None
  • name=None

In [52]:
def model_builder(hp):
  """Build and compile the regression model for one Keras Tuner trial.

  Relies on the notebook-level `inputs` and `model_preprocessing` objects.

  Args:
    hp: hyperparameter container supplied by the tuner; used to choose the
      hidden-layer width ('units') and the Adam learning rate ('learning_rate').

  Returns:
    A compiled `tf.keras.Model` using mean absolute error as loss.
  """
  # NOTE(review): starting at 32 with step=10, max_value=64 itself does not
  # look reachable - confirm the intended search grid.
  hp_units = hp.Int('units', min_value=32, max_value=64, step=10)
  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

  body = tf.keras.Sequential([
    layers.Dense(units=hp_units, activation='relu'),
    layers.Dense(1)], name = 'target')

  # The preprocessing model must be applied to the symbolic inputs first; its
  # output tensor is what feeds the dense body (same wiring as build_model).
  # The original passed the preprocessing model object itself to `body`.
  result = body(model_preprocessing(inputs))

  model = tf.keras.Model(inputs=inputs, outputs=result)

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate))

  return model

In [None]:
# Hyperband search over `model_builder`.
# The models are compiled with a mean-absolute-error loss and no extra metric,
# so the only validation quantity logged during training is 'val_loss';
# 'val_accuracy' is never produced for this regression model.
tuner = kt.Hyperband(model_builder,
                     objective='val_loss',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')

# Apprentissage

In [60]:
# Enable the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [62]:
# One log directory per run, named after the current timestamp, under logs/fit/.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# Keras callback writing training logs (with histograms every epoch) to that directory.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [17]:
# Train for 20 epochs, reporting the loss on the test split after each epoch.
# The TensorBoard callback created in the previous cell is passed here;
# without it nothing is written under logs/fit and the %tensorboard cell
# below has nothing to display.
history = model.fit(
    x=features_dict,
    y=train_labels,
    epochs=20,
    validation_data=(features_dict_test, test_labels),
    callbacks=[tensorboard_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Exploratory leftover: prints the documentation of `model.fit`.
help(model.fit)

Help on method fit in module keras.engine.training:

fit(x=None, y=None, batch_size=None, epochs=1, verbose='auto', callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_batch_size=None, validation_freq=1, max_queue_size=10, workers=1, use_multiprocessing=False) method of keras.engine.functional.Functional instance
    Trains the model for a fixed number of epochs (iterations on a dataset).
    
    Args:
        x: Input data. It could be:
          - A Numpy array (or array-like), or a list of arrays
            (in case the model has multiple inputs).
          - A TensorFlow tensor, or a list of tensors
            (in case the model has multiple inputs).
          - A dict mapping input names to the corresponding array/tensors,
            if the model has named inputs.
          - A `tf.data` dataset. Should return a tuple
            of either `(inp

In [None]:
# Open TensorBoard inside the notebook on the training log directory.
%tensorboard --logdir logs/fit

# Performance globale

In [18]:
# Predict on both splits and flatten the model output to 1-D arrays.
train_predictions = model.predict(features_dict).flatten()
test_predictions = model.predict(features_dict_test).flatten()

# Coefficient of determination on the training split, then on the test split.
r2_train = r2_score(train_labels.values, train_predictions)
r2_test = r2_score(test_labels.values, test_predictions)
print(r2_train, r2_test)



In [19]:
# NOTE(review): repeats the test prediction already computed in the previous cell.
test_predictions = model.predict(features_dict_test).flatten()                         



In [20]:
# R2 on the training split, then on the test split.
# NOTE(review): same statement as the last line of the first cell of this section.
print(r2_score(train_labels.values,train_predictions),r2_score(test_labels.values,test_predictions))

0.9288023489180408 0.928766059576856


In [68]:
# NOTE(review): `true_labels` is only assigned in the next cell, so this
# display fails when the notebook is run top to bottom on a fresh kernel.
true_labels

Date
2019-01-01 05:02:34    31.176076
2019-01-01 05:04:03    47.863317
2019-01-01 05:20:24    34.993941
2019-01-01 05:21:44    61.718423
2019-01-01 05:24:12    42.892313
                         ...    
2019-12-30 03:51:45    37.330078
2019-12-30 03:52:47    40.298854
2019-12-30 03:55:18    34.360765
2019-12-30 03:56:31    37.264575
2019-12-30 03:57:42    37.990932
Name: DK_TCC_Laminoir_R1V1_IPE_ELEC_EN_CHARGE_kWh_BOB, Length: 157161, dtype: float64

In [124]:
# Predict on the whole dataset (train and test rows together).
features = data.copy()
# Remove the target column from the copy and keep it as the measured series.
true_labels = features.pop(dico_model["tag_name"])
# Same dict-of-arrays input format as the one used for training.
features_dico = {name: np.array(value) 
                         for name, value in features.items()}

pred_label = model.predict(features_dico).flatten()                          



In [53]:
# Attach the index of `data` to the predictions so they can be plotted
# against the measured series.
df_prediction = pd.Series(index=data.index,data=pred_label)

In [None]:
import plotly.graph_objects as go


# Overlay the model prediction and the measured target on a single figure.
fig = go.Figure()
# Trace 1: model prediction.
fig.add_trace(go.Scatter(x=df_prediction.index, y=df_prediction,
                    mode='lines',
                    name='Modele'))
# Trace 2: measured values.
fig.add_trace(go.Scatter(x=true_labels.index, y=true_labels,
                    mode='lines',
                    name='Mesure'))

# NOTE(review): a commented-out `fig.update_layout(...)` call used to sit here;
# it referenced an undefined `N_Four` and titles about furnace timings, so it
# looked copied from another notebook. The figure currently has no title or
# axis labels.
fig.show()

# Export vers ONNX

In [127]:
# Reload the report module so that local edits to it are picked up.
importlib.reload(modelreport)

model_type = "réseau de neuronnes"

# Build the modelling report from the objects created in this notebook.
# The original call passed `test_data_set`, `train_data_set` and `fitted_model`,
# names that are never assigned anywhere in this notebook, so the cell raised a
# NameError on a fresh kernel. The notebook's own split and model are used instead.
# TODO confirm that BuildModelReport expects the splits with the target column
# included and accepts a Keras model as `fitted_model`.
modelreport_json = modelreport.BuildModelReport(model_type  = model_type,
                                                ref_periode_debut  = datetime.datetime.strftime(data.index[0], '%Y-%m-%d %H:%M:%S')  ,
                                                ref_periode_fin= datetime.datetime.strftime(data.index[-1], '%Y-%m-%d %H:%M:%S'),
                                                clean_report = clean_report,
                                                description = '',
                                                test_data_set = test_dataset,
                                                train_data_set = train_dataset,
                                                fitted_model = model,
                                                df_num_corr = df_num_corr,
                                                dico_model = dico_model,
                                                data = data)

'target'

In [138]:
import tf2onnx
# Convert the trained Keras model to ONNX in memory; the call returns the ONNX
# model proto and a second value that is not used afterwards.
# NOTE(review): the "'NoneType' object has no attribute 'name'" messages below
# are printed during conversion, yet a proto is still returned - confirm they
# are harmless.
(onnx_model_proto, storage) = tf2onnx.convert.from_keras(model)

'NoneType' object has no attribute 'name'


'NoneType' object has no attribute 'name'


In [139]:
# Inspect the metadata entries stored in the ONNX model.
onnx_model_proto.metadata_props

[]

In [134]:
import tensorflow as tf
import tf2onnx
# Earlier attempts, kept for reference: renaming the output layer and a
# save / reload round trip before conversion.
#model.layers[-1]._name = 'target'
# model.save('model_ts')
# model = tf.keras.models.load_model('model_ts')

# Convert the Keras model and write it to a hard-coded ONNX file.
# NOTE(review): `onnx_model_name`, built from the configuration in the data
# loading cell, is not used here - confirm which file name is intended.
# The return value is echoed as cell output (the long proto dump below).
tf2onnx.convert.from_keras(model, output_path='model_R1V1_ts_test.onnx')

'NoneType' object has no attribute 'name'


'NoneType' object has no attribute 'name'


(ir_version: 7
 producer_name: "tf2onnx"
 producer_version: "1.13.0 2c1db5"
 graph {
   node {
     input: "R1V1_Taux_reduction_epaisseur"
     input: "R1V1_Refoulement"
     input: "Tps_Entree_TCC_Sortie_R1V1"
     input: "LARG_BRAME_THEOR"
     input: "teta_moy_v1r1"
     input: "Longueur_Brame"
     output: "model_7/model_2/concatenate_2/concat:0"
     name: "model_7/model_2/concatenate_2/concat"
     op_type: "Concat"
     attribute {
       name: "axis"
       i: 1
       type: INT
     }
   }
   node {
     input: "model_7/model_2/concatenate_2/concat:0"
     input: "model_7/model_2/normalization_1/sub/y:0"
     output: "model_7/model_2/normalization_1/sub:0"
     name: "model_7/model_2/normalization_1/sub"
     op_type: "Sub"
   }
   node {
     input: "model_7/model_2/normalization_1/sub:0"
     input: "ConstantFolding/model_7/model_2/normalization_1/truediv_recip:0"
     output: "model_7/model_2/normalization_1/truediv:0"
     name: "model_7/model_2/normalization_1/truediv"
  

In [145]:

# Assemble the model-call formula: one `.Arg(...)` clause per factor flagged
# as used in the configuration, followed by the name of the model output.
formula = '[model] '

for tag, facteur in dico_model['facteurs'].items():
    if not facteur['used']:
        continue

    if facteur['type'] == 'num':
        # Numeric factor: feature name, source tag, then min and max observed in `data`.
        nom_feat = facteur['nom']
        min_val = str(data[nom_feat].min())
        max_val = str(data[nom_feat].max())
        formula += f' .Arg("{nom_feat}", [{tag}], {min_val}, {max_val})'

    elif facteur['type'] == 'cat':
        # Categorical factor: feature name, source tag, then every distinct
        # value observed in `data`, each one double-quoted.
        nom_feat = facteur['nom']
        mod_liste = '"' + '","'.join(map(str, data[nom_feat].unique())) + '"'
        formula += f' .Arg("{nom_feat}", [{tag}], {mod_liste})'

formula += ' .Outputs("target")'


In [146]:
# Display the assembled formula string.
formula

'[model]  .Arg("R1V1_Taux_reduction_epaisseur", [tag_10260], 8.496718988185847, 28.977342970827657) .Arg("R1V1_Refoulement", [tag_10261], 0.0, 301.874572753906) .Arg("Tps_Entree_TCC_Sortie_R1V1", [tag_35633], 22.24892807006837, 130.7103796005249) .Arg("LARG_BRAME_THEOR", [tag_3931], 750.0, 1922.44714355469) .Arg("teta_moy_v1r1", [tag_9323], 1100.06127929688, 1293.52380371094) .Arg("Longueur_Brame", [tag_3928], 5.21, 9.85) .Arg("Code_Metal", [tag_3359], "32","31","1","4","40","41","47","48","45","43","33","5","62","61","11","14","63","42","22","2","13","8","6","30","35","36","10","34","3","71","12","53","50","65","81","90","9","70","21","51","52","20") .Outputs("target")'