## Entrenamiento
**Solucionado** Posibles soluciones a la indeterminación de la pérdida en el entrenamiento
[1](https://github.com/keras-team/keras/issues/2530)
[2](https://stackoverflow.com/questions/37232782/nan-loss-when-training-regression-network)
[3](https://github.com/keras-team/keras/issues/2134)


In [1]:
# Default python libraries
import os
import json
import datetime
from pathlib import Path

# Third party
import numpy as np
import tensorflow as tf

from model_training import (
    LoadTimeSerie,
    Preprocessing,
    DataGenerator,
    ModelLoader
)

from model_training.utils import (
    fromJSON,
    asJSON,
    set_logger,
)

subtract_mean registered
minmax registered
znormalization registered


In [2]:
# Fijar semilla
seed = 7
np.random.seed(seed)
tf.keras.backend.clear_session()

In [3]:
# Directorio donde guardar resultados de entrenamiento
path_save_train_results = Path("train_results")
path_save_train_results.mkdir(parents=True, exist_ok=True)

In [4]:
# Crear logger para el entrenamiento
logger = set_logger(path_log = "train_results/logs_training.log")

In [5]:
# Cargar datos
logger.info("Cargando datos para entrenamiento.")

list_train = fromJSON("data/list_train.json")
list_val   = fromJSON("data/list_val.json")
list_test  = fromJSON("data/list_test.json")
labels = fromJSON("data/labels.json")
data_config = fromJSON("data/split_config.json")
ts_config = fromJSON("time_series/time_series_config.json")

In [6]:
logger.info("Definiendo features para input y output.")
# nombre de las columnas de las series de tiempo
features = ts_config.get("features")

# Features para 
input_features  = features               # input
output_features = ["sales_usd_day"]      # output

In [7]:
logger.info("Definiendo configuracion de entrenamiento")
# referencia a las columas de la serie de tiempo 
idx_input_features = [features.index(out) for out in input_features] # input
idx_output_features = [features.index(out) for out in output_features] # output

# Modelo a utilizar
model_name = 'LSTM2_X30_y1'#'LSTM1_X30_y1' # Modelo en '/model_training/models'. Estructura <NOMBRE RED>_X<largo serie input>_y<neuronas capa salida>
weights_path=None # Pesos para inicializar la red
output_layer = 'linear' # funcion de activacion en la capa de salida. None -> activacion lineal f(x)=x
n_output = None # neuronas en la ultima capa. None -> len(order_output_model)
model_loader = ModelLoader()
model = model_loader(model_name)
logger.info(f"Modelo {model_name!r} cargado.")

# Batches (batch_size, len_signal, len_feature)  
epochs = 2
batch_size = 128
logger.info(f"Batch {batch_size} y epocas {epochs}")

len_input = data_config.get("len_input") # largo del input 
len_output = data_config.get("len_output") # largo del output, 1 para este problema
logger.info(f"len_input={len_input}, len_output={len_output}.")

optimizer = tf.keras.optimizers.RMSprop(
    learning_rate=0.003,
    rho=0.9,
    momentum=0.0,
    epsilon=1e-07,
    centered=False,
    name="RMSprop"
)
name_optimizer = "RMSprop"
logger.info(f"Optimizador '{optimizer}'")

# Funcion de perdida
loss = tf.keras.losses.MeanSquaredError(
    reduction="auto", 
    name="mean_squared_error"
)
name_loss = "mse"
logger.info(f"Funcion de perdida '{loss}'")

# Preprocesamiento
list_min = ts_config.get("list_min")
list_max = ts_config.get("list_max")
list_std = ts_config.get("list_std")
list_mean = ts_config.get("list_mean")

# Para X
preprocessing = Preprocessing([
    #("minmax", dict(axis_signal=0, list_min=list_min, list_max=list_max ))
    ("znormalization", dict(axis_signal=0, list_mean=list_mean, list_std=list_std ))
])
preprocessing.asJSON("train_results/preprocessing.json")
logger.info(f"Guardando preprocessing.json")

# Para y: mismo de X pero en menos features
preprocessing_y = Preprocessing([
    #("minmax", dict(axis_signal=0, list_min=list_min[idx_output_features[0]], list_max=list_max[idx_output_features[0]] ))
    ("znormalization", dict(axis_signal=0, list_mean=list_mean[idx_output_features[0]], list_std=list_std[idx_output_features[0]] ))
])

# Cargar DataGenerator
logger.info("Definiendo configuracion general del DataGenerator")
inputs_data_gen = dict(
    labels=labels,
    idx_feature_y = idx_output_features,
    file_loader = LoadTimeSerie(),
    preprocessing = preprocessing,
    preprocessing_y = preprocessing_y,
    batch_size=batch_size,
)

logger.info("Instanciando DataGenerator para entrenamiento")
train_gen = DataGenerator(
    list_id_ts=list_train,
    shuffle=True, # Se recomienda aleatorizar en el entrenamiento
    **inputs_data_gen
)

logger.info("Instanciando DataGenerator para validacion")
val_gen = DataGenerator(
    list_id_ts=list_val,
    shuffle=False, # No es necesario aleatorizar para la validacion
    **inputs_data_gen
)

Modelo 'LSTM2_X30_y1' cargado correctamente
Pipeline configuration saved at 'train_results/preprocessing.json'


In [8]:
logger.info("Compilando modelo")
model.compile(optimizer=optimizer, loss=loss)#, metrics=metrics)

In [9]:
logger.info("Guardando arquitectura")
model_json = model.to_json()
architecture = f"architecture-{model_name}-{output_layer}.json"
path_save_model = f"train_results/{architecture}"
with open(path_save_model, "w") as json_file:
    json_file.write(model_json)
logger.info(f"Arquitectura guardada en {path_save_model!r}")

In [10]:
logger.info("Agregando callbacks")
# ModelCheckpoint
weights_file = f'{path_save_train_results}/weights-{model_name}-' + 'epoch{epoch:03d}-val_loss{val_loss:.3f}.hdf5'

# Tensorboard
now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join(
    "logs",
    f"{model_name}-{now}",
)

# Callbacks
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(filepath=weights_file, save_best_only=True, save_weights_only=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, min_lr=0.0001),
    tf.keras.callbacks.EarlyStopping(patience=10),
    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
]

In [11]:
logger.info("Comenzando entrenamiento.")
history_train = model.fit(
    x=train_gen,
    epochs=epochs,
    validation_data=val_gen,
    callbacks = callbacks,
)

Epoch 1/2
Epoch 2/2


In [12]:
logger.info("Generando configuracion de entrenamiento")

training_config = dict(
        input_features  = features,
        output_features = ["venta_clp_dia"],
        batch_size = batch_size,
        epochs = epochs,
        loss = name_loss,
        optimizer = name_optimizer,
        model_name = model_name
)

In [13]:
training_config

{'input_features': ['venta_unidades_dia', 'venta_clp_dia', 'is_promo'],
 'output_features': ['venta_clp_dia'],
 'batch_size': 128,
 'epochs': 2,
 'loss': 'mse',
 'optimizer': 'RMSprop',
 'model_name': 'LSTM2_X30_y1'}

In [14]:
path_save_training_config = path_save_train_results.joinpath("training_config.json")
asJSON(training_config, str(path_save_training_config),sort_keys=False)
logger.info("Configuracion de entrenamiento guardada")