# Entrenamiento

In [197]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

### TF-Keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.metrics import RootMeanSquaredError
from keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasRegressor

In [198]:
# Switch into the dataset folder, unless the current path already contains "/dataset"
path = "/dataset"
current_dir = os.getcwd()
if path not in current_dir:
    os.chdir(current_dir + path)
print("Path actual", os.getcwd())


Path actual /home/iciac/Escritorio/CursoIA/Grupo_5/dataset


In [199]:
#### HYPERPARAMETERS

# --- Data partitioning ---
TEST_SIZE = 0.3  # fraction of the full dataset held out for testing
VAL_SIZE = 0.2   # fraction of the remaining (non-test) data used for validation

# --- Training ---
EPOCHS = 50
BATCH_SIZE = 128
KFOLD = 5  # number of cross-validation folds

# --- Output verbosity ---
VERBOSE = 1        # > 0 prints the summary of every model that gets built
VERBOSE_TRAIN = 0  # verbosity level handed to Keras fit()

# --- Architectures to try ---
FIRST_LAYER = [10, 25, 50]  # candidate unit counts for the first layer
HIDDEN_LAYERS = [[], [2], [5], [5, 2]]  # no hidden layer, one of 2 units, one of 5 units, or two (5 then 2)
HIDDEN_LAYERS = [[],[2],[5], [5,2]] # Probar sin capa oculta, con 2 neuronas o con 5 y con dos capas ocultas de 5 y 2

In [200]:
# Load the training table, then separate the features from the 'price' target
df = pd.read_csv('dataset_entrenamiento.csv', header='infer')
X = df.loc[:, df.columns.difference(['price'])]  # every column except the target
Y = df.loc[:, 'price']


## División del conjunto de datos

In [201]:
# Two-stage split: hold out the test set first, then carve the validation
# set out of what is left. Both stages share the same fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=TEST_SIZE,
    random_state=4815,
)
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train,
    Y_train,
    test_size=VAL_SIZE,
    random_state=4815,
)

In [202]:
# Report the (rows, columns) shape of each partition
for split_label, split_features in (
    ("Muestras en train:", X_train),
    ("Muestras en val:", X_val),
    ("Muestras en test:", X_test),
):
    print(split_label, split_features.shape)

Muestras en train: (60781, 10)
Muestras en val: (15196, 10)
Muestras en test: (32562, 10)


## Regresión con Redes Neuronales (Keras)

### Construyendo el modelo

In [203]:
def create_model(input_dimension, first_layer, hidden_layers=None):
    """Build and compile a fully connected regression network.

    Args:
        input_dimension: Number of input features fed to the first layer.
        first_layer: Number of units in the first Dense layer.
        hidden_layers: Iterable with the unit count of each additional Dense
            layer, in order. ``None`` (the default) or an empty iterable adds
            no extra layer.

    Returns:
        A compiled ``Sequential`` model ending in a single output unit with
        no activation, using mean squared error as loss, the ``rmsprop``
        optimizer and root mean squared error as the reported metric.
    """
    # None instead of a mutable [] default, so the default can never be shared between calls.
    if hidden_layers is None:
        hidden_layers = ()

    model = Sequential()
    model.add(Dense(first_layer, input_dim=input_dimension, kernel_initializer='normal', activation='relu'))
    for n_neural in hidden_layers:
        model.add(Dense(n_neural, kernel_initializer='normal', activation='relu'))

    # Output layer: one unit, no activation (regression target)
    model.add(Dense(1, kernel_initializer='normal'))
    if VERBOSE > 0:
        # summary() prints the table itself and returns None; wrapping it in
        # print() added a stray "None" line to the output.
        model.summary()

    # Compile model
    model.compile(loss='mean_squared_error', optimizer="rmsprop", metrics=["RootMeanSquaredError"])
    return model

### Ejecutando y testeando el modelo

In [204]:
### AUX PLOT FUNCTION
def plot_train(train, val, ylabel, title):
    """Plot a training curve against its validation curve, one point per epoch.

    Args:
        train: Sequence with the per-epoch values measured on the training data.
        val: Sequence with the per-epoch values measured on the validation data.
        ylabel: Label for the y axis (name of the plotted quantity).
        title: Figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(train)
    ax.plot(val)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('epoch')
    # The second curve is the validation series, so it is labelled 'val'
    # (it used to be mislabelled as 'test').
    ax.legend(['train', 'val'], loc='upper left')
    plt.show()
    
def testear(X_test, Y_test, model):
    """Evaluate a fitted model on held-out data and print its RMSE and MAE.

    Args:
        X_test: Feature matrix passed to ``model.predict``.
        Y_test: True target values aligned with ``X_test``.
        model: Fitted model exposing ``predict``.

    Prints both metrics rounded to the nearest integer; returns nothing.
    """
    # Flatten once so predictions and targets are both 1-D everywhere below
    # (assumes one predicted value per sample, e.g. an (n, 1) output — TODO confirm).
    Y_pred = model.predict(X_test).flatten()

    rmse = RootMeanSquaredError(dtype='float64')
    # Keras metrics are called as (y_true, y_pred), in that order.
    print("RMSE:", round(rmse(Y_test, Y_pred).numpy()))

    result = pd.DataFrame({"pred": Y_pred, "real": Y_test, "diferencia": abs(Y_pred - Y_test)})
    print("Nos equivocamos de media (MAE): ", round(result["diferencia"].mean()))
    
def train(model):
    """Cross-validate a Keras regressor on the training split and print the scores.

    Args:
        model: Passed to ``KerasRegressor`` as ``build_fn``.
            NOTE(review): the wrapper presumably expects a callable that
            builds and returns a compiled model, not a model instance —
            confirm against callers.

    Returns:
        The ``KerasRegressor`` wrapper that was handed to ``cross_val_score``.
        NOTE(review): the returned object is presumably not fitted, since
        cross-validation works on copies — confirm before predicting with it.
    """
    # EPOCHS and Y_train are the names the notebook defines; the earlier
    # EPOCH / y_train spellings raised NameError.
    estimator = KerasRegressor(build_fn=model, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    # random_state only has an effect when shuffling is enabled; scikit-learn
    # rejects a seed combined with shuffle=False.
    kfold = KFold(n_splits=KFOLD, shuffle=True, random_state=4815)
    results = cross_val_score(estimator, X_train, Y_train, cv=kfold)
    print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
    return estimator

In [205]:
def performace():
    """Train, plot and evaluate every architecture in the hyperparameter grid.

    For each combination of ``FIRST_LAYER`` and ``HIDDEN_LAYERS`` this builds
    a model, fits it on the training split while validating on the validation
    split, plots the loss and RMSE curves, prints the number of epochs run and
    reports the test-set metrics through ``testear``.
    """
    # The input width is the same for every architecture, so compute it once.
    input_dimension = X_train.shape[1]

    for neuronas in FIRST_LAYER:
        for hidden in HIDDEN_LAYERS:
            model = create_model(input_dimension, first_layer=neuronas, hidden_layers=hidden)
            history = model.fit(X_train, Y_train,
                                validation_data=(X_val, Y_val),
                                epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE_TRAIN)

            title = "Red neuronal, arquitectura: "+str(neuronas)+" "+ str(hidden)
            curves = history.history
            plot_train(curves["loss"], curves["val_loss"], "Loss", title)
            plot_train(curves["root_mean_squared_error"], curves["val_root_mean_squared_error"], "RMSE", title)

            # One loss entry is recorded per epoch, so the length is the
            # number of epochs run (the label used to be printed with no value).
            print("EPOCH:", len(curves["loss"]))
            testear(X_test, Y_test, model)

In [None]:
# Run the architecture sweep: train, plot and test every FIRST_LAYER x HIDDEN_LAYERS combination.
performace()

Model: "sequential_52"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_140 (Dense)            (None, 10)                110       
_________________________________________________________________
dense_141 (Dense)            (None, 1)                 11        
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________
None
