# Entrenamiento

In [451]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

### TF-Keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.metrics import RootMeanSquaredError


In [452]:
# Actual dir
path = "/dataset"
if path not in os.getcwd():
    os.chdir(os.getcwd()+path) 
print("Path actual",os.getcwd())


Path actual /home/iciac/Escritorio/CursoIA/Grupo_5/dataset


In [453]:
#### HIPERPARAMETROS
VERBOSE = 0
VERBOSE_TRAIN = 0
EPOCHS = 50
KFOLD = 5
BATCH_SIZE = 28 
TEST_SIZE = 0.3
VAL_SIZE = 0.2

FIRST_LAYER = [10,25,50, 75] # Probar con 10, 25, 50 en la primera capa
HIDDEN_LAYERS = [[],[5], [5,2]] # Probar sin capa oculta, con 2 neuronas o con 5 y con dos capas ocultas de 5 y 2

In [454]:
df = pd.read_csv('dataset_entrenamiento.csv', header='infer')
Y = df['price']
X = df[df.columns.difference(['price'])]


## Division del conjunto de datos

In [455]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=TEST_SIZE, random_state=4815)

In [456]:
print("Muestras en train:",X_train.shape)
print("Muestras en val:",X_val.shape)
print("Muestras en test:",X_test.shape)

Muestras en train: (75977, 10)
Muestras en val: (15196, 10)
Muestras en test: (32562, 10)


## Regresion con Redes Neuronales (Keras)

### Construyendo el modelo

In [457]:
def create_model(input_dimension, first_layer, hidden_layers = []):
    
    model = Sequential()
    model.add(Dense(first_layer, input_dim=input_dimension, kernel_initializer='normal', activation='relu'))
    for n_neural in hidden_layers:
        model.add(Dense(n_neural, kernel_initializer='normal', activation='relu'))

    # Compile model
    model.add(Dense(1, kernel_initializer='normal'))    
    if VERBOSE > 0:
        print(model.summary())
    model.compile(loss='mean_squared_error', optimizer = "rmsprop",metrics = ["RootMeanSquaredError"])
    return model

### Ejecutando y testeando el modelo

In [536]:
### AUX PLOT FUNCTION
def plot_train(train,val, ylabel, title):
    plt.plot(train)
    plt.plot(val)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
def testear(X_test, Y_test, model):
    print("... Test ...")
    Y_pred = model.predict(X_test)
    rmse = RootMeanSquaredError(dtype='float64')
    print("RMSE:",round(rmse(Y_pred,Y_test).numpy()))
    result = pd.DataFrame({"pred": Y_pred.flatten(), "real": Y_test, "diferencia": abs(Y_pred.flatten() - Y_test)})
    print("Nos equivocamos de media (MAE): ",round(result["diferencia"].sum() / len(result["diferencia"])))  
    
def KFOLD_index(muestras_por_fold, i):
    distancias = np.zeros(KFOLD) 
    
    for a in range(len(distancias)):
        distancias[a]= a * muestras_por_fold
    ini =  int(distancias[i])
    fin = int(distancias[i] + distancias[(KFOLD-(i+1))])
    ini_val = fin
    fin_val = - ini

    if fin_val == 0:
        fin_val = X_train.shape[0]
    if ini > fin:
        fin = X_train.shape[0]-(fin-1)
    print(ini,fin, ini_val,fin_val)
    return  ini,fin, ini_val,fin_val

In [537]:
def performace():
    for neuronas in FIRST_LAYER:
        for hidden in HIDDEN_LAYERS:
            print(" Perfomance of Model with a layer = ", neuronas," (n neural) and hidden ", hidden)

            input_dimension = X_train.shape[1]
            model = create_model(input_dimension, first_layer = neuronas, hidden_layers = hidden)
            
            kfolds = []
            muestras_por_fold = round(X_train.shape[0] / KFOLD)

            for i in range(0,KFOLD):          
                   
                ini,fin, ini_val,fin_val = KFOLD_index(muestras_por_fold, i)
                
                history = model.fit(X_train[ini: fin], Y_train[ini: fin], validation_data=(X_train[ini_val: fin_val],Y_train[ini_val: fin_val]), 
                                    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE_TRAIN)
                title = "Red neuronal, arquitectura: "+str(neuronas)+" "+ str(hidden)
                if VERBOSE > 0:
                    plot_train(history.history["loss"], history.history["val_loss"], "Loss", title)
                    plot_train( history.history["root_mean_squared_error"], history.history["val_root_mean_squared_error"], "RMSE", title)
                
                RMSE= history.history["root_mean_squared_error"][EPOCHS-1]
                kfolds.append(RMSE)

            print(f"RMSE mean of {KFOLD+1} KFOLD", kfolds.mean())
            testear(X_test,Y_test, model)

In [None]:
performace()

 Perfomance of Model with a layer =  10  (n neural) and hidden  []
0 60780 60780 75977
15195 60780 60780 -15195
