# Entrenamiento

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

### TF-Keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.metrics import RootMeanSquaredError


In [None]:
# Working directory: move into the "dataset" sub-directory of the current
# directory (presumably so the relative CSV path read later resolves there).
path = "/dataset"
# Skip the chdir when the current path already contains "/dataset", e.g. when
# this cell is re-run after a previous chdir.
# NOTE(review): this is a plain substring test with a hard-coded "/" separator,
# so it also matches names such as "/dataset_old" and presumably never matches
# on backslash-separated paths — confirm the target platforms.
if path not in os.getcwd():
    os.chdir(os.getcwd()+path) 
print("Path actual",os.getcwd())


In [None]:
#### HYPERPARAMETERS
VERBOSE = 0  # values > 0 enable model summaries and training plots
VERBOSE_TRAIN = 0  # forwarded as `verbose` to model.fit
EPOCHS = 50  # forwarded as `epochs` to model.fit
KFOLD = 5  # number of folds used by performace() and kfolds_folds()
BATCH_SIZE = 28  # forwarded as `batch_size` to model.fit
TEST_SIZE = 0.3  # fraction of the data held out by train_test_split
VAL_SIZE = 0.2  # presumably the validation fraction — NOTE(review): confirm it is wired into the fit call

FIRST_LAYER = [10,25,50, 75] # Unit counts to try for the first Dense layer: 10, 25, 50 and 75
HIDDEN_LAYERS = [[],[5], [5,2]] # Hidden stacks to try: none, one layer of 5 units, or two layers of 5 and 2 units

In [None]:
# Load the training dataset: 'price' is the regression target and every other
# column is used as an input feature.
df = pd.read_csv('dataset_entrenamiento.csv', header='infer')
Y = df['price']
# NOTE(review): Index.difference presumably returns the remaining labels
# sorted, so the column order of X may differ from the CSV — confirm if the
# order matters downstream.
X = df[df.columns.difference(['price'])]


## División del conjunto de datos

In [None]:
# Hold out a TEST_SIZE fraction of the samples for the final test; the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=TEST_SIZE, random_state=4815)

In [None]:
# Report the shape of the feature matrix of each split.
print("Muestras en train:",X_train.shape)
print("Muestras en test:",X_test.shape)

## Regresión con Redes Neuronales (Keras)

### Construyendo el modelo

In [None]:
def create_model(input_dimension, first_layer, hidden_layers=None):
    """Build and compile a fully connected regression network.

    Args:
        input_dimension: Number of input features, passed as ``input_dim``
            to the first Dense layer.
        first_layer: Number of units in the first Dense layer.
        hidden_layers: Iterable with the unit count of each additional
            hidden Dense layer, in order. ``None`` (the default) or an
            empty sequence adds no extra hidden layer.

    Returns:
        The compiled ``Sequential`` model (mean squared error loss,
        rmsprop optimizer, RootMeanSquaredError metric).
    """
    # None replaces the former mutable ``[]`` default so that no list object
    # is shared between calls.
    if hidden_layers is None:
        hidden_layers = ()

    model = Sequential()
    model.add(Dense(first_layer, input_dim=input_dimension, kernel_initializer='normal', activation='relu'))
    for n_units in hidden_layers:
        model.add(Dense(n_units, kernel_initializer='normal', activation='relu'))

    # Output layer: a single unit with no activation argument.
    model.add(Dense(1, kernel_initializer='normal'))

    if VERBOSE > 0:
        # summary() prints the table itself and returns None; wrapping it in
        # print() used to emit an extra "None" line.
        model.summary()

    # Compile model
    model.compile(loss='mean_squared_error', optimizer="rmsprop", metrics=["RootMeanSquaredError"])
    return model

### Ejecutando y testeando el modelo

In [None]:
### AUX PLOT FUNCTION
def plot_train(train, val, ylabel, title):
    """Plot two per-epoch curves on one figure and show it.

    Args:
        train: Sequence of per-epoch values drawn first.
        val: Sequence of per-epoch values drawn second.
        ylabel: Label for the y axis.
        title: Figure title.
    """
    # Draw the curves in the same order as the legend entries below.
    for curve in (train, val):
        plt.plot(curve)

    plt.xlabel('epoch')
    plt.ylabel(ylabel)
    plt.title(title)
    # NOTE(review): the second legend entry reads 'test' although the
    # parameter is named `val` — confirm the intended label.
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
def testear(X_test, Y_test, model):
    print("... Test ...")
    Y_pred = model.predict(X_test)
    rmse = RootMeanSquaredError(dtype='float64')
    print("RMSE:",round(rmse(Y_pred,Y_test).numpy()))
    result = pd.DataFrame({"pred": Y_pred.flatten(), "real": Y_test, "diferencia": abs(Y_pred.flatten() - Y_test)})
    print("Nos equivocamos de media (MAE): ",round(result["diferencia"].sum() / len(result["diferencia"])))  
    
def kfolds_folds(muestras_por_fold, X_train, Y_train, n_folds=None):
    """Split the training data into consecutive folds.

    Every fold except the last holds ``muestras_por_fold`` samples; the last
    fold runs up to the end of the data, so it absorbs whatever remainder
    is left.

    Args:
        muestras_por_fold: Number of samples in every fold except the last.
        X_train: Features; any object supporting ``len()`` and
            ``[start:stop]`` slicing.
        Y_train: Targets, sliced with the same boundaries as ``X_train``.
        n_folds: Number of folds. Defaults to the module-level ``KFOLD``.

    Returns:
        Tuple ``(X_kfolds, Y_kfolds)``: two lists with ``n_folds`` slices
        each, in data order.
    """
    if n_folds is None:
        n_folds = KFOLD

    # Integer fold boundaries: 0, m, 2m, ..., (n_folds - 1) * m, len(X_train).
    limits = [int(k * muestras_por_fold) for k in range(n_folds)]
    limits.append(len(X_train))

    spans = list(zip(limits, limits[1:]))
    X_kfolds = [X_train[start:stop] for start, stop in spans]
    Y_kfolds = [Y_train[start:stop] for start, stop in spans]
    return X_kfolds, Y_kfolds

In [None]:
def performace():
    """Train and evaluate every architecture in FIRST_LAYER x HIDDEN_LAYERS.

    For each combination:

    * With ``KFOLD > 1`` the training set is split into ``KFOLD``
      consecutive folds. A freshly initialised model is trained for each
      fold on the remaining folds, and the last-epoch validation RMSE on
      the held-out fold is recorded. The mean over the folds is printed.
      A final model is then trained on the whole training set.
    * Otherwise a single model is trained with a ``VAL_SIZE`` validation
      split, and the training curves are plotted when ``VERBOSE > 0``.

    In both cases the resulting model is evaluated on the test split with
    ``testear``. Reads the module-level data splits and hyperparameters;
    returns nothing.
    """
    input_dimension = X_train.shape[1]

    for neuronas in FIRST_LAYER:
        for hidden in HIDDEN_LAYERS:
            print(" Perfomance of Model with a layer = ", neuronas," (n neural) and hidden ", hidden)

            # A single fold leaves no data to train on, so cross-validation
            # only makes sense with at least two folds.
            if KFOLD > 1:
                muestras_por_fold = round(X_train.shape[0] / KFOLD)
                # The fold boundaries do not depend on the fold index, so
                # compute them once per architecture.
                X_folds, Y_folds = kfolds_folds(muestras_por_fold, X_train, Y_train)

                fold_rmse = []
                for i in range(KFOLD):
                    # New weights for every fold: reusing one model would let
                    # it train on data that later serves as validation.
                    fold_model = create_model(input_dimension, first_layer=neuronas, hidden_layers=hidden)

                    # pd.concat replaces Series.append, which was removed in
                    # pandas 2.0.
                    X_train_fold = pd.concat(X_folds[:i] + X_folds[i+1:])
                    Y_train_fold = pd.concat(Y_folds[:i] + Y_folds[i+1:])

                    history = fold_model.fit(X_train_fold, Y_train_fold,
                                             validation_data=(X_folds[i], Y_folds[i]),
                                             epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE_TRAIN)
                    # The cross-validation score is the error on the held-out
                    # fold, not the error on the data the model was fit on.
                    fold_rmse.append(history.history["val_root_mean_squared_error"][-1])
                    print(f"--- {i+1} folds")

                print(f"RMSE mean of {KFOLD} KFOLD", round(np.mean(fold_rmse)))

                # Final model for the test evaluation, trained on all the
                # training data.
                model = create_model(input_dimension, first_layer=neuronas, hidden_layers=hidden)
                model.fit(X_train, Y_train,
                          epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE_TRAIN)
            else:
                model = create_model(input_dimension, first_layer=neuronas, hidden_layers=hidden)
                history = model.fit(X_train, Y_train, validation_split=VAL_SIZE,
                                    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE_TRAIN)
                if VERBOSE > 0:
                    title = "Red neuronal, arquitectura: "+str(neuronas)+" "+ str(hidden)
                    plot_train(history.history["loss"], history.history["val_loss"], "Loss", title)
                    plot_train(history.history["root_mean_squared_error"], history.history["val_root_mean_squared_error"], "RMSE", title)

            testear(X_test, Y_test, model)

In [None]:
# Run the sweep over all configured architectures.
performace()