## Importar los datos

In [1]:
# Load the pre-computed train / validation / test splits and the transform pipeline.
import importlib
import load_transform_pipeline  # imported explicitly so that it can be reloaded below
importlib.reload(load_transform_pipeline)  # pick up edits to the module without restarting the kernel
import pickle

file_path = '../models/data.pkl'
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load
# files produced by this project.
# The context manager closes the file handle as soon as loading finishes.
with open(file_path, 'rb') as data_file:
    data = pickle.load(data_file)

# DataFrame versions of the feature splits.
X_train_df = data['X_train_df']
X_test_df = data['X_test_df']
X_val_df = data['X_val_df']

# Feature matrices fed to the model (densified later via a sparse-matrix method,
# so presumably SciPy sparse matrices - confirm against the pipeline).
X_train = data['X_train']
X_test = data['X_test']
X_val = data['X_val']

# Regression targets.
y_train = data['y_train']
y_test = data['y_test']
y_val = data['y_val']

transform_pipeline = data['transform_pipeline']

# Sanity check: all three splits must share the same number of feature columns.
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

(47238, 2479)
(20996, 2479)
(15747, 2479)


## Importar librerías

In [35]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import BatchNormalization, Activation, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
# !pip install keras-tuner --user
import kerastuner
from kerastuner.tuners import RandomSearch, BayesianOptimization
from kerastuner.engine.hyperparameters import HyperParameters
import time
import os

## Defino el build_model

In [28]:
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        1 - SS_res / (SS_tot + epsilon), where SS_res is the sum of squared
        residuals and SS_tot is the sum of squared deviations of `y_true`
        from its mean. The backend epsilon keeps the division finite when
        `y_true` is constant.
    """
    residual_sum_sq = K.sum(K.square(y_true - y_pred))
    total_sum_sq = K.sum(K.square(y_true - K.mean(y_true)))
    unexplained_fraction = residual_sum_sq / (total_sum_sq + K.epsilon())
    return 1 - unexplained_fraction

def build_model(hp):
    """Build and compile a tunable fully-connected regression network.

    Layout: an input block (Dense -> BatchNorm -> ReLU -> Dropout 0.5), then
    `num_layers` hidden blocks of the same shape whose width and dropout rate
    are tunable, then a single linear output unit. The model is compiled with
    Adam (amsgrad enabled), mean-squared-error loss, and the `mae` and
    `r2_keras` metrics.

    Args:
        hp: hyperparameter object exposing `Int` and `Float`; every call
            registers a named hyperparameter and returns its value.

    Returns:
        The compiled Sequential model.
    """
    n_features = X_train.shape[1]
    net = keras.models.Sequential()

    def append_block(units, dropout_rate, **dense_kwargs):
        # One Dense -> BatchNorm -> ReLU -> Dropout stage, appended in that order.
        for layer in (
            Dense(units, **dense_kwargs),
            BatchNormalization(),
            Activation('relu'),
            Dropout(dropout_rate),
        ):
            net.add(layer)

    # Input stage: tunable width, fixed dropout.
    append_block(
        hp.Int('input_units', min_value=256, max_value=2000, step=100),
        0.5,
        input_shape=(n_features,),
    )

    # Hidden stages: depth, per-layer width and per-layer dropout are all tunable.
    depth = hp.Int('num_layers', min_value=1, max_value=10, step=1)
    for i in range(depth):
        append_block(
            hp.Int(f'layer_{i}_units', min_value=100, max_value=1200, step=100),
            hp.Float(f'layer_{i}_dropout', min_value=0, max_value=.5, step=.1),
        )

    # Single linear unit for the regression output.
    net.add(Dense(1, activation='linear'))

    net.compile(
        optimizer=optimizers.Adam(amsgrad=True),
        loss='mean_squared_error',
        metrics=['mae', r2_keras],
    )
    return net

## BayesianOptimization

In [43]:
# Every run writes into a directory named after the current Unix timestamp
# (whole seconds), so results from separate runs land in separate folders.
LOG_DIR = os.path.join('..', 'models', 'BayesianOptimization', f'{int(time.time())}')

# Bayesian search over the hyperparameters declared in build_model,
# ranked by validation loss; each trial trains the model once.
tuner = BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=2,
    executions_per_trial=1,
    directory=LOG_DIR,
    project_name='airbnb',
)

## Callbacks

In [44]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Stop once val_loss has failed to improve by at least 0.001 for 10 epochs,
# restoring the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
# Multiply the learning rate by 0.7 after 3 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=3,
    verbose=1,
)
callbacks_list = [early_stopping, reduce_lr]

## Search

In [None]:
# Run the hyperparameter search. The feature matrices are densified with
# `.toarray()`, which yields a plain numpy.ndarray; `.todense()` would yield the
# legacy numpy.matrix type instead.
# NOTE(review): assumes X_train / X_val are SciPy sparse matrices - confirm.
tuner.search(
    x=X_train.toarray(),
    y=y_train,
    verbose=2,  # one line per epoch; the default progress bar floods the notebook output
    epochs=300,
    batch_size=64,
    callbacks=callbacks_list,
    validation_data=(X_val.toarray(), y_val),
)

Train on 47238 samples, validate on 15747 samples
Epoch 1/300
