In [None]:
# Load the pickled bundle holding the train/test splits and the transform pipeline.
import importlib
import load_transform_pipeline  # imported as a module (not `from ... import`) so it can be reloaded
importlib.reload(load_transform_pipeline)
import pickle

file_path = '../models/data.pkl'

# NOTE(review): unpickling can execute arbitrary code; only load files produced
# by this project.
# The context manager closes the file handle even if unpickling raises; the
# previous bare `open(...)` call left it open.
with open(file_path, 'rb') as pickle_file:
    data = pickle.load(pickle_file)

# Splits stored under the '*_df' keys (used later for column-based inspection).
X_train_df = data['X_train_df']
X_test_df = data['X_test_df']

# Feature matrices fed to the model.
X_train = data['X_train']
X_test = data['X_test']

# Regression targets.
y_train = data['y_train']
y_test = data['y_test']

# NOTE(review): presumably the preprocessing pipeline that produced X_train/X_test
# from the '*_df' frames — confirm against load_transform_pipeline.
transform_pipeline = data['transform_pipeline']

In [None]:
# Sanity check: show the dimensions of the train and test feature matrices, in that order.
for feature_matrix in (X_train, X_test):
    print(feature_matrix.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras.layers import Dropout
from keras import optimizers
from keras import backend as K
from keras.layers import BatchNormalization, Activation

def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) built from Keras backend ops.

    Evaluates ``1 - SS_res / (SS_tot + epsilon)`` over every element of the
    tensors it is given, where ``SS_res`` is the sum of squared residuals and
    ``SS_tot`` is the sum of squared deviations of ``y_true`` from its mean.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predictions, broadcast-compatible with ``y_true``.

    Returns:
        A scalar tensor holding the R^2 score.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    deviation_from_mean = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(deviation_from_mean))
    # Epsilon keeps the division finite when every target is identical (SS_tot == 0).
    denominator = total_ss + K.epsilon()
    return 1 - residual_ss / denominator

# Fully connected regression network: four Dense -> BatchNorm -> ReLU -> Dropout
# stages, one plain Dense + ReLU stage, and a single linear output unit.
model = Sequential()

# One entry per regularised stage: (units, dropout rate, extra Dense kwargs).
# Only the first stage declares the input shape and carries an L2 weight penalty.
hidden_stages = [
    (1000, 0.33, {'input_shape': (X_train.shape[1],),
                  'kernel_regularizer': regularizers.l2(0.003)}),
    (1000, 0.4, {}),
    (800, 0.4, {}),
    (700, 0.3, {}),
]
for units, dropout_rate, dense_kwargs in hidden_stages:
    model.add(Dense(units, **dense_kwargs))
    # Normalisation is applied before the non-linearity, hence the separate Activation layer.
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

# Last hidden stage: no batch normalisation and no dropout.
model.add(Dense(600))
model.add(Activation('relu'))

# Single linear unit producing the regression output.
model.add(Dense(1, activation='linear'))

# Loss names kept at hand for experimentation:
#   mean_squared_logarithmic_error
#   mean_squared_error
model.compile(
    optimizer='adam',
    loss='mean_squared_logarithmic_error',
    metrics=['mae', r2_keras],
)
model.summary()

In [None]:
from keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Scale the learning rate by 0.7 once val_loss has gone 4 epochs without improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=4,
    verbose=1,
)
# Stop after 20 epochs without a val_loss improvement of at least 0.001,
# then restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
    verbose=1,
)
callbacks_list = [early_stopping, reduce_lr]

# Put the optimizer back at 0.001 before fitting.
# NOTE(review): presumably so that re-running this cell does not start from a
# rate already lowered by reduce_lr — confirm.
model.optimizer.learning_rate.assign(0.001)

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=100,
    verbose=1,
    callbacks=callbacks_list,
    validation_split=0.25,  # hold out 25% of the training samples for validation
)

In [None]:
import matplotlib.pyplot as plt


def _plot_training_curves(epoch_axis, train_values, val_values, title, ylabel, legend, ylim=None):
    """Draw one figure with the per-epoch training and validation curves of a metric.

    Args:
        epoch_axis: Epoch numbers used for the x coordinates and the x ticks.
        train_values: Metric values recorded on the training data.
        val_values: Metric values recorded on the validation data.
        title: Figure title.
        ylabel: Label of the y axis.
        legend: Two legend entries, training curve first.
        ylim: Optional ``(bottom, top)`` passed to ``plt.ylim``; a ``None``
            entry leaves that bound at its current value.
    """
    plt.figure(figsize=(12,8))
    plt.title(title)
    plt.plot(epoch_axis, train_values)
    plt.plot(epoch_axis, val_values)
    if ylim is not None:
        plt.ylim(ylim)
    plt.xticks(ticks=epoch_axis)
    plt.ylabel(ylabel)
    plt.legend(legend)
    plt.show()


# Loss curves, clipped to [0, 0.5] on the y axis.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers
_plot_training_curves(
    epochs, loss, val_loss,
    title='LOSS', ylabel='Loss',
    legend=['Training loss', 'Validation loss'],
    ylim=(0, .5),
)

# Mean absolute error curves.
mae = history.history['mae']
val_mae = history.history['val_mae']
_plot_training_curves(
    epochs, mae, val_mae,
    title='MAE', ylabel='Mae',
    legend=['Training mae', 'Validation mae'],
)

# R^2 curves (stored under the custom metric's function name); values below 0.4 are cut off.
acc = history.history['r2_keras']
val_acc = history.history['val_r2_keras']
_plot_training_curves(
    epochs, acc, val_acc,
    title='R2', ylabel='R2',
    legend=['Training r2', 'Validation r2'],
    ylim=(.4, None),
)

In [None]:
# Run inference on the training split first, then the test split;
# the unpacking targets follow that same order.
y_predicted, y_test_predicted = (
    model.predict(features) for features in (X_train, X_test)
)

In [None]:
lims = (0, 500)  # shared x/y axis range for both figures
alpha = .01      # very low opacity so dense regions of points remain readable


def _plot_actual_vs_predicted(actual, predicted, title, axis_limits=lims, point_alpha=alpha):
    """Scatter predicted values against actual values on square axes.

    Args:
        actual: 1-D sequence of true target values.
        predicted: 1-D sequence of model predictions, same length as ``actual``.
        title: Figure title, used to tell the train and test figures apart.
        axis_limits: ``(min, max)`` applied to both axes.
        point_alpha: Opacity of the scatter points.
    """
    plt.figure(figsize=(12,8))
    plt.title(title)
    plt.scatter(actual, predicted, alpha=point_alpha)
    # Targets plotted against themselves trace the y = x line of perfect predictions.
    plt.scatter(actual, actual, alpha=point_alpha)
    plt.xlabel('Valor real por noche')
    plt.ylabel('Valor predicho por noche')
    plt.xlim(axis_limits)
    plt.ylim(axis_limits)
    plt.show()


# Both calls take column 0 of the prediction array so each split is handled the
# same way (the train plot previously passed the unsliced array).
_plot_actual_vs_predicted(y_train, y_predicted[:, 0], 'Conjunto de entrenamiento')
_plot_actual_vs_predicted(y_test, y_test_predicted[:, 0], 'Conjunto de prueba')

In [None]:
# Work on a copy so the frame loaded from the pickle is left untouched.
df_predicted = X_train_df.copy()

# Column 0 of the prediction array, computed once and reused below.
train_predictions = y_predicted[:, 0]

df_predicted.loc[:, 'price'] = y_train
df_predicted.loc[:, 'predicted'] = train_predictions
# Ratio of predicted to actual price: below 1 means under-prediction, above 1 over-prediction.
df_predicted.loc[:, 'pred_ratio'] = train_predictions / y_train

# Show rows priced above 1000, ordered from most under-predicted to most over-predicted.
report_columns = ['price', 'predicted', 'pred_ratio', 'listing_url', 'name']
is_expensive = df_predicted['price'] > 1000
df_predicted.loc[is_expensive, report_columns].sort_values('pred_ratio')