In [1]:
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from math import floor
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(alpha=0.1)
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)


In [17]:
# hyperparameters for tuning: #of neurons, activation function, optimizer, learning rate, batch size, epochs, # of layers
# #of neurons;10-100 # of epochs:20-100
# neurons = [10, 32, 64]
# epochs: [1,5,10]
# batch_size: [500, 1000, 2000]
# activation: ['relu', 'tanh', 'sigmoid']
# optimizer: ['SGD', 'RMSprop', 'Adam']
# learning_rate: [0.001, 0.01, 0.1]
# momentum: [0.1, 0.3, 0.5, 0.7, 0.9]
# drop-out: [0.0, 0.2, 0.4]


In [2]:
from EDA import get_train, get_test
x_train, y_train = get_train()
x_test = get_test()
print(x_train.shape, y_train.shape, x_test.shape)


(19168, 19) (19168,) (6966, 19)


In [3]:
def define_model(neurons=10, activation='relu', optimizer='SGD', learning_rate=0.1, dropout=0, batch_size=500, epochs=1, init_weights='normal'):
    model = Sequential()
    model.add(Dense(neurons, input_dim=x_train.shape[1], activation=activation, kernel_initializer=init_weights))
    model.add(Dropout(dropout))
    model.add(Dense(neurons, activation=activation, kernel_initializer=init_weights))
    model.add(Dropout(dropout))
    model.add(Dense(neurons, activation=activation, kernel_initializer=init_weights))
    model.add(Dropout(dropout))
    model.add(Dense(neurons, activation=activation, kernel_initializer=init_weights))
    model.add(Dense(neurons, activation=activation, kernel_initializer=init_weights))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [4]:
seed = 8
np.random.seed(seed)
estimator = KerasRegressor(build_fn=define_model, verbose=0)

In [8]:
# Set paramaters
params_nn ={
    'neurons': [10, 32, 64],
    'activation': ['relu', 'tanh', 'sigmoid'],
    'optimizer': ['SGD', 'RMSprop', 'Adam'],
    'learning_rate': [0.001, 0.01, 0.1],
    'dropout': [0.0, 0.2, 0.4],
    'batch_size': [50, 100, 200],
    'epochs': [5,10,15],
    'init_weights': ['uniform', 'normal', 'he_uniform'],
}
# Run Bayesian Optimization

grid_result = RandomizedSearchCV(
    estimator=estimator, 
    param_distributions=params_nn, 
    n_iter=500,
    cv=5, 
    n_jobs=-1,
    verbose = 10,
    scoring='neg_mean_squared_error'
)

In [9]:
grid_result.fit(x_train, y_train)

Fitting 5 folds for each of 500 candidates, totalling 2500 fits


RandomizedSearchCV(cv=5,
                   estimator=<keras.wrappers.scikit_learn.KerasRegressor object at 0x00000245EE51ADD8>,
                   n_iter=500, n_jobs=-1,
                   param_distributions={'activation': ['relu', 'tanh',
                                                       'sigmoid'],
                                        'batch_size': [50, 100, 200],
                                        'dropout': [0.0, 0.2, 0.4],
                                        'epochs': [5, 10, 15],
                                        'init_weights': ['uniform', 'normal',
                                                         'he_uniform'],
                                        'learning_rate': [0.001, 0.01, 0.1],
                                        'neurons': [10, 32, 64],
                                        'optimizer': ['SGD', 'RMSprop',
                                                      'Adam']},
                   scoring='neg_mean_squared_error', verbose=1

In [10]:
y = grid_result.predict(x_test)

In [13]:
def upload(y):
    submission = pd.read_csv('sample_submission.csv')
    submission['SalePrice'] = y
    submission.to_csv('submission.csv', index=False)
    print(submission.head())

array([3181051.8, 1076724.5, 1405832.1, ..., 2639210.2, 2138072.8,
       2437813. ], dtype=float32)