In [None]:
# import all packages

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import zipfile

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

In [None]:
# Unpack the zipped training CSV into the /data directory.
with zipfile.ZipFile("/kaggle/input/sberbank-russian-housing-market/train.csv.zip", mode='r') as archive:
    archive.extractall(path="/data")

In [None]:
# Show what the extraction step left in /data.
os.listdir(path="/data")

In [None]:
# Load the extracted training CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='/data/train.csv')

# Preview the first five rows.
df.head(n=5)

In [None]:
# import packages 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler

# simple data cleaning: every numeric column except the id and the target is a feature,
# and the target is modelled as log1p(price_doc).
X = df.select_dtypes('number').drop(['id', 'price_doc'], axis=1)
y = np.log1p(df['price_doc'])

# Create a missing-value indicator for each numeric column that has gaps.
# The indicators are added to X (the feature matrix), not to df, so that the model
# actually sees them and the raw frame is left untouched when this cell is re-run.
# They are stored as floats so the whole matrix stays numeric for the scaler.
numeric_cols = X.columns.values
cols_with_missing = [col for col in numeric_cols if X[col].isnull().any()]

missing_indicators = (
    X[cols_with_missing]
    .isnull()
    .astype('float64')
    .add_suffix('_ismissing')
)
# A single concat avoids inserting columns one at a time.
X = pd.concat([X, missing_indicators], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Impute with medians computed on the training rows only, so that no statistic of the
# held-out test rows leaks into the features (same principle as fitting the scaler below).
train_medians = X_train[cols_with_missing].median()
for col in cols_with_missing:
    print('Imputing missing values for: {}'.format(col))

X = X.fillna(train_medians)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# A column that is entirely missing in the training rows would have no median.
assert not X_train.isnull().values.any(), "unimputed values left in X_train"
assert not X_test.isnull().values.any(), "unimputed values left in X_test"


# Need to scale the features for neural networks, otherwise the training doesn't converge.
# The scaler is fitted on the training rows only and then applied to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

X_test_scaled = scaler.transform(X_test)

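### Parameters (hyperparameters to tune):
* `num_hidden_layers`: number of hidden (dense) layers in the network
* `neurons_per_layer`: number of units in each hidden layer
* `dropout_rate`: dropout rate applied to the inputs and after every hidden layer
* `activation`: activation function of the hidden layers
* `optimizer`: optimization algorithm (Adam, RMSprop or SGD)
* `learning_rate`: learning rate passed to the optimizer
* `batch_size`: batch size used when fitting the model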

In [None]:
# This returns a multi-layer-perceptron model in Keras.
def get_keras_model(num_hidden_layers, 
                    num_neurons_per_layer, 
                    dropout_rate, 
                    activation,
                    input_dim=None):
    """Build an (uncompiled) fully connected regression network.

    Architecture: input -> Dropout -> [Dense -> Dropout] * num_hidden_layers -> Dense(1, linear).

    Args:
        num_hidden_layers: number of Dense hidden layers to stack.
        num_neurons_per_layer: number of units in every hidden layer.
        dropout_rate: rate used by every Dropout layer (on the inputs and after
            each hidden layer).
        activation: activation passed to every hidden Dense layer.
        input_dim: number of input features. When omitted, the width of the
            notebook-level ``X_train_scaled`` array is used, which is what the
            existing callers rely on.

    Returns:
        A ``tf.keras.Model`` mapping a feature vector to a single linear output.
    """
    if input_dim is None:
        # Fall back to the training matrix defined earlier in the notebook.
        input_dim = X_train_scaled.shape[1]

    # define the layers.
    inputs = tf.keras.Input(shape=(input_dim,))  # input layer.
    x = layers.Dropout(dropout_rate)(inputs)  # dropout on the input features.

    # Add the hidden layers, each followed by dropout on its activations.
    for _ in range(num_hidden_layers):
        x = layers.Dense(num_neurons_per_layer,
                         activation=activation)(x)
        x = layers.Dropout(dropout_rate)(x)

    # output layer: one linear unit for regression.
    outputs = layers.Dense(1, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model
    

# This function takes in the hyperparameters and returns a hold-out validation score.
def keras_mlp_cv_score(parameterization, weight=None):
    """Train an MLP with the given hyperparameters and score it on a validation split.

    The model is fitted for 50 epochs on ``X_train_scaled`` / ``y_train`` with 20% of
    the rows held out by Keras (``validation_split=0.2``). The score is the mean
    validation loss (MSE) over the last 10 epochs.

    Args:
        parameterization: mapping with the keys ``num_hidden_layers``,
            ``neurons_per_layer``, ``dropout_rate``, ``activation``, ``optimizer``,
            ``learning_rate`` and ``batch_size``.
        weight: unused; kept so existing callers keep working.

    Returns:
        ``(mean, sem)`` as plain Python floats: the mean validation loss of the last
        10 epochs and its standard error. ``(9999.0, 0.0)`` is returned when training
        diverged (the mean is NaN or infinite).
    """
    model = get_keras_model(parameterization.get('num_hidden_layers'),
                            parameterization.get('neurons_per_layer'),
                            parameterization.get('dropout_rate'),
                            parameterization.get('activation'))

    opt = parameterization.get('optimizer')
    opt = opt.lower()

    learning_rate = parameterization.get('learning_rate')

    # Any name other than 'adam' / 'rms' falls back to plain SGD.
    if opt == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif opt == 'rms':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

    NUM_EPOCHS = 50

    # Specify the training configuration.
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=['mse'])

    data = X_train_scaled
    labels = y_train.values

    # fit the model using a 20% validation set.
    res = model.fit(data, labels, epochs=NUM_EPOCHS, batch_size=parameterization.get('batch_size'),
                    validation_split=0.2)

    # look at the last 10 epochs of the validation loss.
    last10_scores = np.asarray(res.history['val_loss'][-10:], dtype=float)
    mean = last10_scores.mean()

    # If the model didn't converge (NaN or infinite loss) then report a high,
    # finite loss so the optimizer can still use the observation.
    if not np.isfinite(mean):
        return 9999.0, 0.0

    # The second element is consumed as a standard error of the mean, so the sample
    # standard deviation has to be divided by sqrt(n) rather than returned as-is.
    n_scores = last10_scores.size
    if n_scores > 1:
        sem = last10_scores.std(ddof=1) / np.sqrt(n_scores)
    else:
        sem = 0.0

    return float(mean), float(sem)

In [None]:
# Search space handed to Ax: one entry per tunable hyperparameter.
parameters = [
    # Continuous ranges, sampled on a log scale.
    dict(name="learning_rate", type="range", bounds=[0.0001, 0.5], log_scale=True),
    dict(name="dropout_rate", type="range", bounds=[0.01, 0.5], log_scale=True),
    # Integer ranges describing the network size.
    dict(name="num_hidden_layers", type="range", bounds=[1, 10], value_type="int"),
    dict(name="neurons_per_layer", type="range", bounds=[1, 300], value_type="int"),
    # Discrete choices.
    dict(name="batch_size", type="choice", values=[8, 16, 32, 64, 128, 256]),
    dict(name="activation", type="choice", values=['tanh', 'sigmoid', 'relu']),
    dict(name="optimizer", type="choice", values=['adam', 'rms', 'sgd']),
]

In [None]:
!pip install ax-platform

In [None]:
# import more packages
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.utils.notebook.plotting import render, init_notebook_plotting

init_notebook_plotting()

ax_client = AxClient()

# create the experiment.
# The objective is declared through `objectives`, which replaces the deprecated
# `objective_name=` / `minimize=` pair of arguments.
ax_client.create_experiment(
    name="keras_experiment",
    parameters=parameters,
    objectives={"keras_cv": ObjectiveProperties(minimize=True)},
)

def evaluate(parameters):
    """Score one hyperparameter set for Ax.

    Args:
        parameters: mapping of hyperparameter name to value for a single trial.

    Returns:
        A dict mapping the objective name ``"keras_cv"`` to the tuple returned by
        ``keras_mlp_cv_score``.
    """
    return {"keras_cv": keras_mlp_cv_score(parameters)}

In [None]:
# Run 10 optimization trials: ask Ax for a candidate, train/score it, report the result.
# The candidate is bound to `trial_params` rather than `parameters`, so the search-space
# list named `parameters` is not overwritten and the experiment cell can be re-run.
for _ in range(10):
    trial_params, trial_index = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=trial_index, raw_data=evaluate(trial_params))

In [None]:
# Table of every trial, ordered by trial index.
ax_client.get_trials_data_frame().sort_values(by='trial_index')

In [None]:
# Ask Ax for the best parameterization found and the values reported with it.
best_parameters, values = ax_client.get_best_parameters()

# Print each (name, value) pair of the best parameter set on its own line.
for name_value_pair in best_parameters.items():
    print(name_value_pair)

print()

# `values` unpacks into the objective means and their covariances;
# only the means (the best score achieved) are printed.
means, covariances = values
print(means)

In [None]:
# Plot the optimization trace (an objective optimum can optionally be supplied).
optimization_trace = ax_client.get_optimization_trace()
render(optimization_trace)

# Cannot do contour plot because it doesn't use a GP model.

In [None]:
# Rebuild the network with the best hyperparameters and fit it on all of the
# training rows (no validation split this time).
keras_model = get_keras_model(
    best_parameters['num_hidden_layers'],
    best_parameters['neurons_per_layer'],
    best_parameters['dropout_rate'],
    best_parameters['activation'],
)

opt = best_parameters['optimizer'].lower()
learning_rate = best_parameters['learning_rate']

# Map the optimizer name onto its Keras class; any other name falls back to SGD.
optimizer_classes = {
    'adam': tf.keras.optimizers.Adam,
    'rms': tf.keras.optimizers.RMSprop,
}
optimizer = optimizer_classes.get(opt, tf.keras.optimizers.SGD)(learning_rate=learning_rate)

NUM_EPOCHS = 50

# Training configuration: mean squared error as both loss and reported metric.
keras_model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mse'],
)

data = X_train_scaled
labels = y_train.values
res = keras_model.fit(x=data, y=labels, epochs=NUM_EPOCHS, batch_size=best_parameters['batch_size'])


In [None]:
# Predict the held-out test rows and report the mean squared error
# (the target is log1p(price_doc), so the error is on that scale).
test_pred = keras_model.predict(X_test_scaled)
test_mse = mean_squared_error(y_test.values, test_pred)
print("MSE with best params: {:.2f}".format(test_mse))