In [None]:
import os
import shutil

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline

from IPython.display import SVG

from keras.wrappers.scikit_learn import KerasRegressor
from keras.models import Sequential
from keras.layers import InputLayer, Dense, Dropout
from keras.utils.vis_utils import model_to_dot
from keras.callbacks import CSVLogger, ModelCheckpoint

# Seed NumPy's global random number generator.
# NOTE(review): this presumably does not seed the Keras backend's own RNG, so
# weight initialisation may still differ between runs - confirm for the
# backend in use.
np.random.seed(1234)

# IPython magic: show matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Load the Boston housing data; the result is accessed like a dict below.
boston = load_boston()

# The feature names double as the list of input columns used by later cells.
x_cols = boston['feature_names']

# One frame holding every feature column followed by the target column 'y'.
boston_df = pd.DataFrame(boston['data'], columns=x_cols).assign(y=boston['target'])

print(boston['DESCR'])
boston_df.head()

## Split data into train, valid and test

In [None]:
# Hold out 20% of all rows as the test set, then split 20% of the remainder
# off as the validation set.
# random_state is passed explicitly so that re-running this cell on its own
# reproduces the same partition instead of depending on how much of NumPy's
# global random stream has been consumed so far.
train_valid_df, test_df = train_test_split(boston_df, test_size=0.2, random_state=1234)
train_df, valid_df = train_test_split(train_valid_df, test_size=0.2, random_state=1234)

# Work on independent copies: scaled feature columns are assigned into these
# frames later in the notebook.
train_df = train_df.copy()
test_df = test_df.copy()
valid_df = valid_df.copy()

print('#train:', len(train_df), '#valid:', len(valid_df), '#test:', len(test_df))

## Let's scale our data

In [None]:
# Fit the scaler on the training rows only; the statistics it learns are then
# reused unchanged for the test and validation rows.
scaller = StandardScaler().fit(train_df[x_cols])

# Overwrite the feature columns of each split with their scaled values.
for split_df in (train_df, test_df, valid_df):
    split_df[x_cols] = scaller.transform(split_df[x_cols])

train_df.head()

## Let's go back to matrices

In [None]:
# Feature matrices: the x_cols columns of each split as plain arrays.
X_train = train_df[x_cols].values
X_test = test_df[x_cols].values
X_valid = valid_df[x_cols].values

# Target vectors: the 'y' column of each split.
y_train = train_df['y'].values
y_test = test_df['y'].values
y_valid = valid_df['y'].values

## Set up network

In [None]:
# One hidden ReLU layer as wide as the input, followed by a single output unit
# for which no activation is specified.
n_features = len(x_cols)

model = Sequential()
model.add(Dense(n_features, input_dim=n_features, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))

# Train with the 'adam' optimizer against a mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')

### Plot the model

In [None]:
# Convert the model to a dot graph, render it to SVG with the 'dot' program
# and display the result as the cell output.
model_graph = model_to_dot(model)
SVG(model_graph.create(prog='dot', format='svg'))

In [None]:
# Fit on the training split for 600 epochs in batches of 16, passing the
# validation split as validation data; the return value is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=600,
    batch_size=16,
)

In [None]:
# Predict on the held-out test split and report both metrics.
y_pred = model.predict(X_test)

test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print('MSE:', test_mse)
print('R^2:', test_r2)

## Exercise: try to overfit by modifying the NN structure (add layers, make them wider)

## Exercise: explore the history object and plot the training history (aka learning curves)

## Let's use callbacks

Keras contains a callback mechanism that allows you to call your own function after each epoch, before each epoch, after each batch, etc.

Keras also includes a bunch of predefined callbacks, for example for checkpointing, early stopping, learning rate decay...

In [None]:
# A new model instance with the same layer layout as before.
model = Sequential([
    Dense(len(x_cols), input_dim=len(x_cols), kernel_initializer='normal', activation='relu'),
    Dense(1, kernel_initializer='normal')
])

model.compile('adam', loss='mean_squared_error')

# set up callbacks
# Start from an empty checkpoint directory. This is done in Python rather than
# with `!rm` / `!mkdir` so the cell does not depend on a POSIX shell, and the
# directory is named in exactly one place.
checkpoint_dir = '/tmp/checkpoints'
shutil.rmtree(checkpoint_dir, ignore_errors=True)
os.makedirs(checkpoint_dir, exist_ok=True)

# The checkpoint file name template embeds the epoch number and the
# validation loss.
checkpointer = ModelCheckpoint(
    os.path.join(checkpoint_dir, 'weights.{epoch:02d}-{val_loss:.2f}.hdf5')
)
# The CSV logger is pointed at a single file, /tmp/log.csv.
logger = CSVLogger('/tmp/log.csv')

history = model.fit(X_train, y_train,
                    batch_size=16,
                    epochs=1000,
                    callbacks=[checkpointer, logger],
                    validation_data=(X_valid, y_valid))

In [None]:
# Evaluate the model trained with callbacks on the held-out test split.
y_pred = model.predict(X_test)

test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print('MSE:', test_mse)
print('R^2:', test_r2)

## Let's use cross validation

In [None]:
# Cross-validation uses the complete frame: features in X, target in y.
X = boston_df[x_cols].values
y = boston_df['y'].values


def build_fn():
    """Build and compile a new instance of the regression network.

    The input width and the hidden-layer width are both taken from the
    module-level ``x_cols``. This function is handed to ``KerasRegressor``
    as its ``build_fn``.

    Returns:
        A compiled ``Sequential`` model using the 'adam' optimizer and a
        mean-squared-error loss.
    """
    n_inputs = len(x_cols)

    net = Sequential()
    net.add(Dense(n_inputs, input_dim=n_inputs, kernel_initializer='normal', activation='relu'))
    net.add(Dense(1, kernel_initializer='normal'))
    net.compile('adam', loss='mean_squared_error')

    return net


# The scaler is a pipeline step, so it is re-fitted together with the
# regressor for every cross-validation fold.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=build_fn, epochs=300, batch_size=16, verbose=0))
]

pipeline = Pipeline(estimators)

# shuffle=True is needed for random_state to take effect. Without it the seed
# is ignored, and recent scikit-learn releases reject the combination with a
# ValueError.
kfold = KFold(n_splits=5, shuffle=True, random_state=1234)

# One R^2 score per fold.
results = cross_val_score(pipeline, X, y, cv=kfold, scoring='r2', n_jobs=1)

In [None]:
# Report the mean of the per-fold R^2 scores.
mean_r2 = results.mean()
print('R^2:', mean_r2)

## Exercise: play with the NN structure. Try a deeper network and a wider one.