## Toy example of 1D linear regression with Keras
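This notebook demonstrates the basic workflow: creating data, scaling it, training a Keras model, and evaluating it with scikit-learn cross-validation and grid search.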

### Create features and targets ("X" and "Y")

In [None]:
import numpy as np
from sklearn.utils import shuffle

# Parameters of the ground-truth line that generates the targets.
hidden_slope = 5.0
hidden_bias = 5.0


def hidden_model(x):
    """Return the noise-free target ``hidden_slope * x + hidden_bias``."""
    return hidden_slope * x + hidden_bias


samples = 100

# Features as a single column: shape (samples, 1).
X = np.linspace(1, 10, samples)
X = np.reshape(X, (-1, 1))

Y_noise_std = 10

# Draw the noise from a dedicated, seeded generator so that Restart & Run All
# reproduces the same targets without touching NumPy's global random state.
noise_rng = np.random.RandomState(42)
Y = hidden_model(X) + noise_rng.randn(X.shape[0], X.shape[1]) * Y_noise_std

X, Y = shuffle(X, Y, random_state = 42)

print('X shape: (%d, %d)' % X.shape)
print('Y shape: (%d, %d)' % Y.shape)

### Visualize features and targets

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

plt.plot(X, Y, 'bo')
plt.plot(X, hidden_model(X), 'r--')

### Split data into training and test sets and preprocess using StandardScaler

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples as the test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size = 0.20, random_state = 42)

# Fit both scalers on the training portion only, then standardize that portion.
X_scaler = StandardScaler().fit(X_train)
Y_scaler = StandardScaler().fit(Y_train)
X_train = X_scaler.transform(X_train)
Y_train = Y_scaler.transform(Y_train)

# Scaled training points against the hidden line mapped into the same scaled space.
fig, ax = plt.subplots()
ax.plot(X_train, Y_train, 'bo')
ax.plot(X_scaler.transform(X), Y_scaler.transform(hidden_model(X)), 'r--')
ax.set_xlabel('X scaled')
ax.set_ylabel('Y scaled')

### Create Keras model with one layer containing one node with linear activation

In [None]:
from keras import losses, optimizers
from keras.activations import linear
from keras.layers import Dense
from keras.models import Sequential

# One Dense unit with linear activation, fed by a single input feature.
model = Sequential([Dense(1, activation=linear, input_dim = 1)])

# Stochastic gradient descent minimizing the mean squared error.
optimizer = optimizers.SGD(lr = 0.05)
loss = losses.mean_squared_error
model.compile(optimizer = optimizer, loss = loss, metrics = ['mse'])

model.summary()

### Train the model, plot training and validation loss vs. epochs

In [None]:
# validation_split = 0.20 asks Keras to hold out part of the training data for
# the validation loss reported after each epoch.
history = model.fit(X_train, Y_train, validation_split=0.20, epochs = 50, verbose = 1)

history_dict = history.history
epochs = len(history_dict['loss'])
epoch_numbers = range(1, epochs + 1)

# Training and validation loss per epoch on shared axes.
fig, ax = plt.subplots()
ax.plot(epoch_numbers, history_dict['loss'], 'bo-', label = 'Training loss')
ax.plot(epoch_numbers, history_dict['val_loss'], 'rs--', label = 'Validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

### Predict targets for test set, plot and compare to model noise

In [None]:
def plot_predictions(fitted_model):
    """Plot a fitted model's predictions for the test set and the training set.

    Draws two figures in original (unscaled) units: test targets against the
    predictions for ``X_test``, and training targets against the predictions
    for the training set. Both figures include the noise-free hidden model.

    Relies on the notebook-level ``X``, ``X_train``, ``X_test``, ``Y_train``,
    ``Y_test``, ``X_scaler``, ``Y_scaler`` and ``hidden_model``.

    Parameters
    ----------
    fitted_model
        Object with a ``predict`` method that accepts scaled features and
        returns scaled targets, either as a 1D array or as a single column.
    """
    def predict_unscaled(X_scaled):
        # Some estimators return 1D predictions; the target scaler was fitted
        # on a single column, so reshape to (n_samples, 1) before inverting.
        Y_scaled = np.reshape(fitted_model.predict(X_scaled), (-1, 1))
        return Y_scaler.inverse_transform(Y_scaled)

    plt.figure()
    plt.plot(X_test, Y_test, 'bo', label = 'Test set')
    plt.plot(X_test, predict_unscaled(X_scaler.transform(X_test)), 'ro', label = 'Predicted')
    plt.plot(X, hidden_model(X), 'k--', label = 'Hidden model')
    plt.legend()

    # X_train holds scaled features; undo the scaling once for both curves.
    X_train_unscaled = X_scaler.inverse_transform(X_train)

    plt.figure()
    plt.plot(X_train_unscaled, Y_scaler.inverse_transform(Y_train), 'ko', label = 'Training set')
    plt.plot(X_train_unscaled, predict_unscaled(X_train), 'go', label = 'Training set predicted')
    plt.plot(X, hidden_model(X), 'k--', label = 'Hidden model')
    plt.legend()
    
from sklearn.metrics import mean_squared_error

plot_predictions(model)

# Test-set predictions mapped back to the original target units.
X_test_scaled = X_scaler.transform(X_test)
Y_predicted = Y_scaler.inverse_transform(model.predict(X_test_scaled))

test_rms = np.sqrt(mean_squared_error(Y_test, Y_predicted))
print('RMS error %.1f (noise %.1f)' % (test_rms, Y_noise_std))

### Example with [KerasRegressor](https://keras.io/scikit-learn-api/) scikit-learn integration

In [None]:
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer

def build_model(hidden_layers = 0, units = 3, lr = 0.05):
    """Build and compile a small fully connected regression network.

    Parameters
    ----------
    hidden_layers : int
        Number of ReLU hidden layers placed before the output layer.
    units : int
        Number of units in each hidden layer.
    lr : float
        Value passed to the SGD optimizer.

    Returns
    -------
    The compiled ``Sequential`` model: ``hidden_layers`` ReLU layers followed
    by one linear output unit, using mean squared error as loss and metric.
    """
    from keras import losses, optimizers
    from keras.activations import linear, relu
    from keras.layers import Dense
    from keras.models import Sequential

    # Describe the layers first: the hidden stack, then the linear output unit.
    layer_specs = [dict(units=units, activation=relu) for _ in range(hidden_layers)]
    layer_specs.append(dict(units=1, activation=linear))
    # Whichever layer ends up first is the one that declares the input size.
    layer_specs[0]['input_dim'] = 1

    network = Sequential()
    for spec in layer_specs:
        network.add(Dense(**spec))

    network.compile(optimizer = optimizers.SGD(lr),
                    loss = losses.mean_squared_error,
                    metrics = ['mse'])
    return network

def rmse(y_true, y_pred):
    """Root-mean-square error between targets and predictions in original units.

    Both inputs are in the scaled target space; they are mapped back with the
    notebook-level ``Y_scaler`` before the error is computed.

    Parameters
    ----------
    y_true, y_pred : array-like
        Scaled targets and predictions, each either 1D with ``n_samples``
        entries or 2D with matching shapes. 1D input is treated as one column.

    Returns
    -------
    float
        RMSE of the inverse-transformed values.

    Raises
    ------
    AssertionError
        If the two inputs do not have the same shape after 1D input has been
        promoted to a column.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Regressor predictions arrive as 1D arrays; promote either side to a
    # column so the difference is element-wise instead of broadcasting.
    if y_true.ndim == 1:
        y_true = np.expand_dims(y_true, 1)
    if y_pred.ndim == 1:
        y_pred = np.expand_dims(y_pred, 1)
    assert y_pred.shape == y_true.shape
    diff = Y_scaler.inverse_transform(y_true) - Y_scaler.inverse_transform(y_pred)
    return np.sqrt(np.mean(diff * diff))

# RMSE is an error, so it is registered as a lower-is-better scorer.
rmse_scorer = make_scorer(rmse, greater_is_better = False)

# scikit-learn compatible wrapper around build_model with default settings.
regressor = KerasRegressor(
    build_fn=build_model,
    lr = 0.05,
    epochs = 50,
    verbose = 0,
)

### Example of fitting the KerasRegressor. Note that its parameters are tunable

In [None]:
regressor.fit(X_train, Y_train, epochs = 50)

plot_predictions(regressor)

# Regressor predictions are 1D; turn them into a column to match the targets.
Y_predicted_scaled = np.expand_dims(regressor.predict(X_scaler.transform(X_test)), 1)
Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

# rmse takes (y_true, y_pred), both in the scaled target space, so the scaled
# prediction is passed directly instead of being unscaled and scaled again.
rmse_mean = rmse(Y_scaler.transform(Y_test), Y_predicted_scaled)

print('RMS error %.1f (noise %.1f)' % (rmse_mean, Y_noise_std))

### Example of cross-validation with scikit-learn
Uses the default parameters given for regressor above

In [None]:
import pandas as pd

scores = cross_validate(regressor,
                        X_train,
                        Y_train,
                        scoring = {
                            'rmse': rmse_scorer
                        },
                        cv = 5,
                        verbose = 1)

# rmse_scorer was created with greater_is_better=False, so the stored
# 'test_rmse' values are negated RMSEs; flip the sign to report the error.
print('Mean CV RMSE error: %.2f' % (-np.mean(scores['test_rmse'])))

# The table below keeps the raw scorer output (negated RMSE per fold).
cv_scores = pd.DataFrame.from_dict(scores)
cv_scores

### Grid search example
Both model parameters and fitting parameters can be optimized

In [None]:
import pandas as pd
from sklearn.model_selection import GridSearchCV

# Candidate settings explored by the search.
param_grid = [dict(epochs=[25, 50], lr=[0.01], hidden_layers=[0, 1, 2], units=[3])]

# Five-fold search scored with rmse_scorer; the best combination under the
# 'rmse' key is refitted at the end.
grid_search = GridSearchCV(
    regressor,
    param_grid,
    scoring = {'rmse': rmse_scorer},
    refit = 'rmse',
    cv = 5,
    verbose = 2,
)
grid_search.fit(X_train, Y_train)

best_summary = (grid_search.best_params_, grid_search.best_score_)
print('Best params: %s, best score: %.2f' % best_summary)

results = pd.DataFrame.from_dict(grid_search.cv_results_)

In [None]:
# Display the table built from grid_search.cv_results_ in the previous cell.
results

In [None]:
from sklearn.metrics import mean_squared_error

plot_predictions(grid_search.best_estimator_)

# The refitted regressor returns 1D predictions; reshape them into a column so
# the scaler and the metric see the same (n_samples, 1) layout as Y_test.
Y_predicted_scaled = np.reshape(
    grid_search.best_estimator_.predict(X_scaler.transform(X_test)), (-1, 1))
Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

print('RMS error %.1f (noise %.1f)' % (np.sqrt(mean_squared_error(Y_test, Y_predicted)), Y_noise_std))