In [38]:
# Execute the talktools.py script from the parent directory in this kernel
# (its contents are not shown in this notebook).
%run ../talktools.py

<Figure size 432x288 with 0 Axes>

<div class="alert alert-info">sklearn is not built for deep/complex networks such as those required for convnets (as we'll see later on). We must turn to specialized software (and potentially specialized hardware).</div>


# Deep Learning Frameworks

Almost all frameworks are written in low-level C++/C with Python (or other scripting-language) bindings.

### Low-level frameworks

   - Tensorflow (Google) Nov 2015. See https://www.tensorflow.org/api_docs/python/tf
   - Theano
   - Caffe (Berkeley)
   - Torch (Lua)
   - pytorch (Python). https://pytorch.org/docs/stable/index.html
   - CNTK (Microsoft)
   - Chainer
   - PaddlePaddle (Baidu) Aug 2016
   
### High level frameworks (Python)

   - Keras (atop Tensorflow, Theano) - https://www.tensorflow.org/api_docs/python/tf/keras
   - TFLearn 
   - nolearn
   - SkFlow (part of tensorflow)
   - [Lasagne](http://lasagne.readthedocs.io/en/latest/index.html) (atop Theano)
   
<img src="https://pbs.twimg.com/media/DX0lfBNU8AEs8KG.png:large" width="75%">
Source: https://twitter.com/fchollet/status/971863128341323776

<blockquote class="twitter-tweet" data-lang="en"><p lang="en" dir="ltr">TensorFlow is the platform of choice for deep learning in the research community. These are deep learning framework mentions on arXiv over the past 3 months <img src="https://pbs.twimg.com/media/DXy_uc0VAAAIhKG.jpg:small">

&mdash; François Chollet (@fchollet) <a href="https://twitter.com/fchollet/status/971863128341323776?ref_src=twsrc%5Etfw">March 8, 2018</a></blockquote>
<script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>


see also: https://github.com/mbadry1/Top-Deep-Learning

# Keras


"Use Keras if you need a deep learning library that:

Allows for easy and fast prototyping (through user friendliness, modularity, and extensibility).
Supports both convolutional networks and recurrent networks, as well as combinations of the two.
Runs seamlessly on CPU and GPU."

-- keras.io

Let's load up the California housing data as in the previous notebook.

In [None]:
import math

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd

from sklearn import datasets
from sklearn.preprocessing import StandardScaler  

# Fetch the California housing dataset that ships with scikit-learn.
cal_data = datasets.fetch_california_housing()
X, Y = cal_data['data'], cal_data['target']  # 8 features; response (median house price)

# Cut the rows down the middle: first half trains, second half tests.
half = len(Y) // 2
train_X, test_X = X[:half], X[half:]
train_Y, test_Y = Y[:half], Y[half:]

# Don't cheat - the scaler learns its statistics from the training data only.
scaler = StandardScaler().fit(train_X)

# The same fitted transformation is then applied to both splits.
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)

In [None]:
# Display the test-set targets (a bare last expression is shown as the cell output).
test_Y

In [None]:
# Number of columns in the scaled training matrix; used below as the input
# width of the network's first Dense layer.
num_input_features = train_X.shape[1]
print(f'number of input features = {num_input_features}')

In [None]:
import datetime, os

import tensorflow.keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
import tensorflow as tf

import warnings
# Ignore FutureWarning messages from this point on (the imports above have
# already executed, so anything they emitted is not affected).
warnings.simplefilter(action='ignore', category=FutureWarning)
# Print keras version
print(tensorflow.keras.__version__)

# Report the name of the backend this Keras build is using.
from tensorflow import keras
print(f"backend={keras.backend.backend()}")

In [None]:
def nn_clf():
    """Build and compile the fully connected regression network.

    The stack is Dense(32) -> Dense(32) -> Dense(10) -> Dense(1). The hidden
    layers use ReLU, the single output unit is linear, and every layer uses
    the 'random_uniform' kernel initializer. The input width is read from the
    notebook-level ``num_input_features``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, mean-squared-error
        loss, with MAE and MSE tracked as metrics).
    """
    init = 'random_uniform'
    layers = [
        Dense(32, input_shape=(num_input_features,),
              activation="relu", kernel_initializer=init),
        Dense(32, activation="relu", kernel_initializer=init),
        Dense(10, activation="relu", kernel_initializer=init),
        Dense(1, activation="linear", kernel_initializer=init),
    ]
    # Passing the list to the constructor adds the layers in the given order.
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', "mse"])
    return model

In [None]:
# Build the compiled network with the factory defined above ...
model = nn_clf()

# ... and print its layer-by-layer summary.
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Write a diagram of the model (with shapes and layer names) to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# Show the saved image as the cell output.
from IPython.display import Image
Image("model_plot.png")

In [None]:
# Import the scikit-learn wrapper from tf.keras so it matches the
# tensorflow.keras models/layers that nn_clf builds. The standalone `keras`
# package is a separate install and should not be mixed with tf.keras objects.
# NOTE(review): this wrapper module is absent from recent TensorFlow releases
# (scikeras is its successor) - confirm against the installed TF version.
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, KFold

# Wrap the model factory in a scikit-learn style regressor and train it on the
# scaled training half of the data.
clf = KerasRegressor(build_fn=nn_clf, batch_size=32, epochs=50)
clf.fit(train_X, train_Y)

In [None]:
# how well did we do?
import numpy as np
from sklearn.metrics import mean_squared_error

# Predict once and reuse the result for both the metric and the plot, instead
# of running inference over the test set twice.
test_pred_Y = clf.predict(test_X)
mse = mean_squared_error(test_Y, test_pred_Y)
print("MSE",mse)

plt.figure(figsize=(10,6))
plt.title("NN Regression Residuals - MSE = %.3f" % mse)
plt.scatter(test_Y, test_pred_Y, alpha=0.4, s=3)
plt.xlabel("Test Y")
plt.ylabel("Predicted Y")
# Red y = x reference line: points on it are predicted exactly.
plt.plot([0.2,5],[0.2,5],c="r")

### A More Complete Example

We want to train the model and decide when to stop based on `validation` data. Ultimately, we'd like to see how well our model would do on truly new data (`test`).


In [None]:
# Fractions of the dataset assigned to the training, validation and test splits.
train_percentage, valid_percentage, test_percentage = (0.90, 0.05, 0.05)

In [None]:
from sklearn.model_selection import train_test_split

# A single seeded generator drives the shuffle and both splits below.
rnd = np.random.RandomState(42)

# One index per dataset row, shuffled in place.
ind_arr = np.arange(len(X))
rnd.shuffle(ind_arr)

# First cut: training indices versus everything else.
train_ind, tmp = train_test_split(ind_arr, train_size=train_percentage, random_state=rnd)

# Second cut: divide the remainder between validation and test according to
# their relative fractions.
valid_ind, test_ind = train_test_split(
    tmp,
    train_size=valid_percentage / (valid_percentage + test_percentage),
    random_state=rnd,
)

Let's make sure that we're getting all the indices

In [None]:
# The three splits together must contain every row index exactly once.
# np.array_equal returns False when the lengths differ (an elementwise `==`
# between mismatched sequences is not a reliable check), so a bad split always
# shows up as a plain AssertionError.
assert np.array_equal(
    np.sort(np.concatenate([train_ind, valid_ind, test_ind])),
    np.sort(ind_arr),
), "train/valid/test indices do not partition the dataset"

In [None]:
# Shapes of the three index arrays (displayed in the order train, test, validation).
train_ind.shape, test_ind.shape, valid_ind.shape

Now let's scale the inputs

In [None]:
# Rows selected for training, before scaling.
train_X = X[train_ind]

# Don't cheat - the scaler's statistics come from the training rows only.
scaler = StandardScaler().fit(train_X)

# One fitted scaler transforms the training, validation and test features.
train_X = scaler.transform(train_X)
valid_X = scaler.transform(X[valid_ind])
test_X = scaler.transform(X[test_ind])

# Targets are picked out with the same index arrays and left untransformed.
train_y = Y[train_ind]
valid_y = Y[valid_ind]
test_y = Y[test_ind]

# Every split must have exactly one target per feature row.
for features, targets in ((train_X, train_y), (test_X, test_y), (valid_X, valid_y)):
    assert targets.shape[0] == features.shape[0]

In [None]:
# Fully connected regression network: 32 -> 32 -> 5 ReLU units followed by a
# single linear output. The layers are handed to Sequential as a list, which
# adds them in the order given.
model = Sequential([
    Dense(32, input_shape=(num_input_features,),
          activation="relu", kernel_initializer='random_uniform'),
    Dense(32, activation="relu", kernel_initializer='random_uniform'),
    Dense(5, activation="relu", kernel_initializer='random_uniform'),
    Dense(1, activation="linear", kernel_initializer='random_uniform'),
])

The `Model.compile` method in `keras` has a number of input parameters:

```python
compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None)
```
Usually, you'll set the `optimizer`, `loss`, and `metrics`.

https://keras.io/models/model/

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, and MAE/MSE
# reported as metrics.
model.compile(optimizer='adam', loss='mean_squared_error',  metrics=['mae',"mse"])

In [None]:
import datetime
import os

from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, \
                                                ReduceLROnPlateau, ModelCheckpoint, TensorBoard

# Create the output directory if it is missing. exist_ok=True tolerates an
# existing directory while still surfacing real failures (e.g. permission
# errors) that a bare `except: pass` would silently swallow.
os.makedirs('nn_results', exist_ok=True)

batch_size=64
num_epochs = 200

# Minute-resolution UTC timestamp used to tag this run's output files.
run_time_string = datetime.datetime.utcnow().isoformat(timespec='minutes')

# define path to save model
model_path = f'nn_results/datalab_nn_{run_time_string}.h5'
print(f"Training ... {model_path}")

# Tensorboard is a project which can ingest learning logs for interactive display...more on that later.
# The legacy write_grads / embeddings_layer_names / embeddings_data arguments
# are omitted: they were only ever passed at their "off" values and are not
# part of the current TensorBoard callback signature.
tb = TensorBoard(log_dir='nn_results', histogram_freq=0,
                 write_graph=True,
                 write_images=False,
                 embeddings_freq=0,
                 embeddings_metadata=None)

# Multiply the learning rate by 0.75 when val_mse has not improved for 3
# epochs, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_mse', factor=0.75,
                              patience=3, min_lr=1e-6, verbose=1, cooldown=0)

# Per-epoch metrics are appended to a CSV log tagged with the run timestamp.
csv_logger = CSVLogger(f'nn_results/training_{run_time_string}.log')

# Stop training once val_mse has failed to improve by at least 0.001 for 10
# consecutive epochs.
earlystop = EarlyStopping(monitor='val_mse', min_delta=0.001, patience=10,
                          verbose=1, mode='auto')

# Keep only the best model seen so far (lowest val_mse) at model_path.
model_check = ModelCheckpoint(model_path,
        monitor='val_mse',
        save_best_only=True,
        mode='min',
        verbose=1)

The `Model.fit` method:

```python
fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1)
```

In [None]:
# Train with the mini-batch size configured above (it was previously defined
# but never passed, so Keras fell back to its default) and register the
# reduce_lr callback, which was constructed but left out of the callback list.
history = model.fit(x=train_X, y=train_y,
               batch_size=batch_size,
               epochs=num_epochs,
               validation_data=(valid_X, valid_y),
               verbose=1, shuffle=True,
               callbacks=[reduce_lr, csv_logger, earlystop, model_check, tb])

In [None]:
# List the files currently in the results directory.
!ls nn_results/

Let's look at the history of the training results:

In [None]:
# Locate the most recently modified training log in pure Python rather than
# through an `ls | head` shell pipeline, so the cell does not depend on a
# POSIX shell being available.
import glob

# Kept as a (at most) one-element list so `lastest_log_file[0]` keeps working.
lastest_log_file = sorted(glob.glob('nn_results/training*'),
                          key=os.path.getmtime, reverse=True)[:1]
if not lastest_log_file:
    raise FileNotFoundError("no training logs found in nn_results/ - run the training cell first")

hist_df = pd.read_csv(lastest_log_file[0])
hist_df

This is also available in the return value from `.fit`:

In [None]:
# Keys of the dict stored on the History object returned by fit.
history.history.keys()

In [None]:
import seaborn as sns
sns.set_context("poster")

plt.figure(figsize=(7,6))
plt.title("Training History")

# Shift the row index by one so the first logged epoch is drawn at x = 1.
epoch_number = hist_df.index + 1

# Validation curve first, then training, each with its legend label.
for column, label in (("val_mse", "validation"), ("mse", "training")):
    plt.plot(epoch_number, hist_df[column], alpha=0.4, label=label)

plt.xlabel("Epoch")
plt.ylabel("Loss (MSE)")
# Both axes on a logarithmic scale.
plt.loglog()
plt.legend()

Take a look at the `tensorboard` notebook as well...

Let's take a look at the predictions on the test set

In [None]:
# List the contents of the nn_results directory.
!ls nn_results

In [None]:
# reload the best model
from tensorflow.keras.models import load_model
import tensorflow as tf

# Load the checkpoint written by this run (model_path is set in the callback
# setup cell) instead of a hard-coded timestamped filename, which only exists
# for the one run in which it was originally created.
model = load_model(model_path)

In [None]:
# Run the model on the scaled test features; the bare last line displays the
# predictions as the cell output.
pred_y = model.predict(test_X)
pred_y

In [None]:
# how well did we do?
import numpy as np
from sklearn.metrics import mean_squared_error

# Column 0 of the prediction array, taken once and used for both the metric
# and the plot.
test_pred = pred_y[:, 0]
mse = mean_squared_error(test_y, test_pred)
print("MSE",mse)

# Same figure as before, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(10,6))
ax.set_title("NN Regression Residuals - MSE = %.3f" % mse)
ax.scatter(test_y, test_pred, alpha=0.4, s=3)
ax.set_xlabel("Test Y")
ax.set_ylabel("Predicted Y")
# Red y = x reference line: points on it are predicted exactly.
ax.plot([0.2,5],[0.2,5],c="r")

How did we do compared to the validation and training data?