In [None]:
pip install keras

In [None]:
pip install tensorflow

## Chapter 5: Shallow Network

In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.datasets import mnist
from tensorflow.keras.optimizers import SGD

from matplotlib import pyplot as plt

In [2]:
import pandas as pd
import numpy as np

In [7]:
# Load MNIST as (images, labels) pairs for the training and validation splits.
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()

In [6]:
# Array shapes of the training images and of their labels.
X_train.shape, y_train.shape

((60000, 28, 28), (60000,))

In [None]:
# Array shapes of the validation images and of their labels.
X_valid.shape, y_valid.shape

In [None]:
# Check which container type the loader returned for the images.
type(X_train)

In [None]:
# Raw pixel matrix of the first training image, as stored in the MNIST arrays
# (the notebook identifies this digit as a '5').
X_train[0]

In [None]:
# Render the first training image using the 'cool' colormap.
plt.imshow(X_train[0],cmap='cool')

In [None]:
# Label that belongs to the first training image.
y_train[0]

In [None]:
# Show the first 12 training digits in a 3x4 grid on a 5x5-inch figure.
# The original loop header `for i in range()` was left unfinished (a syntax
# error); it is completed here to mirror the grid cell that follows.
plt.figure(figsize=(5,5))
for i in range(12):
    plt.subplot(3, 4, i + 1)
    plt.imshow(X_train[i], cmap='Greys')
    plt.axis('off')
plt.show()

In [None]:
# Show the first 12 training digits in a 3x4 grid, without tight_layout
# (compare with the tight_layout variant in the next cell).
for i in range(12):
    plt.subplot(3, 4, i + 1)
    plt.imshow(X_train[i], cmap='Greys')
    plt.axis('off')

# show must be called; a bare `plt.show` only references the function object.
plt.show()

In [None]:
# Show the first 12 training digits in a 3x4 grid, with tight_layout applied
# to adjust the spacing between the subplots.
for i in range(12):
    plt.subplot(3, 4, i + 1)
    plt.imshow(X_train[i], cmap='Greys')
    plt.axis('off')
plt.tight_layout()

# show must be called; a bare `plt.show` only references the function object.
plt.show()

In [8]:
# Flatten each 28x28 image into one row vector and cast to float32 so the
# pixels can later be scaled to fractions (float32 needs half the memory of
# NumPy's default float64).
# Letting reshape infer the second dimension with -1 keeps this cell safe to
# re-run: indexing shape[2] fails once the arrays are already 2-D.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_valid = X_valid.reshape(X_valid.shape[0], -1).astype('float32')

In [None]:
# Values are still on the 0-255 scale of the original 'uint8' pixels,
# although the array has already been cast to float32:
X_train[0]

In [9]:
# Scale the pixel values from the 0-255 range down to 0-1.
# NOTE: the division happens in place, so re-running this cell scales the
# data a second time.
X_train /= 255
X_valid /= 255

In [None]:
# First training image again, after dividing the pixel values by 255.
X_train[0]

This is what a "5" looks like once the pixel values have been scaled to the 0–1 range.

In [None]:
# Label of the first training example, before one-hot encoding.
y_train[0]

In [None]:
# Label of the first validation example, before one-hot encoding.
y_valid[0]

#### transforming it to One-Hot Encoding

In [None]:
# Training labels as loaded, before they are converted to one-hot vectors.
y_train

In [10]:
import tensorflow as tf

In [11]:
# One-hot encode the labels: one column per target class (the digits 0 to 9).
n_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes=n_classes)
y_valid = tf.keras.utils.to_categorical(y_valid, num_classes=n_classes)

In [None]:
# First training label after one-hot encoding (a vector with n_classes entries).
y_train[0]

## Shallow Network

In [None]:
# Number of features per flattened image; the models below use it as input size.
X_train.shape[1]

In [None]:
# Shallow network: a stack of layers with one sigmoid hidden layer of 64
# neurons feeding a 10-way softmax output layer.
model = Sequential([
    Dense(64, activation='sigmoid', input_shape=(X_train.shape[1],)),  # hidden layer
    Dense(10, activation='softmax'),                                   # output layer
])

In [None]:
# Print the layer-by-layer architecture and parameter counts of the shallow net.
model.summary()

#### Fitting

In [None]:
# Configure training: mean-squared-error loss, plain SGD, accuracy tracking.
# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated and rejected by current Keras releases.
model.compile(
    loss='mean_squared_error',
    optimizer=SGD(learning_rate=0.01),
    metrics=['accuracy'],
)

In [None]:
# Train the shallow network, scoring the validation split after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=200,          # number of full passes over the training set
    batch_size=64 * 2,   # twice the number of hidden neurons
    verbose=1,           # show a progress bar
)

In [None]:
# Loss and accuracy of the shallow network on the validation set.
model.evaluate(X_valid, y_valid)

## Intermediate Network

#### Intermediate Model

In [None]:
# Intermediate network: two 64-neuron ReLU hidden layers (the shallow net had
# a single sigmoid one) feeding a softmax layer over the 10 possible outcomes.
model2 = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),  # 1st hidden layer, ReLU instead of sigmoid
    Dense(64, activation='relu'),                                   # 2nd (additional) hidden layer
    Dense(10, activation='softmax'),                                # output layer
])

In [None]:
# Print the layer-by-layer architecture and parameter counts of the intermediate net.
model2.summary()

In [None]:
# Configure training: cross-entropy loss instead of MSE, and a larger SGD step
# (0.1 instead of 0.01). `learning_rate` replaces the deprecated `lr` alias,
# which current Keras releases reject.
model2.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.1),
    metrics=['accuracy'],
)

In [None]:
# Train the intermediate network; 20 epochs instead of the shallow net's 200.
model2.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Loss and accuracy of the intermediate network on the validation set.
model2.evaluate(X_valid, y_valid)

##### Conclusion: using ReLU as the activation function, an additional hidden layer, and cross-entropy as the loss function all increase efficiency (i.e., time to compute) and accuracy (97% vs 87%)

## Improving Deep Networks

### Weight and Bias Initialization

In [12]:
from keras.layers import Activation
from keras.initializers import Zeros, RandomNormal
from keras.initializers import glorot_normal, glorot_uniform

In [None]:
# Bias initializer: every bias starts at zero.
b_init = Zeros()

In [None]:
# Weight initializer: random values drawn with the Glorot normal scheme.
w_init = glorot_normal() # glorot_uniform() is the uniform-distribution alternative

In [None]:
# Wider ReLU network (256 -> 128 hidden neurons) whose first layer uses the
# explicit initializers defined above: w_init for weights, b_init for biases.
model3 = Sequential([
    Dense(64 * 4, activation='relu', input_shape=(X_train.shape[1],),
          kernel_initializer=w_init,
          bias_initializer=b_init),     # 1st hidden layer
    Dense(64 * 2, activation='relu'),   # 2nd hidden layer
    Dense(10, activation='softmax'),    # output layer: 10 possible outcomes
])

In [None]:
# Print the layer-by-layer architecture and parameter counts of model3.
model3.summary()

In [None]:
# Configure training: cross-entropy loss instead of MSE, SGD with step 0.1.
# `learning_rate` replaces the deprecated `lr` alias, which current Keras
# releases reject.
model3.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.1),
    metrics=['accuracy'],
)

In [None]:
# Train model3 for 20 epochs, validating after each one.
model3.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Loss and accuracy of model3 on the validation set.
model3.evaluate(X_valid, y_valid)

### Batch Normalization

In [13]:
from keras.layers import normalization

In [14]:
from keras.layers import BatchNormalization

In [None]:
# Same 256 -> 128 ReLU architecture, with a batch-normalization layer inserted
# after each hidden layer.
model4 = Sequential([
    Dense(64 * 4, activation='relu', input_shape=(X_train.shape[1],),
          kernel_initializer=glorot_normal(),
          bias_initializer=b_init),     # 1st hidden layer
    BatchNormalization(),
    Dense(64 * 2, activation='relu'),   # 2nd hidden layer
    BatchNormalization(),
    Dense(10, activation='softmax'),    # output layer: 10 possible outcomes
])

In [None]:
# Configure training: cross-entropy loss instead of MSE, SGD with step 0.1.
# `learning_rate` replaces the deprecated `lr` alias, which current Keras
# releases reject.
model4.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.1),
    metrics=['accuracy'],
)

In [None]:
# Train the batch-normalized network for 20 epochs, validating after each one.
model4.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=1,
)

In [None]:
# Loss and accuracy of the batch-normalized network on the validation set.
model4.evaluate(X_valid, y_valid)

### Fancy Optimizers

In [None]:
# Nadam optimizer with its hyperparameters spelled out explicitly.
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    name='Nadam',
)

In [None]:
# Batch-normalized 256 -> 128 ReLU network with a 10-way softmax output,
# built here to be trained with the optimizer configured in the previous cell.
model5 = Sequential([
    Dense(64 * 4, activation='relu', input_shape=(X_train.shape[1],),
          kernel_initializer=glorot_normal(),
          bias_initializer=b_init),
    BatchNormalization(),
    Dense(64 * 2, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])

In [None]:
# Cross-entropy loss and accuracy metric, trained with the `optimizer` object
# created earlier in the notebook.
model5.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train silently (verbose=0) for 20 epochs, then report the validation scores.
model5.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=0,
)

model5.evaluate(X_valid, y_valid)

## Deep Neural Network

In [19]:
# Optimizer: Adagrad with a 0.1 learning rate.
optimizer = tf.keras.optimizers.Adagrad(
    learning_rate=0.1,
    initial_accumulator_value=0.1,
    epsilon=1e-07,
)

# Architecture: three ReLU hidden layers (256 -> 128 -> 64 neurons), each
# followed by batch normalization, then a 10-way softmax output.
model_ = Sequential([
    Dense(64 * 4, activation='relu', input_shape=(X_train.shape[1],),
          kernel_initializer=glorot_normal(), bias_initializer=Zeros()),
    BatchNormalization(),
    Dense(64 * 2, activation='relu'),
    BatchNormalization(),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])

# Training configuration: loss, metric and optimizer.
model_.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit silently, then report loss and accuracy on the validation set.
model_.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=0,
)

model_.evaluate(X_valid, y_valid)



[0.07053099572658539, 0.9832000136375427]

### Overfitting (Dropout)

In [21]:
from keras.layers import Dropout

In [27]:
# Optimizer: Adagrad with a 0.1 learning rate.
optimizer = tf.keras.optimizers.Adagrad(
    learning_rate=0.1,
    initial_accumulator_value=0.1,
    epsilon=1e-07,
)

# Architecture: three smaller ReLU hidden layers (64 -> 32 -> 16 neurons),
# each batch-normalized, with dropout applied before the softmax output.
# Note that this rebinds `model_`, replacing the model built earlier.
model_ = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],),
          kernel_initializer=glorot_normal(), bias_initializer=Zeros()),
    BatchNormalization(),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),   # dropout rate
    Dense(10, activation='softmax'),
])

# Training configuration: loss, metric and optimizer.
model_.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit with per-epoch progress, displaying the returned History object,
# then display the validation loss and accuracy.
display(model_.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=1,
))

display(model_.evaluate(X_valid, y_valid))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1e32a8d12b0>



[0.1236206665635109, 0.9724000096321106]

## Using TensorBoard

In [28]:
from keras.callbacks import TensorBoard # new!

In [29]:
# TensorBoard callback that writes its logs under logs/deep-net.
tensorboard = TensorBoard(log_dir='logs/deep-net')

In [32]:
pip install jupyterlab

Note: you may need to restart the kernel to use updated packages.


In [30]:
# Fit `model_` with the TensorBoard callback attached so the run is logged.
# Training resumes from whatever weights earlier fits left in `model_`.
model_.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=20,
    batch_size=128,
    verbose=1,
    callbacks=[tensorboard],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1e32f892640>

## Regression (Boston Housing Prices)

In [35]:
from keras.datasets import boston_housing

In [36]:
# Load the Boston housing data as (features, target) pairs for the training
# and validation splits.
(Xtrain, ytrain), (Xvalid, yvalid) = boston_housing.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [37]:
# Array shapes of the training features and of the training targets.
Xtrain.shape, ytrain.shape

((404, 13), (404,))

In [39]:
# First training example: its feature vector and its target value.
Xtrain[0], ytrain[0]

(array([  1.23247,   0.     ,   8.14   ,   0.     ,   0.538  ,   6.142  ,
         91.7    ,   3.9769 ,   4.     , 307.     ,  21.     , 396.9    ,
         18.72   ]),
 15.2)

#### Intermediate Net

In [48]:
# Regression network: two batch-normalized ReLU hidden layers (32 and 16
# neurons), light dropout before the output, and a single linear unit that
# emits the predicted value.
# The input width is read from the data rather than hard-coded as 13, matching
# how the MNIST models in this notebook size their input.
model_reg = Sequential()

model_reg.add(Dense(32, input_shape=(Xtrain.shape[1],), activation='relu'))
model_reg.add(BatchNormalization())

model_reg.add(Dense(16, activation='relu'))
model_reg.add(BatchNormalization())
model_reg.add(Dropout(0.2))

model_reg.add(Dense(1, activation='linear'))

In [49]:
# Print the layer-by-layer architecture and parameter counts of the regression net.
model_reg.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 32)                448       
_________________________________________________________________
batch_normalization_35 (Batc (None, 32)                128       
_________________________________________________________________
dense_46 (Dense)             (None, 16)                528       
_________________________________________________________________
batch_normalization_36 (Batc (None, 16)                64        
_________________________________________________________________
dropout_5 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 17        
Total params: 1,185
Trainable params: 1,089
Non-trainable params: 96
__________________________________________________

#### Configure Model

In [50]:
# Regression setup: minimise mean squared error with the Adam optimizer.
# No extra metrics are tracked.
model_reg.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

#### Train Model

##### .train vs .fit

In [None]:
# Keras models expose no `.train` method: `.fit` is the training entry point.
# `.predict` takes input features only; its second positional argument is
# `batch_size`, so the targets must not be passed to it.
model_reg.fit(Xtrain, ytrain, batch_size=8, epochs=32, verbose=1)
model_reg.predict(Xvalid)

In [51]:
# Train with .fit, scoring the validation split after every epoch.
model_reg.fit(
    x=Xtrain,
    y=ytrain,
    validation_data=(Xvalid, yvalid),
    epochs=32,
    batch_size=8,
    verbose=1,
)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.callbacks.History at 0x1e32a0c7850>

In [53]:
# compile() was given no metrics, so evaluate() returns only the loss, i.e.
# the mean squared error on the validation set (there is no accuracy figure
# for a regression model).
model_reg.evaluate(Xvalid, yvalid)



37.38697052001953

#### Predicting

In [54]:
# Predict for validation example 27, reshaped into a one-row batch.
model_reg.predict(Xvalid[27].reshape(1, Xtrain.shape[1]))

array([[35.72557]], dtype=float32)

### One-Hot Encoding the Response Variable to Categorical Codes via Keras' to_categorical