# 1 - Introduction

There is not a lot of code required, but we are going to step over it slowly so that you will know how to create your own models in the future. The steps you are going to cover in this practical assignment are as follows:

1. Load Data
2. Define Model
3. Compile Model
4. Fit Model
5. Evaluate Model
6. Tie It All Together
7. Make Predictions

## Import packages

In [1]:
!pip install mlxtend==0.17.3



You should consider upgrading via the 'c:\users\ilililililililil\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.





In [2]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [5]:
# Clear any logs from previous runs
!del logs -r

NÆo foi poss¡vel encontrar C:\Users\IlIlIlIlIlIlIlIl\UFRN\Aprendizado Profundo\deeplearning\weeks_04_05\logs


In [6]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import scipy.io
import time
from time import gmtime, strftime
import datetime
import os
import pytz
from mlxtend.plotting import plot_decision_regions

%matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [7]:
tf.__version__

'2.4.0'

# 2 - Overfit multilayer perceptron

In [8]:
class MyCustomCallback(tf.keras.callbacks.Callback):

  def on_train_begin(self, batch, logs=None):
    self.begins = time.time()
    print('Training: begins at {}'.format(datetime.datetime.now(pytz.timezone('America/Fortaleza')).strftime("%a, %d %b %Y %H:%M:%S")))

  def on_train_end(self, logs=None):
    print('Training: ends at {}'.format(datetime.datetime.now(pytz.timezone('America/Fortaleza')).strftime("%a, %d %b %Y %H:%M:%S")))
    print('Duration: {:.2f} seconds'.format(time.time() - self.begins))    

In [None]:
# overfit mlp for the moons dataset
from sklearn.datasets import make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# generate 2d classification dataset
x, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# split into train and test sets
n_train = 30
train_x, test_x = x[:n_train, :], x[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# callbacks tensorboard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=100)

# fit model
history = model.fit(train_x, train_y,
                    validation_data=(test_x, test_y), 
                    epochs=4000, verbose=0,batch_size=32,
                    callbacks=[MyCustomCallback(),tensorboard_callback])

# evaluate the model
_, train_acc = model.evaluate(train_x, train_y, verbose=0)
_, test_acc = model.evaluate(test_x, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()
plt.tight_layout()
plt.show()

Training: begins at Thu, 29 Jul 2021 08:45:03


In [None]:
from mlxtend.plotting import plot_decision_regions
# Plot decision boundary
plot_decision_regions(test_x,test_y.squeeze(), clf=model,zoom_factor=2.0)
plt.title("Model without regularization")
plt.show()

In [None]:
# Start TensorBoard within the notebook using magics
%tensorboard --logdir logs

# 3 - L2 Regularization



The standard way to avoid overfitting is called **L2 regularization**. It consists of appropriately modifying your cost function, from:
$$J = -\frac{1}{m} \sum\limits_{i = 1}^{m} \left( \small  y^{(i)}\log\left(\hat{y}^{(i)}\right) + (1-y^{(i)})\log\left(1- \hat{y}^{(i)}\right) \right) \tag{1}$$


To:
$$J_{regularized} = \small \underbrace{-\frac{1}{m} \sum\limits_{i = 1}^{m} \large{(}\small y^{(i)}\log\left(\hat{y}^{(i)}\right) + (1-y^{(i)})\log\left(1- \hat{y}^{(i)}\right) \large{)} }_\text{cross-entropy cost} + \underbrace{\frac{1}{m} \frac{\lambda}{2} \sum\limits_l\sum\limits_k\sum\limits_j W_{k,j}^{[l]2} }_\text{L2 regularization cost} \tag{2}$$


In [None]:
# mlp with weight regularization for the moons dataset
from sklearn.datasets import make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# generate 2d classification dataset
x, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# split into train and test sets
n_train = 30
train_x, test_x = x[:n_train, :], x[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model
model_l2 = Sequential()
model_l2.add(Dense(500, input_dim=2, activation='relu',
                kernel_regularizer=l2(0.001)))
model_l2.add(Dense(1, activation='sigmoid'))
model_l2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# callbacks tensorboard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=100)

# fit model
history_l2 = model_l2.fit(train_x, train_y, 
                    validation_data=(test_x, test_y),
                    epochs=4000, verbose=0,
                    callbacks=[MyCustomCallback(),tensorboard_callback])

# evaluate the model
_, train_acc = model_l2.evaluate(train_x, train_y, verbose=0)
_, test_acc = model_l2.evaluate(test_x, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history_l2.history['loss'], label='train')
plt.plot(history_l2.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history_l2.history['accuracy'], label='train')
plt.plot(history_l2.history['val_accuracy'], label='test')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
from mlxtend.plotting import plot_decision_regions
# Plot decision boundary
plot_decision_regions(test_x,test_y.squeeze(), clf=model_l2,zoom_factor=2.0)
plt.title("Model with regularization")
plt.show()

In [None]:
# Start TensorBoard within the notebook using magics
%tensorboard --logdir logs

# 4 - Dropout



Finally, **dropout** is a widely used regularization technique that is specific to deep learning. 
**It randomly shuts down some neurons in each iteration.** Watch these two animations to see what this means!

<!--
To understand drop-out, consider this conversation with a friend:
- Friend: "Why do you need all these neurons to train your network and classify images?". 
- You: "Because each neuron contains a weight and can learn specific features/details/shape of an image. The more neurons I have, the more featurse my model learns!"
- Friend: "I see, but are you sure that your neurons are learning different features and not all the same features?"
- You: "Good point... Neurons in the same layer actually don't talk to each other. It should be definitly possible that they learn the same image features/shapes/forms/details... which would be redundant. There should be a solution."
!--> 

<center><img width="600" src="https://drive.google.com/uc?export=view&id=1eNMeI3JkcXQ3_AeOUItFfgf8D6tSexg0"></center>
<br>
<caption><center> <b>Figure 1</b>: Drop-out on the second hidden layer. <br> At each iteration, you shut down (= set to zero) each neuron of a layer with probability $1 - keep\_prob$ or keep it with probability $keep\_prob$ (50% here). The dropped neurons don't contribute to the training in both the forward and backward propagations of the iteration. </center></caption>

<center><img width="600" src="https://drive.google.com/uc?export=view&id=1GfLbHLOQ2xzsDOQZ-d4m0ZmljZeBHAOz"></center>
<caption><center> <b>Figure 2</b>: Drop-out on the first and third hidden layers. <br> $1^{st}$ layer: we shut down on average 40% of the neurons.  $3^{rd}$ layer: we shut down on average 20% of the neurons. </center></caption>


When you shut some neurons down, you actually modify your model. The idea behind drop-out is that at each iteration, you train a different model that uses only a subset of your neurons. With dropout, your neurons thus become less sensitive to the activation of one other specific neuron, because that other neuron might be shut down at any time. 


In [None]:
# mlp with weight regularization for the moons dataset
from sklearn.datasets import make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# generate 2d classification dataset
x, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# split into train and test sets
n_train = 30
train_x, test_x = x[:n_train, :], x[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model
model_dropout = Sequential()
model_dropout.add(Dense(500, input_dim=2, activation='relu'))
model_dropout.add(Dropout(0.4))
model_dropout.add(Dense(1, activation='sigmoid'))
model_dropout.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# callbacks tensorboard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=100)

# fit model
history_dropout = model_dropout.fit(train_x, train_y, 
                    validation_data=(test_x, test_y),
                    epochs=4000, verbose=0,
                    callbacks=[MyCustomCallback(),tensorboard_callback])

# evaluate the model
_, train_acc = model_dropout.evaluate(train_x, train_y, verbose=0)
_, test_acc = model_dropout.evaluate(test_x, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history_dropout.history['loss'], label='train')
plt.plot(history_dropout.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history_dropout.history['accuracy'], label='train')
plt.plot(history_dropout.history['val_accuracy'], label='test')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
from mlxtend.plotting import plot_decision_regions
# Plot decision boundary
plot_decision_regions(test_x,test_y.squeeze(), clf=model_dropout,zoom_factor=2.0)
plt.title("Model with dropout")
plt.show()

In [None]:
# Start TensorBoard within the notebook using magics
%tensorboard --logdir logs

# 5 - L2 vs Dropout

In [None]:
def print_analysis(titles,history,loss=True):
  if loss:
    func = "loss"
    func_val = "val_loss"
  else:
    func = "binary_accuracy"
    func_val = "val_binary_accuracy"

  f, axs = plt.subplots(1,len(titles),figsize=(12,6))
   
  for i, title in enumerate(titles):
    axs[i].set_title(title)
    axs[i].plot(history[i].history[func])
    axs[i].plot(history[i].history[func_val])
    axs[i].set_ylabel(func)
    axs[i].set_xlabel('epoch')
    axs[i].legend(['train', 'test'], loc='best')
  
  plt.tight_layout()
  plt.show()

In [None]:
titles = ['Model without regularization','Model with regularization L2','Model with dropout']
hist = [history,history_l2,history_dropout]
print_analysis(titles,hist,loss=True)

In [None]:
def print_regions(titles,models):

  f, axs = plt.subplots(1,len(titles),figsize=(12,4))
   
  for i, title in enumerate(titles):
    plot_decision_regions(test_x,test_y.squeeze(), clf=models[i],zoom_factor=2.0,ax=axs[i])
    axs[i].set_title(title)
  plt.tight_layout()
  plt.show()

In [None]:
models = [model,model_l2,model_dropout]
print_regions(titles,models)

# 6 - Force Small Weights with Weight Constraints

In [None]:
# mlp overfit on the moons dataset with a unit norm constraint
from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.constraints import unit_norm
import matplotlib.pyplot as plt
import os

# generate 2d classification dataset
x, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# split into train and test
n_train = 30
train_x, test_x = x[:n_train, :], x[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu', kernel_constraint=unit_norm()))
#kernel_constraint=tf.keras.constraints.min_max_norm(min_value=-0.2, max_value=1.0)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# callbacks tensorboard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=100)

# fit model
history = model.fit(train_x, train_y,
                    validation_data=(test_x, test_y),
                    epochs=4000, verbose=0,
                    callbacks=[MyCustomCallback(),tensorboard_callback])

# evaluate the model
_, train_acc = model.evaluate(train_x, train_y, verbose=0)
_, test_acc = model.evaluate(test_x, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
from mlxtend.plotting import plot_decision_regions
# Plot decision boundary
plot_decision_regions(test_x,test_y.squeeze(), clf=model,zoom_factor=2.0)
plt.title("Model with weights constraints")
plt.show()

In [None]:
# Start TensorBoard within the notebook using magics
%tensorboard --logdir logs

In [None]:
filter = tf.keras.constraints.UnitNorm()
data = np.arange(3).reshape(3, 1).astype(np.float32)
print(data)

In [None]:
filter(data)

In [None]:
np.linalg.norm(filter(data))

In [None]:
np.linalg.norm(data)

In [None]:
data/np.linalg.norm(data)

In [None]:
filter = tf.keras.constraints.UnitNorm()
data = np.arange(6).reshape(3, 2).astype(np.float32)
data

In [None]:
filter(data)

In [None]:
np.linalg.norm(filter(data),axis=0)

In [None]:
np.linalg.norm(data,axis=0)

In [None]:
data/np.linalg.norm(data,axis=0)

In [None]:
# mlp overfit on the moons dataset with a unit norm constraint
from sklearn.datasets import make_moons
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.constraints import unit_norm
import matplotlib.pyplot as plt
import os

# generate 2d classification dataset
x, y = make_moons(n_samples=100, noise=0.2, random_state=1)

# split into train and test
n_train = 30
train_x, test_x = x[:n_train, :], x[n_train:, :]
train_y, test_y = y[:n_train], y[n_train:]

# define model
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu', 
                kernel_constraint=tf.keras.constraints.min_max_norm(min_value=-0.2, max_value=1.0)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# callbacks tensorboard
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=100)


# fit model
history = model.fit(train_x, train_y,
                    validation_data=(test_x, test_y),
                    epochs=4000, verbose=0,
                    callbacks=[MyCustomCallback(),tensorboard_callback])

# evaluate the model
_, train_acc = model.evaluate(train_x, train_y, verbose=0)
_, test_acc = model.evaluate(test_x, test_y, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

# plot loss learning curves
plt.subplot(211)
plt.title('Cross-Entropy Loss', pad=-40)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

# plot accuracy learning curves
plt.subplot(212)
plt.title('Accuracy', pad=-40)
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
from mlxtend.plotting import plot_decision_regions
# Plot decision boundary
plot_decision_regions(test_x,test_y.squeeze(), clf=model,zoom_factor=2.0)
plt.title("Model with weights constraints")
plt.show()