<a href="https://colab.research.google.com/github/prasanth5reddy/D2L/blob/master/Multilayer%20Perceptrons/dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mounting Drive

In [1]:
import sys

from google.colab import drive

# Mount Google Drive so files stored there are visible under /content/drive.
drive.mount('/content/drive')

# Directory added to the import path; the `import d2l` cell below relies on it.
D2L_DIR = '/content/drive/My Drive/Colab/D2L.AI/'
# Guard the append so re-running this cell does not pile up duplicate entries.
if D2L_DIR not in sys.path:
  sys.path.append(D2L_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Installing Libraries

In [2]:
!pip install mxnet



Importing Libraries

In [0]:
import d2l
from mxnet import autograd, gluon, nd, init
from mxnet.gluon import loss as gloss, nn

Implementation from Scratch

Define Dropout Function

In [0]:
def dropout(X, drop_prob):
  """Apply inverted dropout to ``X``.

  Each element is zeroed with probability ``drop_prob`` and the surviving
  elements are scaled by ``1 / (1 - drop_prob)``, so the expected value of
  every element is unchanged.

  Args:
    X: Input NDArray.
    drop_prob: Probability of dropping an element, in ``[0, 1]``.

  Returns:
    An NDArray with the same shape as ``X``.

  Raises:
    AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
  """
  assert 0 <= drop_prob <= 1
  # Everything is dropped; return early so we never divide by zero below.
  if drop_prob == 1:
    return X.zeros_like()
  # Sample the mask on the same context as X; otherwise the mask lands on the
  # default context and the product below fails for inputs living elsewhere.
  mask = nd.random.uniform(0, 1, shape=X.shape, ctx=X.context) > drop_prob
  return mask * X / (1.0 - drop_prob)

In [5]:
# Try the dropout function on a small 2x8 matrix holding 0..15:
# keep everything (0), drop about half (0.5), drop everything (1).
X = nd.arange(16).reshape((2, 8))
for prob in (0, 0.5, 1):
  print(dropout(X, prob))


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @cpu(0)>

[[ 0.  0.  0.  0.  8. 10. 12.  0.]
 [16.  0. 20. 22.  0.  0.  0. 30.]]
<NDArray 2x8 @cpu(0)>

[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>


Define Model Parameters

In [0]:
# Layer widths: input features -> hidden layer 1 -> hidden layer 2 -> outputs.
num_inputs, num_hiddens1, num_hiddens2, num_outputs = 784, 256, 256, 10

# Weight matrices, drawn from a normal distribution with scale 0.01.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens1), scale=0.01)
W2 = nd.random.normal(shape=(num_hiddens1, num_hiddens2), scale=0.01)
W3 = nd.random.normal(shape=(num_hiddens2, num_outputs), scale=0.01)

# Bias vectors start at zero.
b1 = nd.zeros(num_hiddens1)
b2 = nd.zeros(num_hiddens2)
b3 = nd.zeros(num_outputs)

# Allocate gradient buffers for every trainable parameter.
params = [W1, b1, W2, b2, W3, b3]
for param in params:
  param.attach_grad()

Define the Model

In [0]:
# Dropout probabilities for the first and second hidden layers.
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X):
  """Forward pass of the two-hidden-layer MLP.

  The input is flattened to ``(-1, num_inputs)``, passed through two
  ReLU-activated fully connected layers and a final linear output layer.
  Dropout is applied after each hidden layer, but only while autograd
  reports training mode.
  """
  training = autograd.is_training()
  flat = X.reshape(shape=(-1, num_inputs))

  # First fully connected layer, followed by dropout when training
  hidden1 = nd.relu(nd.dot(flat, W1) + b1)
  if training:
    hidden1 = dropout(hidden1, drop_prob1)

  # Second fully connected layer, followed by dropout when training
  hidden2 = nd.relu(nd.dot(hidden1, W2) + b2)
  if training:
    hidden2 = dropout(hidden2, drop_prob2)

  # Output layer: no activation and no dropout
  return nd.dot(hidden2, W3) + b3

Training and Testing

In [8]:
# Training hyperparameters
num_epochs = 10
lr = 0.5
batch_size = 256

# Softmax cross-entropy loss and the Fashion-MNIST data iterators
loss = gloss.SoftmaxCELoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the from-scratch model, passing the raw parameter list and learning rate
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 1.1049, train acc 0.570, test acc 0.760
epoch 2, loss 0.5694, train acc 0.788, test acc 0.834
epoch 3, loss 0.4894, train acc 0.821, test acc 0.852
epoch 4, loss 0.4463, train acc 0.836, test acc 0.862
epoch 5, loss 0.4170, train acc 0.848, test acc 0.865
epoch 6, loss 0.3945, train acc 0.857, test acc 0.872
epoch 7, loss 0.3780, train acc 0.862, test acc 0.867
epoch 8, loss 0.3619, train acc 0.868, test acc 0.865
epoch 9, loss 0.3559, train acc 0.870, test acc 0.880
epoch 10, loss 0.3413, train acc 0.874, test acc 0.880


Concise Implementation

In [0]:
# Same architecture as the from-scratch model, built from Gluon layers.
net = nn.Sequential()
# First fully connected layer, then its dropout layer
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob1))
# Second fully connected layer, then its dropout layer
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob2))
# Output layer
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

In [10]:
# SGD trainer over all parameters of the Gluon model
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
# params and lr are None here because the trainer performs the updates
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    None, None, trainer)

epoch 1, loss 1.1727, train acc 0.548, test acc 0.777
epoch 2, loss 0.5844, train acc 0.782, test acc 0.833
epoch 3, loss 0.4983, train acc 0.819, test acc 0.853
epoch 4, loss 0.4471, train acc 0.838, test acc 0.856
epoch 5, loss 0.4219, train acc 0.846, test acc 0.868
epoch 6, loss 0.3989, train acc 0.853, test acc 0.872
epoch 7, loss 0.3806, train acc 0.861, test acc 0.868
epoch 8, loss 0.3668, train acc 0.867, test acc 0.872
epoch 9, loss 0.3544, train acc 0.871, test acc 0.874
epoch 10, loss 0.3486, train acc 0.871, test acc 0.872
