<a href="https://colab.research.google.com/github/prasanth5reddy/D2L/blob/master/Modern%20Convolutional%20Networks/batch_normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mounting Drive

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Put the project folder on the module search path so that modules stored
# there can be imported (presumably where the local `d2l` module lives -- TODO confirm).
import sys
w_dir = '/content/drive/My Drive/Colab/D2L.AI/'
# Only append once: re-running this cell must not pile up duplicate sys.path entries.
if w_dir not in sys.path:
  sys.path.append(w_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Installing Libraries

In [2]:
!pip uninstall mxnet && pip install mxnet-cu100



Importing Libraries

In [0]:
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import nn
import d2l

Implementation from Scratch

In [0]:
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
  """Apply batch normalization to `X` and return the updated running statistics.

  When autograd is in training mode the statistics of the current batch are
  used to normalize `X`, and the running mean/variance are updated as an
  exponential moving average weighted by `momentum`. Otherwise `X` is
  normalized with the supplied running statistics, which are returned unchanged.

  Args:
    X: input with 2 or 4 dimensions (checked by an assert in training mode
      only). For 2 dimensions statistics are taken over axis 0; for 4
      dimensions over axes (0, 2, 3), i.e. one statistic per index of axis 1.
    gamma: scale applied to the normalized input.
    beta: shift added after scaling.
    moving_mean: running mean used for inference.
    moving_var: running variance used for inference.
    eps: small constant added to the variance before the square root.
    momentum: weight kept from the previous running statistics on each update.

  Returns:
    A tuple `(Y, moving_mean, moving_var)`.
  """
  if autograd.is_training():
    ndim = len(X.shape)
    assert ndim in (2, 4)
    if ndim == 4:
      # Reduce over every axis except axis 1; keepdims lets the result
      # broadcast against X.
      reduce_axes = (0, 2, 3)
      mean = X.mean(axis=reduce_axes, keepdims=True)
      var = ((X - mean) ** 2).mean(axis=reduce_axes, keepdims=True)
    else:
      # 2-D input: one mean/variance per column.
      mean = X.mean(axis=0)
      var = ((X - mean) ** 2).mean(axis=0)

    # Normalize with the statistics of the current batch.
    X_hat = (X - mean) / nd.sqrt(var + eps)
    # Exponential moving average of the batch statistics.
    moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
    moving_var = momentum * moving_var + (1.0 - momentum) * var
  else:
    # Prediction mode: rely on the running statistics.
    X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)

  return gamma * X_hat + beta, moving_mean, moving_var

In [0]:
class BatchNorm(nn.Block):
  """Batch normalization layer built on the from-scratch `batch_norm` function.

  Args:
    num_features: size of the feature axis (axis 1) of the expected input.
    num_dims: 2 gives statistics/parameters of shape (1, num_features);
      any other value gives shape (1, num_features, 1, 1).
    **kwargs: forwarded to `nn.Block`.
  """
  def __init__(self, num_features, num_dims, **kwargs):
    super(BatchNorm, self).__init__(**kwargs)
    if num_dims == 2:
      shape = (1, num_features)
    else:
      shape = (1, num_features, 1, 1)

    # Scale and shift are registered through self.params; they start as the
    # identity transform (gamma = 1, beta = 0).
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())

    # Running statistics are kept as plain NDArray attributes (not created
    # through self.params). The running variance starts at 1 rather than 0:
    # with a zero variance, inference before the running statistics have
    # warmed up would divide by sqrt(eps) and blow the activations up.
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.ones(shape)

  def forward(self, X):
    """Normalize `X` and store the updated running statistics on the layer."""
    # Keep the running statistics on the same device as the input.
    if self.moving_mean.context != X.context:
      self.moving_mean = self.moving_mean.copyto(X.context)
      self.moving_var = self.moving_var.copyto(X.context)

    Y, self.moving_mean, self.moving_var = batch_norm(X, self.gamma.data(), self.beta.data(), self.moving_mean,
                                                      self.moving_var, eps=1e-5, momentum=0.9)
    return Y

Batch Normalization LeNet

In [0]:
# LeNet with a from-scratch BatchNorm layer in front of every sigmoid activation.
net = nn.Sequential()

# Convolutional stage 1: 6 output channels.
net.add(nn.Conv2D(6, kernel_size=5),
        BatchNorm(6, num_dims=4),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Convolutional stage 2: 16 output channels.
net.add(nn.Conv2D(16, kernel_size=5),
        BatchNorm(16, num_dims=4),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Fully connected stage: 120 -> 84 hidden units, normalized with 2-D statistics.
net.add(nn.Dense(120),
        BatchNorm(120, num_dims=2),
        nn.Activation('sigmoid'))
net.add(nn.Dense(84),
        BatchNorm(84, num_dims=2),
        nn.Activation('sigmoid'))

# Output layer: 10 units.
net.add(nn.Dense(10))

In [7]:
# Hyperparameters and the device context picked by d2l.try_gpu().
lr = 1.0
num_epochs = 5
batch_size = 256
ctx = d2l.try_gpu()

# Xavier-initialize the network on that context and set up plain SGD.
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

d2l.train_ch5(
    net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 0.6637, train acc 0.764, test acc 0.791, time 7.2 sec
epoch 2, loss 0.4005, train acc 0.855, test acc 0.843, time 7.0 sec
epoch 3, loss 0.3555, train acc 0.870, test acc 0.843, time 7.0 sec
epoch 4, loss 0.3255, train acc 0.882, test acc 0.836, time 7.3 sec
epoch 5, loss 0.3114, train acc 0.887, test acc 0.869, time 7.6 sec


In [8]:
# Show the flattened scale (gamma) and shift (beta) of the layer at index 1,
# i.e. the first BatchNorm layer added to the network.
tuple(param.data().reshape((-1,)) for param in (net[1].gamma, net[1].beta))

(
 [1.9808863  0.71188694 1.8091408  1.3103819  1.2874316  1.899171  ]
 <NDArray 6 @gpu(0)>, 
 [ 0.910351    0.38128862 -0.07055879  0.38258594 -0.88927495 -2.020515  ]
 <NDArray 6 @gpu(0)>)

Concise Implementation

In [0]:
# Same LeNet layout, this time using Gluon's built-in nn.BatchNorm layer.
net = nn.Sequential()

# Convolutional stage 1: 6 output channels.
net.add(nn.Conv2D(6, kernel_size=5),
        nn.BatchNorm(),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Convolutional stage 2: 16 output channels.
net.add(nn.Conv2D(16, kernel_size=5),
        nn.BatchNorm(),
        nn.Activation('sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2))

# Fully connected stage: 120 -> 84 hidden units.
net.add(nn.Dense(120),
        nn.BatchNorm(),
        nn.Activation('sigmoid'))
net.add(nn.Dense(84),
        nn.BatchNorm(),
        nn.Activation('sigmoid'))

# Output layer: 10 units.
net.add(nn.Dense(10))

In [10]:
# Initialize the new network and train it with the hyperparameters, context
# and data iterators defined in the earlier training cell.
net.initialize(init=init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(
    net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(
    net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 0.6478, train acc 0.773, test acc 0.844, time 5.1 sec
epoch 2, loss 0.3990, train acc 0.856, test acc 0.845, time 5.1 sec
epoch 3, loss 0.3503, train acc 0.874, test acc 0.858, time 5.3 sec
epoch 4, loss 0.3218, train acc 0.883, test acc 0.872, time 5.2 sec
epoch 5, loss 0.3030, train acc 0.889, test acc 0.878, time 5.1 sec
