<a href="https://colab.research.google.com/github/prasanth5reddy/D2L/blob/master/Modern%20Convolutional%20Networks/densely_connected_networks_DenseNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mounting Drive

In [1]:
# Mount Google Drive into the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import sys
# Put the Drive project folder on the import path so modules stored there can be
# imported (presumably this is where the local `d2l` module lives -- confirm).
w_dir = '/content/drive/My Drive/Colab/D2L.AI/'
sys.path.append(w_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Installing Libraries

In [2]:
!pip uninstall mxnet && pip install mxnet-cu100



Importing Libraries

In [0]:
from mxnet import gluon, init, nd
from mxnet.gluon import nn
import d2l

Dense Blocks

In [0]:
def conv_block(num_channels):
  """Build one BatchNorm -> ReLU -> 3x3 convolution unit.

  Args:
    num_channels: Number of channels passed to the `nn.Conv2D` layer.

  Returns:
    An `nn.Sequential` holding the three layers in that order. The
    convolution uses kernel_size=3 with padding=1.
  """
  block = nn.Sequential()
  # Layers are created and registered in execution order: norm, activation, conv.
  for layer in (nn.BatchNorm(),
                nn.Activation('relu'),
                nn.Conv2D(num_channels, kernel_size=3, padding=1)):
    block.add(layer)
  return block

In [0]:
class DenseBlock(nn.Block):
  """A stack of conv blocks whose outputs are concatenated onto their inputs.

  Each conv block receives the running feature map and its output is
  concatenated to that feature map along dim=1, so the result of `forward`
  carries the original input plus the output of every conv block.

  Args:
    num_convs: How many conv blocks to stack.
    num_channels: Channel count given to each `conv_block` (i.e. the number
      of channels every conv block appends).
    **kwargs: Forwarded unchanged to `nn.Block.__init__`.
  """

  def __init__(self, num_convs, num_channels, **kwargs):
    super(DenseBlock, self).__init__(**kwargs)
    self.net = nn.Sequential()
    self.net.add(*[conv_block(num_channels) for _ in range(num_convs)])

  def forward(self, X):
    """Run X through every conv block, concatenating each output on dim=1."""
    features = X
    for layer in self.net:
      # Feed the accumulated features in, then append the new ones.
      features = nd.concat(features, layer(features), dim=1)
    return features

In [6]:
# Shape check: 2 conv blocks adding 10 channels each to a 3-channel input
# should yield 3 + 2 * 10 = 23 channels with the 8x8 spatial size unchanged.
blk = DenseBlock(2, 10)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 8, 8))
# Y is kept as a notebook-level variable; the transition-block check reuses it.
Y = blk(X)
Y.shape

(4, 23, 8, 8)

Transition Layers

In [0]:
def transition_block(num_channels):
  """Build a BatchNorm -> ReLU -> 1x1 convolution -> 2x2 average-pool unit.

  Args:
    num_channels: Number of channels passed to the 1x1 `nn.Conv2D` layer.

  Returns:
    An `nn.Sequential` holding the four layers in that order. The average
    pool uses pool_size=2 and strides=2.
  """
  block = nn.Sequential()
  block.add(nn.BatchNorm())
  block.add(nn.Activation('relu'))
  # The 1x1 convolution sets the channel count; the pooling shrinks height/width.
  block.add(nn.Conv2D(num_channels, kernel_size=1))
  block.add(nn.AvgPool2D(pool_size=2, strides=2))
  return block

In [8]:
# Shape check on the dense-block output Y from the earlier cell: a 10-channel
# transition block should reduce the channels to 10 and halve height and width.
blk = transition_block(10)
blk.initialize()
blk(Y).shape

(4, 10, 4, 4)

DenseNet Model

In [0]:
# Network stem: 7x7 stride-2 convolution with 64 channels, batch norm, ReLU,
# then a 3x3 stride-2 max pool.
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))

In [0]:
# num_channels tracks the running channel count (starting at the stem's 64);
# growth_rate is the number of channels each conv block appends.
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]

# NOTE: this loop appends to the existing `net`, so re-running the cell without
# first rebuilding `net` stacks the dense/transition blocks a second time.
for i, num_convs in enumerate(num_convs_in_dense_blocks):
  net.add(DenseBlock(num_convs, growth_rate))
  # A dense block appends num_convs * growth_rate channels to its input.
  num_channels += num_convs * growth_rate
  # Between dense blocks (but not after the last one) add a transition block
  # that halves the channel count.
  if i != len(num_convs_in_dense_blocks) - 1:
    num_channels //= 2
    net.add(transition_block(num_channels))

In [0]:
# Classification head: batch norm, ReLU, global average pooling, then a dense
# layer with 10 outputs.
net.add(nn.BatchNorm(),
        nn.Activation('relu'),
        nn.GlobalAvgPool2D(),
        nn.Dense(10))

Training

In [12]:
# Training hyperparameters. ctx comes from the local d2l helper (presumably a
# GPU context when one is available -- confirm in d2l.try_gpu).
lr, num_epochs, batch_size, ctx = 0.1, 5, 256, d2l.try_gpu()
# Initialize all parameters on ctx with Xavier initialization.
net.initialize(ctx=ctx, init=init.Xavier())
# Plain SGD over every parameter of the network.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
# Fashion-MNIST iterators from the d2l helper; resize=96 presumably rescales the
# images to 96x96 -- confirm in d2l.load_data_fashion_mnist.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

# Train and evaluate for num_epochs using the d2l training loop.
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 0.5351, train acc 0.812, test acc 0.760, time 41.6 sec
epoch 2, loss 0.3150, train acc 0.886, test acc 0.863, time 39.2 sec
epoch 3, loss 0.2651, train acc 0.903, test acc 0.900, time 39.0 sec
epoch 4, loss 0.2359, train acc 0.914, test acc 0.912, time 39.3 sec
epoch 5, loss 0.2115, train acc 0.923, test acc 0.906, time 39.1 sec
