<a href="https://colab.research.google.com/github/rajlm10/D2L-Torch/blob/main/D2L_Dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from torch import nn

In [2]:
def get_fashion_mnist_labels(labels): 
  """Return text labels for the Fashion-MNIST dataset.""" 
  text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat','sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'] 
  return [text_labels[int(i)] for i in labels]

In [3]:
def accuracy(y_hat,y):
  """Compute the number of correct predictions.""" 
  if y_hat.shape[0]>1 and y_hat.shape[1]>1:
    y_hat=y_hat.argmax(axis=1)
  cmp=y_hat.type(y.dtype)==y
  return float(cmp.type(y.dtype).sum())


In [4]:
class Accumulator: 
  """For accumulating sums over `n` variables.""" 
  def __init__(self, n):
    self.data = [0.0] * n 
    
  def add(self, *args):
    self.data = [a + float(b) for a, b in zip(self.data, args)] 
    
  def reset(self):
    self.data = [0.0] * len(self.data)
  
  def __getitem__(self, idx): 
    return self.data[idx]

In [5]:
def evaluate_accuracy(net,test_iter):
  """Compute the accuracy for a model on a dataset."""
  if isinstance(net,torch.nn.Module):
    net.eval()
  metric=Accumulator(2) #no of correct preds, no of predictions
  with torch.no_grad():
    for X,y in test_iter:
      metric.add(accuracy(net(X),y),y.numel())
  return metric[0]/metric[1]


In [6]:
def get_workers():
  return 2

In [7]:
def load_fashion_mnist(batch_size,resize=None):
  trans=[transforms.ToTensor()] #PIL image to tensor (normalized between 0-1)
  if resize:
    trans.insert(0,transforms.Resize(resize))
  trans=transforms.Compose(trans) #Chains together transforms
  
  mnist_train=torchvision.datasets.FashionMNIST(root="../data", train=True, transform=trans, download=True)
  mnist_test=torchvision.datasets.FashionMNIST(root="../data", train=False, transform=trans, download=True)

  return data.DataLoader(mnist_train,batch_size,shuffle=True,num_workers=get_workers()),data.DataLoader(mnist_test,batch_size,shuffle=True,num_workers=get_workers())

In [8]:
batch_size = 256
train_iter, test_iter = load_fashion_mnist(batch_size)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw



In [9]:
def dropout_layer(activations,dropout):
  assert 0 <= dropout <= 1
  if dropout==1:
    return torch.zeros_like(activations)
  if dropout==0:
    return activations
  mask=(torch.rand(activations.shape)>dropout).float() #Random uniform distribution [0,1]

  return mask*activations/(1.0-dropout)

In [14]:
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
dropout1, dropout2 = 0.2, 0.5

class Net(nn.Module):
  def __init__(self,num_inputs,num_outputs,num_hiddens1,num_hiddens2,is_training=True):
    super(Net,self).__init__()
    self.num_inputs=num_inputs
    self.is_training=is_training
    self.lin1=nn.Linear(num_inputs,num_hiddens1)
    self.lin2=nn.Linear(num_hiddens1,num_hiddens2)
    self.lin3=nn.Linear(num_hiddens2,num_outputs)
    self.relu=nn.ReLU()

  def forward(self,X):
    H1=self.relu(self.lin1(X.reshape(-1,num_inputs)))
    # Use dropout only when training the model  
    if self.is_training:
      H1=dropout_layer(H1,dropout1)   
    H2=self.relu(self.lin2(H1))
    if self.is_training:
      H2=dropout_layer(H2,dropout2)

    out = self.lin3(H2)
    return out

net=Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2)

In [15]:
def train_epoch(net,training_set,loss,optimizer):
  if (isinstance(net,torch.nn.Module)):
    net.train()

  metric=Accumulator(3) #stores sum of training loss, sum of training accuracy, no. of examples
  for X,y in training_set:
    y_hat=net(X)
    l=loss(y_hat,y) # nX10, nX1 -> nX1

    if isinstance(optimizer,torch.optim.Optimizer):
      optimizer.zero_grad()
      l.mean().backward()
      optimizer.step()

    metric.add(float(l.sum()),accuracy(y_hat,y),y.shape[0])
  return metric[0]/metric[2], metric[1]/metric[2]



In [16]:
def train(net,training_set,test_set,loss,optimizer,num_epochs):
    for epoch in range(num_epochs):
      train_loss,train_acc=train_epoch(net,training_set,loss,optimizer)
      test_acc = evaluate_accuracy(net, test_set)

      print(f'''epoch {epoch+1}: Train Loss: {train_loss},Train Acc: {train_acc}, Test Acc: {test_acc}''')


In [17]:
num_epochs, lr, batch_size = 10, 0.5, 256
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)

train(net,train_iter,test_iter,loss,trainer,num_epochs)

epoch 1: Train Loss: 0.912048797861735,Train Acc: 0.6603166666666667, Test Acc: 0.764
epoch 2: Train Loss: 0.5352540400187175,Train Acc: 0.8038333333333333, Test Acc: 0.7954
epoch 3: Train Loss: 0.46573794848124184,Train Acc: 0.8301666666666667, Test Acc: 0.8324
epoch 4: Train Loss: 0.43205339024861655,Train Acc: 0.84285, Test Acc: 0.8101
epoch 5: Train Loss: 0.4048792521794637,Train Acc: 0.8529833333333333, Test Acc: 0.8368
epoch 6: Train Loss: 0.38504457925160723,Train Acc: 0.8602, Test Acc: 0.8268
epoch 7: Train Loss: 0.3714546977996826,Train Acc: 0.8639333333333333, Test Acc: 0.8289
epoch 8: Train Loss: 0.36186040891011556,Train Acc: 0.8685166666666667, Test Acc: 0.8471
epoch 9: Train Loss: 0.3489550939242045,Train Acc: 0.8707, Test Acc: 0.8327
epoch 10: Train Loss: 0.3412602102279663,Train Acc: 0.8729666666666667, Test Acc: 0.849


# Inbuilt layer 

In [23]:
net=nn.Sequential(
    nn.Flatten(),
    nn.Linear(784,256),
    nn.ReLU(),
    nn.Dropout(dropout1),
    nn.Linear(256,256),
    nn.ReLU(),
    nn.Dropout(dropout2),
    nn.Linear(256,10)
)

def init_weights(layer):
  if isinstance(layer,nn.Linear):
    nn.init.normal_(layer.weight,std=0.01)

net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=256, out_features=256, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.5, inplace=False)
  (7): Linear(in_features=256, out_features=10, bias=True)
)

In [24]:
trainer = torch.optim.SGD(net.parameters(), lr=lr) #We initialize a new trainer

train(net,train_iter,test_iter,loss,trainer,num_epochs)


epoch 1: Train Loss: 1.1860956886291505,Train Acc: 0.5430333333333334, Test Acc: 0.7129
epoch 2: Train Loss: 0.5906720139821371,Train Acc: 0.77985, Test Acc: 0.8059
epoch 3: Train Loss: 0.49914352366129555,Train Acc: 0.81765, Test Acc: 0.8142
epoch 4: Train Loss: 0.44643895702362063,Train Acc: 0.8367333333333333, Test Acc: 0.8112
epoch 5: Train Loss: 0.4219759000142415,Train Acc: 0.8464666666666667, Test Acc: 0.8362
epoch 6: Train Loss: 0.39922035910288495,Train Acc: 0.8530333333333333, Test Acc: 0.8476
epoch 7: Train Loss: 0.3804882472038269,Train Acc: 0.8611166666666666, Test Acc: 0.8473
epoch 8: Train Loss: 0.3704915075937907,Train Acc: 0.86485, Test Acc: 0.8573
epoch 9: Train Loss: 0.3563785500526428,Train Acc: 0.86855, Test Acc: 0.8504
epoch 10: Train Loss: 0.3467910073598226,Train Acc: 0.8733666666666666, Test Acc: 0.8423
