In [94]:
import torch 
from torch import nn
from torchvision.datasets import MNIST

# Dropout from scratch

In [95]:
(torch.rand(10) > 0.5).float()

tensor([1., 1., 0., 0., 0., 0., 0., 1., 1., 1.])

In [96]:
def dropout_layer(X, dropout):
    assert 0 <= dropout <=1
    if dropout == 1: return torch.zeros_like(X)
    mask = (torch.rand(X.shape) > dropout).float()
    return mask * X / (1.0 - dropout)

# Dropout concise implementation

In [97]:
# Data Preproccessing
# Training data
N_train, N_test = 5000, 10000
mnist_train = MNIST(root='../data', train=True)
X_train, targets_train = mnist_train.data.view(-1,784).float(), mnist_train.targets
y_train = torch.zeros((len(targets_train),10))
for i in range(len(targets_train)):
    y_train[i, targets_train[i]] = 1

# test data
mnist_test = MNIST(root='../data', train=False)
X_test, targets_test = mnist_test.data.view(-1,784).float(), mnist_test.targets
y_test = torch.zeros((len(targets_test),10))
for i in range(len(targets_test)):
    y_test[i, targets_test[i]] = 1

In [98]:
# Model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784, 128)
        self.linear2 = nn.Linear(128, 64)
        self.linear3 = nn.Linear(64, 10)
    def forward(self, x):
        h = nn.functional.relu(self.linear1(x))
        h = nn.functional.dropout(h, p=0.5, training=self.training)
        h = nn.functional.relu(self.linear2(h))
        h = nn.functional.dropout(h, p=0.5, training=self.training)
        y = nn.functional.softmax(self.linear3(h))
        # y = self.linear3(h)
        return y

In [99]:
# Config model, criterion and optimizer
model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Traning
num_epochs = 2000

for epoch in range(num_epochs):
    
    y_pred = model(X_train)
    loss = criterion(y_pred,y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1)%20 == 0:
        print(f'Epoch: [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

  y = nn.functional.softmax(self.linear3(h))


Epoch: [20/2000], Loss: 1.7500
Epoch: [40/2000], Loss: 1.6558
Epoch: [60/2000], Loss: 1.6095
Epoch: [80/2000], Loss: 1.5690
Epoch: [100/2000], Loss: 1.5483
Epoch: [120/2000], Loss: 1.5377
Epoch: [140/2000], Loss: 1.5301
Epoch: [160/2000], Loss: 1.5251
Epoch: [180/2000], Loss: 1.5195
Epoch: [200/2000], Loss: 1.5149
Epoch: [220/2000], Loss: 1.5130
Epoch: [240/2000], Loss: 1.5106
Epoch: [260/2000], Loss: 1.5083
Epoch: [280/2000], Loss: 1.5064
Epoch: [300/2000], Loss: 1.5052
Epoch: [320/2000], Loss: 1.5034
Epoch: [340/2000], Loss: 1.5027
Epoch: [360/2000], Loss: 1.5014
Epoch: [380/2000], Loss: 1.5008
Epoch: [400/2000], Loss: 1.4981
Epoch: [420/2000], Loss: 1.4986
Epoch: [440/2000], Loss: 1.4974
Epoch: [460/2000], Loss: 1.4955
Epoch: [480/2000], Loss: 1.4957
Epoch: [500/2000], Loss: 1.4950
Epoch: [520/2000], Loss: 1.4938
Epoch: [540/2000], Loss: 1.4936
Epoch: [560/2000], Loss: 1.4938
Epoch: [580/2000], Loss: 1.4922
Epoch: [600/2000], Loss: 1.4931
Epoch: [620/2000], Loss: 1.4915
Epoch: [640/

In [100]:
# Prediction
model.training = False
y_hat = model(X_test)
target_hat = torch.max(y_hat, dim=-1)[1]
acc = torch.sum(targets_test == target_hat)/N_test
print(f"Test accuracy: {acc * 100:.2f}%")

Test accuracy: 96.80%


  y = nn.functional.softmax(self.linear3(h))
