## Tutorial 3

In this tutorial we will learn how to 
* Use dropout in fully connected networks
* Use custom datasets in PyTorch
* Implement KL divergence

So far, we used datasets stored in PyTorch datasets. What happens if we want to use different data?

In [1]:
import torch

from torch.utils.data import Dataset, DataLoader

In [2]:
from torch.utils.data.dataset import Dataset

class SampleDataset(Dataset):
    def __init__(self):
        pass
        
    def __getitem__(self, index):
        return (img, label)

    def __len__(self):
        return count

Let's take a look at a sample dataset

In [3]:
class SampleDataset(Dataset):
    def __init__(self, n_features: int = 1024, n_samples: int = 1000):
        self.n_features = n_features
        self.n_samples = n_samples

        self.entries = self._create_entries()

    def _create_entries(self):
        entries = []

        for i in range(self.n_samples):
            entries.append({'x': torch.randn(self.n_features), 'y': 1})
        
        return entries

    def __getitem__(self, index):
        entry = self.entries[index]

        return entry['x'], entry['y']

    def __len__(self):
        return len(self.entries)

Now, let's check that it works

In [4]:
sample_dataset = SampleDataset(n_features=5, n_samples=100)

In [5]:
sample_loader = DataLoader(sample_dataset, batch_size=4, shuffle=True, num_workers=0)

In [6]:
for x, y in sample_loader:
    print(f'Input batch: {x}')
    print(f'Label batch {y}')

    break

Input batch: tensor([[ 0.5660,  0.5490,  0.5353,  1.3392,  0.7686],
        [-1.5922, -0.2144, -0.3497,  0.8319, -0.5960],
        [ 2.1391,  0.1901, -0.1482,  1.1923, -0.5077],
        [-0.5005,  0.9289, -0.8493, -0.4810,  1.0186]])
Label batch tensor([1, 1, 1, 1])


## Dropout

Let's add dropout to the model you saw a week ago

In [7]:
import torchvision

from torch import nn
from torchvision import datasets, transforms

In [8]:
input_size = 784
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

In [9]:
# MNIST Dataset (Images and Labels)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.3081,],std=[0.1306,])])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Dataset Loader (Input Pipline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
class TwoLayers(nn.Module):
    def __init__(self, input_size, output_size):
        super(TwoLayers, self).__init__()
        self.linear1 = nn.Linear(input_size, 100)  
        self.linear2 = nn.Linear(100, output_size)
        
        # The only difference from the previous TA
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        out = self.linear1(x)
        out = self.dropout(out)
        
        return self.linear2(torch.tanh(out))

In [11]:
model = TwoLayers(input_size, num_classes)

# Loss and Optimizer
# Softmax is internally computed.
ce_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

param= [i.nelement() for i in model.parameters()]

print ("number of parameters: ", sum(param))

number of parameters:  79510


In [12]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.view(-1, 28*28)        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(images)
        loss = ce_loss(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print ('Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4}'.format(epoch+1, num_epochs,
                                                                      i+1, len(train_dataset)//batch_size,
                                                                      loss.item()))    

Epoch: [1/5], Step: [1/600], Loss: 2.322
Epoch: [1/5], Step: [101/600], Loss: 2.149
Epoch: [1/5], Step: [201/600], Loss: 1.915
Epoch: [1/5], Step: [301/600], Loss: 1.812
Epoch: [1/5], Step: [401/600], Loss: 1.768
Epoch: [1/5], Step: [501/600], Loss: 1.562
Epoch: [2/5], Step: [1/600], Loss: 1.513
Epoch: [2/5], Step: [101/600], Loss: 1.497
Epoch: [2/5], Step: [201/600], Loss: 1.41
Epoch: [2/5], Step: [301/600], Loss: 1.412
Epoch: [2/5], Step: [401/600], Loss: 1.321
Epoch: [2/5], Step: [501/600], Loss: 1.235
Epoch: [3/5], Step: [1/600], Loss: 1.235
Epoch: [3/5], Step: [101/600], Loss: 1.185
Epoch: [3/5], Step: [201/600], Loss: 1.171
Epoch: [3/5], Step: [301/600], Loss: 1.188
Epoch: [3/5], Step: [401/600], Loss: 1.11
Epoch: [3/5], Step: [501/600], Loss: 1.146
Epoch: [4/5], Step: [1/600], Loss: 1.151
Epoch: [4/5], Step: [101/600], Loss: 1.136
Epoch: [4/5], Step: [201/600], Loss: 0.9381
Epoch: [4/5], Step: [301/600], Loss: 0.9362
Epoch: [4/5], Step: [401/600], Loss: 0.956
Epoch: [4/5], Step:

In [13]:
model.eval()
correct = 0
total = 0

for images, labels in test_loader:
    images = images.view(-1, 28*28)
    outputs = model(images)
    predicted = torch.argmax(outputs, 1)
    
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the model on the 10000 test images: ', float(correct) / total)

Accuracy of the model on the 10000 test images:  0.879


In [14]:
torch.save(model.state_dict(), 'model.pkl')

## KL divergence

In [15]:
P = torch.Tensor([0.36, 0.48, 0.16])
Q = torch.Tensor([0.333, 0.333, 0.333])

In [16]:
(P * (P / Q).log()).sum()

tensor(0.0863)

In [17]:
import torch.nn.functional as F

F.kl_div(Q.log(), P, None, None, 'sum')

tensor(0.0863)