In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.13 and standard deviation 0.31.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.13,), std=(0.31,)),
    ]
)

In [18]:
# Number of full passes over the training loader made by the training loop.
N_EPOCHS = 4

In [3]:
# Build the MNIST training split first, then the test split. Both share the
# same root directory, download flag and preprocessing pipeline; only the
# `train` flag differs.
train_data, test_data = (
    torchvision.datasets.MNIST(
        root='./data',
        train=is_train_split,
        download=True,
        transform=transform,
    )
    for is_train_split in (True, False)
)

In [4]:
# Mini-batch iterators over the two splits (128 samples per batch).
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=128)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, batch_size=128)

In [15]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Layer stack:
        conv(1 -> 32, 3x3) -> ReLU -> conv(32 -> 64, 3x3) -> ReLU ->
        2x2 max-pool -> dropout -> flatten -> fc(9216 -> 128) -> ReLU ->
        dropout -> fc(128 -> 10) -> log-softmax.

    The 9216 input features of ``fc1`` correspond to 64 channels of 12x12
    activations, which is what a 28x28 input yields after the two
    unpadded 3x3 convolutions and the 2x2 pooling.
    """

    def __init__(self):
        # Zero-argument super() binds to the enclosing class automatically;
        # naming a class explicitly here breaks as soon as the class is renamed.
        super().__init__()

        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D convolutional feature maps.
        self.dropout1 = nn.Dropout2d(0.25)
        # Element-wise dropout on the flattened (batch, features) activations;
        # Dropout2d is meant for feature maps and is deprecated for 2-D input.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

        self.weight_init()

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: input batch; the layer sizes require shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores over dim 1.
        """
        # Convolutional feature extractor.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.dropout1(x)

        # Fully connected classifier head.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)

        # Log-probabilities: pair with a negative log-likelihood style loss.
        return F.log_softmax(x, dim=1)

    def weight_init(self):
        """Re-initialise every convolution weight with Kaiming-uniform init.

        Linear layers and all biases keep PyTorch's default initialisation.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight)


# Backward-compatible alias: the notebook instantiates the model as `mycnn()`.
mycnn = CNN
                

In [16]:
# Build the network instance whose parameters the optimizer updates and
# which the training loop calls on every batch.
model = mycnn()

In [17]:
# The model's forward pass already returns log-probabilities (log_softmax),
# so negative log-likelihood is the matching criterion. CrossEntropyLoss
# would apply log_softmax a second time, which is redundant.
criterion = nn.NLLLoss()

optimizer = optim.Adadelta(model.parameters(), lr=0.01)

# Cut the learning rate by 10x when the monitored loss has not improved for
# more than `patience` consecutive scheduler steps.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version before upgrading.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=2, verbose=True)

In [21]:
%%time
# Loss of every training batch across all epochs (kept for later inspection).
losses = []

# Make sure dropout is active even if the model was switched to eval mode by
# an earlier run of another cell.
model.train()

for epoch in range(N_EPOCHS):
    # Losses of the current epoch only. Averaging the ever-growing `losses`
    # list instead would report a running mean over all epochs so far and
    # feed the scheduler a heavily smoothed signal.
    epoch_losses = []

    for data, target in train_loader:
        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        out = model(data)
        loss = criterion(out, target)

        batch_loss = loss.item()
        epoch_losses.append(batch_loss)
        losses.append(batch_loss)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

    mean_loss = sum(epoch_losses) / len(epoch_losses)
    print(f"epoch #{epoch}, mean loss = {mean_loss}")
    # The plateau scheduler monitors this epoch's mean training loss.
    scheduler.step(mean_loss)

epoch #0, mean loss = 0.5001554741366194


KeyboardInterrupt: 