## Deep Learning Course

### Homework 2

### Task 3:
    Train your network for 20 epochs, report the achieved accuracy on MNIST test data. Measure and report the time on one epoch for scalar and vector variants.

#### Anastasiia Kasprova

    Link to github: https://github.com/kasprova/DL_UCU/tree/master/tasks/hw2
    Link to colab: https://colab.research.google.com/drive/1aI3HH3Gj35HHsopXja6bP36Y1heE50Rm

In [0]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import time

## 1. NN training: Torch

In [0]:
class SimpleConvNet(nn.Module):
    def __init__(self, device):
        super(SimpleConvNet, self).__init__()
        self.device = device
        self.conv_layer = nn.Conv2d(in_channels=1,
                                    out_channels=20,
                                    kernel_size=5,
                                    stride=1,
                                    padding=0,
                                    dilation=1,
                                    groups=1,
                                    bias=True)
        self.fc_layer1 = nn.Linear(in_features=20 * 12 * 12, out_features=500)
        self.fc_layer2 = nn.Linear(in_features=500, out_features=10)
        self.to(device)


    def forward(self, x):
        z_conv = self.conv_layer(x)
        z_pool = F.max_pool2d(z_conv, 2, 2)
        z_pool_reshaped = z_pool.view(-1, 20*12*12)
        z_fc1 = self.fc_layer1(z_pool_reshaped)
        z_relu = F.relu(z_fc1)
        z_fc2 = self.fc_layer2(z_relu)
        y = F.softmax(z_fc2, dim=1)
        return y

In [0]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(torch.log(output), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
#parameters
lr = 0.01
batch_size = 64
test_batch_size = 64
momentum = 0.5

no_cuda = False
torch.manual_seed(17)
np.random.seed(17)
epochs = 20
log_interval = 10

use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=test_batch_size, shuffle=True, **kwargs)

In [0]:
#model initialization
model = SimpleConvNet(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

#run training
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


Test set: Average loss: -0.9258, Accuracy: 9604/10000 (96.04%)



#### Comment1: Accuracy on MNIST test after training for 20 epoch using pytorch native functions: 98.63% (after runnign 1 epoch - 96.04%)

----------------

## 2. NN Training: Vector (custom)

In [0]:
class SimpleConvNet_vector(nn.Module):
    def __init__(self, device):
        super(SimpleConvNet_vector, self).__init__()
        self.device = device
        self.conv_layer = nn.Conv2d(in_channels=1,
                                    out_channels=20,
                                    kernel_size=5,
                                    stride=1,
                                    padding=0,
                                    dilation=1,
                                    groups=1,
                                    bias=True)
        self.fc_layer1 = nn.Linear(in_features=20 * 12 * 12, out_features=500)
        self.fc_layer2 = nn.Linear(in_features=500, out_features=10)
        self.to(device)


    def forward(self, x):
        z_conv = conv2d_vector(x, conv_weight=self.conv_layer.weight,
                               conv_bias=self.conv_layer.bias,
                               device=self.device)
        z_pool = pool2d_vector(z_conv, self.device)
        z_pool_reshaped = reshape_vector(z_pool, self.device)
        z_fc1 = fc_layer_vector(z_pool_reshaped, self.fc_layer1.weight, self.fc_layer1.bias, self.device)
        z_relu = relu_vector(z_fc1, self.device)
        z_fc2 = fc_layer_vector(z_relu, self.fc_layer2.weight, self.fc_layer2.bias, self.device)
        y = F.softmax(z_fc2, dim=1)
        return y

In [0]:
def im2col(X, kernel_size, device, stride = 1):
  
    #read dimentions of input tensor - 3-dimentional
    C_in, S_in, S_in = X.shape

    #calculate size_out
    S_out = (S_in - kernel_size)//stride + 1
    
    #move to device
    X = X.to(device)
    
    #intiale output tensor of the correct size
    X_cols = torch.zeros([S_out*S_out, kernel_size*kernel_size]).to(device)
    
    for i in range(S_out):
        for j in range(S_out):
            X_cols[i*S_out+j] = X[0][i: i + kernel_size, j: j + kernel_size].contiguous().view(1, -1)
    
    return X_cols.t() # [K*K x S_out*S_out]

  
def conv_weight2rows(conv_weight):
    
    ##read dimentions of input tensor
    C_out = conv_weight.shape[0]
    kernel_size = conv_weight.shape[2]
    
    #resize 
    conv_weight_rows = conv_weight.view(C_out,kernel_size*kernel_size).contiguous()
    
    return conv_weight_rows # [C_out x K*K]
  

def conv2d_vector(x_in, conv_weight, conv_bias, device):

    #read dimentionas of input tensor and weights
    batch_size, C_in, S_in, S_in = x_in.shape
    C_out, C_in, kernel_size, kernel_size = conv_weight.shape
    
    #calculate the dimentions of output tensor
    S_out = S_in - kernel_size + 1
    
    #move to device
    x_in = x_in.to(device)
    conv_weight = conv_weight.to(device)
    conv_bias = conv_bias.to(device)
    
    #intiale output tensor of the correct size
    z = torch.zeros([batch_size,C_out,S_out,S_out]).to(device)
    
    #transformation of conv_weight
    conv_weight_rows = conv_weight2rows(conv_weight)
    
    for n in range(batch_size):
        #WconvX+b, dim(WconvX+b)=[C_out x S_out*S_out], reshape [C_out x S_out x S_out]
        z[n] = (conv_weight_rows.matmul(im2col(x_in[n], kernel_size, device, stride=1)) + conv_bias.view(-1,1)).view(C_out,S_out,S_out)
    
    return z
  
  
  def pool2d_vector(a, device, stride = 2):
    
    #read dimentionas of input tensor
    batch_size, C_in, S_in, S_in = a.shape
    pooling_size = 2
    stride = 2
    
    #calculate the dimentions of output tensor
    S_out = (S_in - pooling_size)//stride + 1 
    C_out = C_in
    
    #move to device
    a = a.to(device)
    
    #intiale an output tensor of the correct size
    z = torch.zeros([batch_size,C_out,S_out,S_out]).to(device)
    
    for n in range(batch_size):
        z[n] = im2col(a[n], pooling_size, device, stride=2).max(dim=0).values.view(-1, S_out, S_out)
        
    return z 
  
  
  def relu_vector(a, device):
    
    #move to device
    a = a.to(device)
    
    #clone input tensor
    z = a.clone().to(device)
    
    #elements < 0 replace with 0
    z[z<0] = 0
    
    return z
  
  
  def reshape_vector(a, device):
    
    batch_size = a.shape[0]
    
    #move to device
    a = a.to(device)
    
    z = a.clone().view(batch_size,-1)
    
    return z
  
  
  def fc_layer_vector(a, weight, bias, device):
    
    #move to device
    a = a.to(device)
    weight = weight.to(device)
    bias = bias.to(device)
    
    z = (a.matmul(weight.t())+ bias).clone()
    
    return z

In [0]:
#model initialization
model_vector = SimpleConvNet_vector(device)
optimizer = optim.SGD(model_vector.parameters(), lr=lr, momentum=momentum)

#run training
start_vector = time.time()
train(model_vector, device, train_loader, optimizer, epoch=1)
end_vector = time.time()
print("Duration of the 1st epoch: ", (end_vector - start_vector),"sec")
test(model_vector, device, test_loader)


#### Comment2: Accuracy on MNIST test after training for 1 epoch using custom vector functions: 86.47% 

## 3. NN Training: Scalar (training)

In [0]:
class SimpleConvNet_scalar(nn.Module):
    def __init__(self, device):
        super(SimpleConvNet_scalar, self).__init__()
        self.device = device
        self.conv_layer = nn.Conv2d(in_channels=1,
                                    out_channels=20,
                                    kernel_size=5,
                                    stride=1,
                                    padding=0,
                                    dilation=1,
                                    groups=1,
                                    bias=True)
        self.fc_layer1 = nn.Linear(in_features=20 * 12 * 12, out_features=500)
        self.fc_layer2 = nn.Linear(in_features=500, out_features=10)
        self.to(device)


    def forward(self, x):
        z_conv = conv2d_scalar(x, conv_weight=self.conv_layer.weight,
                               conv_bias=self.conv_layer.bias,
                               device=self.device)
        z_pool = pool2d_scalar(z_conv, self.device)
        z_pool_reshaped = reshape_scalar(z_pool, self.device)
        z_fc1 = fc_layer_scalar(z_pool_reshaped, self.fc_layer1.weight, self.fc_layer1.bias, self.device)
        z_relu = relu_scalar(z_fc1, self.device)
        z_fc2 = fc_layer_scalar(z_relu, self.fc_layer2.weight, self.fc_layer2.bias, self.device)
        y = F.softmax(z_fc2, dim=1)
        return y

In [0]:
def conv2d_scalar(x_in, conv_weight, conv_bias, device):
    
    #read dimentionas of input tensor and weights
    batch_size, n_channels_in, height_in, width_in = x_in.shape
    n_channels_out, n_channels_in, kernel_size, kernel_size = conv_weight.shape
    
    #calculate the dimentions of output tensor
    height_out = height_in - kernel_size + 1
    width_out = width_in - kernel_size + 1
    
    #move to device
    x_in = x_in.to(device)
    conv_weight = conv_weight.to(device)
    conv_bias = conv_bias.to(device)
    
    #intiale output tensor of the correct size
    z = torch.zeros([batch_size,n_channels_out,height_out,width_out]).to(device)
    
    #fulfill z based on scalar representation
    for n in range(batch_size):
        for c_out in range(n_channels_out):
            for c_in in range(n_channels_in):
                for m in range(height_out):
                    for l in range(width_out):
                        z[n,c_out,m,l] = (x_in[n,c_in,m:m+kernel_size,l:l+kernel_size]*conv_weight[c_out,c_in]).sum() + conv_bias[c_out]
                                                                                                                                                                                                                                                                                                                                                          
    return z

  
def pool2d_scalar(a, device, stride = 2):
    
    #read dimentionas of input tensor
    batch_size, n_channels_in, height_in, width_in = a.shape
    pooling_size = 2
    
    #calculate the dimentions of output tensor
    height_out = (height_in-pooling_size)//stride + 1
    width_out = (width_in-pooling_size)//stride + 1
    n_channels_out = n_channels_in
    
    #move to device
    a = a.to(device)
    
    #intiale an output tensor of the correct size
    z = torch.zeros([batch_size,n_channels_out,height_out,width_out]).to(device)
    
    #fulfill z based on scalar representation
    for n in range(batch_size):
        for c_out in range(n_channels_out):
            for i in range(height_out):
                for j in range(width_out):
                    z[n,c_out,i,j] = a[n,c_out,2*i:2*i+2,2*j:2*j+2].max()
    
    return z

  

def relu_scalar(a, device):
  
    #read dimentionas of input matrix
    batch_size, n_inputs = a.shape
    
    #move to device
    a = a.to(device)
    
    #intiale an output matrix of the correct size
    z = torch.zeros([batch_size, n_inputs]).to(device)
    
    for n in range(batch_size):
        for i in range(n_inputs):
            if a[n,i]<0:
                z[n,i]=0
            else:
                z[n,i]=a[n,i]
    #z.requires_grad = True          
    
    return z

  
def reshape_scalar(a, device):
    
    #read dimentionas of input tensor
    batch_size, n_channels_in, height_in, width_in = a.shape
    
    #calculate the dimentions of output tensor
    n_outputs = n_channels_in * height_in * width_in
    
    #move to device
    a = a.to(device)
    
    #intiale an output matrix of the correct size
    z = torch.zeros([batch_size, n_outputs]).to(device)
    
    for n in range(batch_size):
        for c_in in range(n_channels_in):
            for m in range(height_in):
                for l in range(width_in):
                    z[n,c_in*height_in*width_in+m*height_in+l] = a[n,c_in,m,l]
    
    return z
  

def fc_layer_scalar(a, weight, bias, device):
    
    #read dimentionas of input matrix
    batch_size, n_inputs = a.shape
    n_outputs = bias.shape[0]
    
    #move to device
    a = a.to(device)
    weight = weight.to(device)
    bias = bias.to(device)
    
    #intiale an output matrix of the correct size
    z = torch.zeros([batch_size, n_outputs]).to(device)
    
    for n in range(batch_size):
        for j in range(n_outputs):
            z[n,j] = bias[j]
            for i in range(n_inputs):
                z[n,j] += weight[j,i]*a[n,i]
                
    return z

In [0]:
#model initialization
model_scalar = SimpleConvNet_scalar(device)
optimizer = optim.SGD(model_scalar.parameters(), lr=lr, momentum=momentum)

#run training
start_scalar = time.time()
train(model_scalar, device, train_loader, optimizer, epoch = 1)
end_scalar = time.time()
print("Duration of the 1st epoch: ", (end_scalar - start_scalar),"sec")
test(model_vector, device, test_loader)

#### Comment3. Scalar functions are veeeeeery slow.. Please find the performance comparison run on dummy data here: https://colab.research.google.com/drive/1TMqoh8WRat9IsbrVJlDdMCfiuG6bxa90