# AI6103 DL Quiz Preparation

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from random import randint
import utils
import time
from torchsummary import summary
from IPython.display import Math
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## L01 - Introduction

Amount of data $n$ required to achieve super human performance: <br> 
$\Large n = \frac{\mathit{features}}{error} = \frac{d}{e}$

In [2]:
d = 1000*1000
e = 1
n = d//e; print(n)

1000000


## L02 - Linear Algebra

### Vectors

In [3]:
# Vector addition
v1 = torch.LongTensor([1,1,2]); print(v1)
v2 = torch.LongTensor([5,-1,2]); print(v1)
vpv = v1 + v2 ;print(vpv)

tensor([1, 1, 2])
tensor([1, 1, 2])
tensor([6, 0, 4])


In [4]:
# Vector multiplication
c = 2 
v1 = torch.LongTensor([1,1,2]); print(v1)
cv = c*v1; print(cv)

tensor([1, 1, 2])
tensor([2, 2, 4])


In [5]:
## Inner product
v1 = torch.LongTensor([1,1,2]); print(v1)
v2 = torch.LongTensor([1,2,3]); print(v1)
v1v2 = torch.matmul(v1, v2); print(v1v2)

tensor([1, 1, 2])
tensor([1, 1, 2])
tensor(9)


### Matrices

In [6]:
M1 = torch.LongTensor([[1,2], [3,4],[5,0]]); print(M1)
M2 = torch.LongTensor([[10,10], [20,20],[30,30]]); print(M2)
M1pM2 = M1+M2; print(M1pM2)

tensor([[1, 2],
        [3, 4],
        [5, 0]])
tensor([[10, 10],
        [20, 20],
        [30, 30]])
tensor([[11, 12],
        [23, 24],
        [35, 30]])


In [7]:
c = 2
M1 = torch.LongTensor([[1,2], [3,4],[5,0]]); print(M1)
cM1 = c* M1; print(cM1)

tensor([[1, 2],
        [3, 4],
        [5, 0]])
tensor([[ 2,  4],
        [ 6,  8],
        [10,  0]])


### Matrix-Vector

In [8]:
W = torch.LongTensor([[1,2,3],[4,5,6]]); print(f"W: {W.shape}")
x = torch.LongTensor([1,-2,2]); print(f"x: {x.shape}")
b = torch.LongTensor([7, 10]); print(f"b: {b.shape}")
z = torch.matmul(W,x)+b
print(f"z: {z} {z.shape}")

W: torch.Size([2, 3])
x: torch.Size([3])
b: torch.Size([2])
z: tensor([10, 16]) torch.Size([2])


In [9]:
## Inner product
A = torch.LongTensor([[1,1,1],[2,3,0]])
B = torch.LongTensor([[1,2,1,1],[2,0,1,1],[3,2,1,1]])
C = torch.mm(A,B)
print(C)

tensor([[6, 4, 3, 3],
        [8, 4, 5, 5]])


### Inner & Outer Product

In [10]:
# Outer product
x = torch.LongTensor([1,2,3]); print(x) #3x1
x1 = x.unsqueeze(1); print(x)
y = torch.LongTensor([0,1,2]); print(y) #1x3

outerpdt = x1 * y; print(outerpdt) #3x3
outerpdt2 = torch.ger(x,y); print(outerpdt2)

tensor([1, 2, 3])
tensor([1, 2, 3])
tensor([0, 1, 2])
tensor([[0, 1, 2],
        [0, 2, 4],
        [0, 3, 6]])
tensor([[0, 1, 2],
        [0, 2, 4],
        [0, 3, 6]])


## L03 - Vanilla NN (Part 1)

In [11]:
## Softmax
B = torch.Tensor([[1,2,3,4],
                 [1,2,5,3]])
print(B, B.size()) # 2,4
prob = F.softmax(B, dim=1) #rows (2) matrices
print(f"=> Softmax over dim 1 (rows)")
print(prob)
print(prob.sum(1)) 
print("")
prob = F.softmax(B, dim=0) #cols (4) vectors
print(f"=> Softmax over dim 0 (cols)")
print(prob)
print(torch.max(prob))
print(prob.sum(0)) 

tensor([[1., 2., 3., 4.],
        [1., 2., 5., 3.]]) torch.Size([2, 4])
=> Softmax over dim 1 (rows)
tensor([[0.0321, 0.0871, 0.2369, 0.6439],
        [0.0152, 0.0414, 0.8310, 0.1125]])
tensor([1., 1.])

=> Softmax over dim 0 (cols)
tensor([[0.5000, 0.5000, 0.1192, 0.7311],
        [0.5000, 0.5000, 0.8808, 0.2689]])
tensor(0.8808)
tensor([1., 1., 1., 1.])


In [12]:
p_target = torch.Tensor([0,0,1,0])
probs = torch.Tensor([0.3,0.2,0.2,0.25])
err = p_target - probs; print(err, err.shape)

tensor([-0.3000, -0.2000,  0.8000, -0.2500]) torch.Size([4])


In [13]:
x = torch.rand((32)); print(x)
# ex = err.unsqueeze(1) * x; print(ex, ex.shape)
ex2 = torch.ger(err,x); print(ex2, ex2.shape)

tensor([0.0781, 0.8203, 0.7617, 0.2641, 0.4037, 0.2045, 0.8299, 0.4020, 0.8142,
        0.7116, 0.9154, 0.7089, 0.3098, 0.0277, 0.1382, 0.6920, 0.7665, 0.1703,
        0.6753, 0.3366, 0.7189, 0.3521, 0.6833, 0.6889, 0.3127, 0.7222, 0.3048,
        0.4683, 0.8068, 0.1837, 0.6462, 0.8299])
tensor([[-0.0234, -0.2461, -0.2285, -0.0792, -0.1211, -0.0613, -0.2490, -0.1206,
         -0.2443, -0.2135, -0.2746, -0.2127, -0.0929, -0.0083, -0.0415, -0.2076,
         -0.2299, -0.0511, -0.2026, -0.1010, -0.2157, -0.1056, -0.2050, -0.2067,
         -0.0938, -0.2167, -0.0914, -0.1405, -0.2420, -0.0551, -0.1939, -0.2490],
        [-0.0156, -0.1641, -0.1523, -0.0528, -0.0807, -0.0409, -0.1660, -0.0804,
         -0.1628, -0.1423, -0.1831, -0.1418, -0.0620, -0.0055, -0.0276, -0.1384,
         -0.1533, -0.0341, -0.1351, -0.0673, -0.1438, -0.0704, -0.1367, -0.1378,
         -0.0625, -0.1444, -0.0610, -0.0937, -0.1614, -0.0367, -0.1292, -0.1660],
        [ 0.0625,  0.6563,  0.6094,  0.2113,  0.3229,  0.1636

## L04 - Vanilla NN

In [14]:
from prettytable import PrettyTable
def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: 
            continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [15]:
class One_Layer_Net(nn.Module):
    def __init__(self, input_size, output_size):
        super(One_Layer_Net, self).__init__()
        self.linear_layer = nn.Linear(input_size,output_size,bias=True)
    def forward(self, x): 
#         x = self.linear_layer(x)
#         p = F.softmax(x, dim=1)
        return p
    
net = One_Layer_Net(784, 10)


count_parameters(net)

+---------------------+------------+
|       Modules       | Parameters |
+---------------------+------------+
| linear_layer.weight |    7840    |
|  linear_layer.bias  |     10     |
+---------------------+------------+
Total Trainable Params: 7850


7850

In [16]:
class LeNet5_convnet(nn.Module):

    def __init__(self):
        super(LeNet5_convnet, self).__init__()
        # CL1:   28 x 28  -->    50 x 28 x 28 
        self.conv1 = nn.Conv2d(1,   50,  kernel_size=3,  padding=1 )
        # MP1: 50 x 28 x 28 -->    50 x 14 x 14
        self.pool1  = nn.MaxPool2d(2,2)
        # CL2:   50 x 14 x 14  -->    100 x 14 x 14 
        self.conv2 = nn.Conv2d(50, 100,  kernel_size=3,  padding=1 ) # COMPLETE HERE
        # MP2: 100 x 14 x 14 -->    100 x 7 x 7
        self.pool2 = nn.MaxPool2d(2,2) # COMPLETE HERE
        # LL1:   100 x 7 x 7 = 4900 -->  100 
        self.linear1 = nn.Linear(4900, 100) # COMPLETE HERE
        # LL2:   100  -->  10 
        self.linear2 = nn.Linear(100, 10) # COMPLETE HERE


    def forward(self, x):
#         # CL1:   28 x 28  -->    50 x 28 x 28 
#         x = self.conv1(x)
#         x = F.relu(x)
#         # MP1: 50 x 28 x 28 -->    50 x 14 x 14
#         x = self.pool1(x)
#         # CL2:   50 x 14 x 14  -->    100 x 14 x 14
#         x = self.conv2(x) # COMPLETE HERE
#         x = F.relu(x) # COMPLETE HERE
#         # MP2: 100 x 14 x 14 -->    100 x 7 x 7
#         x = self.pool2(x) # COMPLETE HERE
#         # LL1:   100 x 7 x 7 = 4900  -->  100 
#         x = x.view(-1, 4900)
#         x = self.linear1(x) # COMPLETE HERE
#         x = F.relu(x)# COMPLETE HERE
#         # LL2:   4900  -->  10 
#         x = self.linear2(x) # COMPLETE HERE
        return x
    
conv = LeNet5_convnet()
count_parameters(conv)

+----------------+------------+
|    Modules     | Parameters |
+----------------+------------+
|  conv1.weight  |    450     |
|   conv1.bias   |     50     |
|  conv2.weight  |   45000    |
|   conv2.bias   |    100     |
| linear1.weight |   490000   |
|  linear1.bias  |    100     |
| linear2.weight |    1000    |
|  linear2.bias  |     10     |
+----------------+------------+
Total Trainable Params: 536710


536710

In [31]:
def cal_conv_filter(I: int, O: int, P:int = 0, S:int=1):
    assert S > 0
    f = I + 2*P - S*(O-1)
    print(f"CONV filter size for I:{I} O:{O}, padding:{P} stride:{S} = {f}")
    return f

In [32]:
cal_conv_filter(32, 28, 0, 1)
cal_conv_filter(14, 10, 0, 1)

CONV filter size for I:32 O:28, padding:0 stride:1 = 5
CONV filter size for I:14 O:10, padding:0 stride:1 = 5


5

In [37]:
def cal_max_pool(I: int, O: int):
    # P == 0, S == f
    f = I//O
    print(f"MP filter size for I:{I} O:{O} = {f}")
    return f

In [39]:
cal_max_pool(28, 14)

MP filter size for I:28 O:14 = 2


2