In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

# Tensors

In [14]:
sample = torch.tensor(np.random.randn(4,4),device='cuda:0')

In [15]:
sample+1

tensor([[-0.5769, -0.1962,  1.4789,  0.9177],
        [ 0.6475,  1.9966,  1.1973,  2.5206],
        [ 1.9048,  0.5793,  1.8409,  0.8641],
        [ 1.2287,  2.1695, -1.3196,  0.6289]], device='cuda:0',
       dtype=torch.float64)

In [16]:
(sample+1).mm(sample)#matrix multiplication

tensor([[ 2.5267,  0.9457, -1.2000, -0.7924],
        [-0.0651,  3.6593, -4.1359,  1.8847],
        [-1.3446, -1.4650,  0.5703,  0.1533],
        [-3.7522,  1.9830, -1.5520,  3.1439]], device='cuda:0',
       dtype=torch.float64)

In [19]:
sample*sample

tensor([[0.0000, 0.0000, 0.2294, 0.0000],
        [0.0000, 0.9931, 0.0389, 2.3124],
        [0.8186, 0.0000, 0.7072, 0.0000],
        [0.0523, 1.3677, 0.0000, 0.0000]], device='cuda:0',
       dtype=torch.float64)

In [17]:
sample = F.relu(sample)

In [18]:
sample

tensor([[0.0000, 0.0000, 0.4789, 0.0000],
        [0.0000, 0.9966, 0.1973, 1.5206],
        [0.9048, 0.0000, 0.8409, 0.0000],
        [0.2287, 1.1695, 0.0000, 0.0000]], device='cuda:0',
       dtype=torch.float64)

# torch.nn.Module API

In [20]:
class TwoLayerFC(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        nn.init.kaiming_normal_(self.fc1.weight)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        nn.init.kaiming_normal_(self.fc2.weight)
    
    def forward(self, x):
        # forward always defines connectivity
        x = x.view(x.shape[0],-1)
        scores = self.fc2(F.relu(self.fc1(x)))
        return scores

In [23]:
input_size = 50
x = torch.zeros((64, input_size))  # minibatch size 64, feature dimension 50
model = TwoLayerFC(input_size, 42, 10)
scores = model(x)
print(scores.size())

torch.Size([64, 10])


In [94]:
class ThreeLayerConvNet(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2,fcl_1, num_classes):
        super().__init__()
        self.bn0 = nn.BatchNorm2d(in_channel,affine=False)
        self.conv_1 = nn.Conv2d(in_channel, channel_1, 5, padding=2)
        self.bn1 = nn.BatchNorm2d(channel_1)
        self.conv_2 = nn.Conv2d(channel_1, channel_2, 3, padding=1)
        self.pool1 = nn.MaxPool2d((2,2))
        self.fc1 = nn.Linear(channel_2*16*16, fcl_1)
        self.bn2 = nn.BatchNorm1d(fcl_1)
        self.drop = nn.Dropout(0.4)
        self.fc2 = nn.Linear(fcl_1,num_classes)

    def forward(self, x):

        x = self.conv_1(x)
        x = self.bn1(x)
        x = self.conv_2(x.clamp(min=0))
        x = self.pool1(x)
        x = x.view(x.shape[0],-1)#remember to flatten before joining a conv layer and fc layer
        x = self.fc1(x)
        x = self.bn2(x)
        x = self.drop(x)
        scores = self.fc2(x.clamp(min=0))
        #scores = F.relu(self.fc2(x))
        return scores

In [59]:
x = torch.zeros((64, 3, 32, 32))  # minibatch size 64, image size [3, 32, 32]
model = ThreeLayerConvNet(in_channel=3, channel_1=12, channel_2=8,fcl_1=256, num_classes=10)
scores = model(x)
print(scores.size())

torch.Size([64, 10])


# Custom ConvNet

In [32]:
class custom_convnet(nn.Module):
    def __init__(self,):
        super().__init__()
        
    def forward(self,x):
        pass

# Load Dataset

In [34]:
NUM_TRAIN = 49000

transform = T.Compose([
                T.ToTensor(),
                T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

cifar10_train = dset.CIFAR10('.datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

  0%|          | 0/170498071 [00:00<?, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to .datasets/cifar-10-python.tar.gz


170500096it [00:02, 72623916.88it/s]                               


Extracting .datasets/cifar-10-python.tar.gz to .datasets


  0%|          | 0/170498071 [00:00<?, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to datasets/cifar-10-python.tar.gz


170500096it [00:02, 78939983.96it/s]                               


Extracting datasets/cifar-10-python.tar.gz to datasets
Files already downloaded and verified


# Write required functions for training

In [54]:
device = torch.device('cuda')
print_every=100

In [55]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [56]:
def trainer(model, optimizer, epochs=1):
   
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy(loader_val, model)
                print()

In [48]:
hidden_layer_size = 4000
learning_rate = 1e-2
model = TwoLayerFC(3 * 32 * 32, hidden_layer_size, 10)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

trainer(model, optimizer)

Iteration 0, loss = 3.3018
Checking accuracy on validation set
Got 136 / 1000 correct (13.60)

Iteration 100, loss = 2.0659
Checking accuracy on validation set
Got 353 / 1000 correct (35.30)

Iteration 200, loss = 1.9727
Checking accuracy on validation set
Got 374 / 1000 correct (37.40)

Iteration 300, loss = 1.8030
Checking accuracy on validation set
Got 367 / 1000 correct (36.70)

Iteration 400, loss = 1.5971
Checking accuracy on validation set
Got 414 / 1000 correct (41.40)

Iteration 500, loss = 2.1596
Checking accuracy on validation set
Got 413 / 1000 correct (41.30)

Iteration 600, loss = 1.6576
Checking accuracy on validation set
Got 440 / 1000 correct (44.00)

Iteration 700, loss = 1.7696
Checking accuracy on validation set
Got 395 / 1000 correct (39.50)



In [96]:
learning_rate = 1e-2

model = ThreeLayerConvNet(3, 32, 16, 128, 10)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

trainer(model, optimizer)

Iteration 0, loss = 2.4112
Checking accuracy on validation set
Got 124 / 1000 correct (12.40)

Iteration 100, loss = 1.7863
Checking accuracy on validation set
Got 427 / 1000 correct (42.70)

Iteration 200, loss = 1.7568
Checking accuracy on validation set
Got 475 / 1000 correct (47.50)

Iteration 300, loss = 1.4410
Checking accuracy on validation set
Got 494 / 1000 correct (49.40)

Iteration 400, loss = 1.4275
Checking accuracy on validation set
Got 513 / 1000 correct (51.30)

Iteration 500, loss = 1.3255
Checking accuracy on validation set
Got 541 / 1000 correct (54.10)

Iteration 600, loss = 1.3190
Checking accuracy on validation set
Got 537 / 1000 correct (53.70)

Iteration 700, loss = 1.1119
Checking accuracy on validation set
Got 558 / 1000 correct (55.80)



# torch.nn.Sequential API

In [61]:
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0],-1)

hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
    Flatten(),
    nn.Linear(3 * 32 * 32, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, 10),
)

# you can use Nesterov momentum in optim.SGD
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

trainer(model, optimizer)

Iteration 0, loss = 2.3201
Checking accuracy on validation set
Got 122 / 1000 correct (12.20)

Iteration 100, loss = 1.9144
Checking accuracy on validation set
Got 339 / 1000 correct (33.90)

Iteration 200, loss = 1.8564
Checking accuracy on validation set
Got 399 / 1000 correct (39.90)

Iteration 300, loss = 1.5819
Checking accuracy on validation set
Got 423 / 1000 correct (42.30)

Iteration 400, loss = 1.7657
Checking accuracy on validation set
Got 406 / 1000 correct (40.60)

Iteration 500, loss = 1.7450
Checking accuracy on validation set
Got 446 / 1000 correct (44.60)

Iteration 600, loss = 2.1108
Checking accuracy on validation set
Got 432 / 1000 correct (43.20)

Iteration 700, loss = 1.4147
Checking accuracy on validation set
Got 420 / 1000 correct (42.00)



In [77]:
channel_1 = 32
channel_2 = 16
learning_rate = 1e-4


model = nn.Sequential(
nn.Conv2d(3, 32, 5, padding = 2),
nn.ReLU(),
nn.Conv2d(32, 16, 3, padding = 1),
    Flatten(),
    nn.ReLU(),
    nn.Linear(16*32*32, 10)
)

optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

trainer(model, optimizer,3)

Iteration 0, loss = 0.5923
Checking accuracy on validation set
Got 611 / 1000 correct (61.10)

Iteration 100, loss = 0.6262
Checking accuracy on validation set
Got 612 / 1000 correct (61.20)

Iteration 200, loss = 0.5149
Checking accuracy on validation set
Got 618 / 1000 correct (61.80)

Iteration 300, loss = 0.8515
Checking accuracy on validation set
Got 622 / 1000 correct (62.20)

Iteration 400, loss = 0.4456
Checking accuracy on validation set
Got 630 / 1000 correct (63.00)

Iteration 500, loss = 0.7495
Checking accuracy on validation set
Got 630 / 1000 correct (63.00)

Iteration 600, loss = 0.5492
Checking accuracy on validation set
Got 634 / 1000 correct (63.40)

Iteration 700, loss = 0.5822
Checking accuracy on validation set
Got 632 / 1000 correct (63.20)

Iteration 0, loss = 0.7787
Checking accuracy on validation set
Got 634 / 1000 correct (63.40)

Iteration 100, loss = 0.9094
Checking accuracy on validation set
Got 638 / 1000 correct (63.80)

Iteration 200, loss = 0.8287
Check