<a href="https://colab.research.google.com/github/parthsdoshi/nn-lenet5-cifar100/blob/master/lenet5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn
import torchvision as tv
import torch.optim as O

In [0]:
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

In [61]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_cached(0)/1024**3,1), 'GB')

Using device: cuda

Tesla K80
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [0]:
# flattens so we can go from conv layers to linear layers
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)

In [0]:
def createLenet5(in_channels=3, init_padding=(0, 0), classes=10, activation=nn.ReLU):
    lenet5 = nn.Sequential(
        nn.Conv2d(in_channels, 6, kernel_size=(5, 5), padding=init_padding),
        activation(),
        nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        nn.Conv2d(6, 16, kernel_size=(5, 5)),
        activation(),
        nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Flatten(),
        nn.Linear(16*5*5, 120),
        activation(),
        nn.Linear(120, 84),
        activation(),
        nn.Linear(84, classes)
    )

    return lenet5

In [0]:
mnist = tv.datasets.MNIST
cifar100 = tv.datasets.CIFAR100

In [0]:
train_batch = 1
test_batch = 1000

In [0]:
mnist_train_loader = torch.utils.data.DataLoader(mnist('./mnist/train', train=True, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=train_batch, shuffle=True)
mnist_train_test_loader = torch.utils.data.DataLoader(mnist('./mnist/train', train=True, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=test_batch, shuffle=True)
mnist_test_loader = torch.utils.data.DataLoader(mnist('./mnist/test', train=False, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=test_batch, shuffle=True)

In [66]:
cifar100_train_loader = torch.utils.data.DataLoader(cifar100('./cifar100/train', train=True, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=train_batch, shuffle=True)
cifar100_train_test_loader = torch.utils.data.DataLoader(cifar100('./cifar100/train', train=True, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=test_batch, shuffle=True)
cifar100_test_loader = torch.utils.data.DataLoader(cifar100('./cifar100/test', train=False, download=True,
                                                           transform=tv.transforms.Compose([
                                                               tv.transforms.ToTensor()
                                                           ])), batch_size=test_batch, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [67]:
# looks like our datasets are stored in /content
!cat /content/sample_data/README.md

This directory includes a few sample datasets to get you started.

* `california_housing_data*.csv` is California housing data from the 1990 US
  Census; more information is available at:
  https://developers.google.com/machine-learning/crash-course/california-housing-data-description

* `mnist_*.csv` is a small sample of the [MNIST
  database](https://en.wikipedia.org/wiki/MNIST_database), which is described
  at: http://yann.lecun.com/exdb/mnist/

* `anscombe.json` contains a copy of [Anscombe's
 quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet);
  it was originally described in

      Anscombe, F. J. (1973). 'Graphs in Statistical Analysis'. American Statistician. 27 (1): 17-21. JSTOR 2682899.

  and our copy was prepared by the [vega_datasets library](https://github.com/altair-viz/vega_datasets/blob/4f67bdaad10f45e3549984e17e1b3088c731503d/vega_datasets/_data/anscombe.json).


In [68]:
print("mnist")
print(f"train size: {len(mnist_train_loader) * train_batch}\ttest size: {len(mnist_test_loader) * test_batch}")
print()
print("cifar100")
print(f"train size: {len(cifar100_train_loader) * train_batch}\ttest size: {len(cifar100_test_loader) * test_batch}")

mnist
train size: 60000	test size: 10000

cifar100
train size: 50000	test size: 10000


In [0]:
fc_mnist = nn.Sequential(
    nn.Linear(28*28, 16),
    nn.Sigmoid(),
    nn.Linear(16, 16),
    nn.Sigmoid(),
    nn.Linear(16, 10)
)

In [0]:
lenet5_mnist = createLenet5(in_channels=1, init_padding=(2,2), classes=10)

In [102]:
print(fc_mnist)
print(lenet5_mnist)

Sequential(
  (0): Linear(in_features=784, out_features=16, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=16, out_features=16, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=16, out_features=10, bias=True)
)
Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (6): Linear(in_features=400, out_features=120, bias=True)
  (7): ReLU()
  (8): Linear(in_features=120, out_features=84, bias=True)
  (9): ReLU()
  (10): Linear(in_features=84, out_features=10, bias=True)
)


In [0]:
# t is of dims N * 1 where N is the batch size
# C should be the number of values for the column
def oneHotEncodeOneCol(t, C=2):
    N = t.shape[0]
    onehot = torch.Tensor([
        [0] * C
    ] * N)
    for i, v in enumerate(t):
        onehot[i, v] = 1
    
    return onehot

In [0]:
validate_every = 2000

In [107]:
fc_mnist_dev = fc_mnist.to(device)
opt = O.SGD(fc_mnist_dev.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
train_cross_entropy = []
train_accuracy = []
validation_cross_entropy = []
validation_accuracy = []

for epoch in range(2):
    n_correct = 0
    n_total = 0
    for i, batch in enumerate(mnist_train_loader):
        x, labels = batch
        x, labels = x.to(device), labels.to(device)
        N = x.shape[0]
        
        x = x.view(N, -1)
        
        # training mode (for things like dropout)
        fc_mnist_dev.train()
        
        # clear previous gradients
        opt.zero_grad()
        
        y_hat = fc_mnist_dev(x)
        loss = criterion(y_hat, labels)
        loss.backward()
        opt.step()
        
        train_cross_entropy.append(loss)
        
        n_correct += (torch.argmax(y_hat, dim=1) == labels).sum().item()
        n_total += N
        
        # evaluation mode (e.g. adds dropped neurons back in)
        fc_mnist_dev.eval()
        if i % validate_every == 0:
            n_val_correct = 0
            n_val_total = 0
            v_cross_entropy_sum = 0
            
            # don't calculate gradients here
            with torch.no_grad():
                for j, v_batch in enumerate(mnist_test_loader):
                    v_x, v_labels = v_batch
                    v_x, v_labels = v_x.to(device), v_labels.to(device)
                    v_N = v_x.shape[0]
                    v_x = v_x.view(v_N, -1)
                    
                    v_y_hat = fc_mnist_dev(v_x)
                    v_loss = criterion(v_y_hat, v_labels)
                    v_cross_entropy_sum += v_loss
                    n_val_correct += (torch.argmax(v_y_hat, dim=1) == v_labels).sum().item()
                    n_val_total += v_N

            print(f"[epoch {epoch + 1}, iteration {i}] \t accuracy: {n_val_correct / n_val_total} \t cross entropy: {v_cross_entropy_sum / n_val_total}")
            validation_accuracy.append(n_val_correct / n_val_total)
            validation_cross_entropy.append(v_cross_entropy_sum / n_val_total)
    
    print(f"epoch {epoch + 1} accumulated accuracy: {n_correct / n_total}")
    train_accuracy.append(n_correct / n_total)

[epoch 1, iteration 0] 	 accuracy: 0.0892 	 cross_entropy: 0.002350575290620327
[epoch 1, iteration 2000] 	 accuracy: 0.0974 	 cross_entropy: 0.002298793289810419
[epoch 1, iteration 4000] 	 accuracy: 0.3534 	 cross_entropy: 0.0022665569558739662
[epoch 1, iteration 6000] 	 accuracy: 0.3388 	 cross_entropy: 0.0021684144157916307
[epoch 1, iteration 8000] 	 accuracy: 0.4672 	 cross_entropy: 0.0018440253334119916
[epoch 1, iteration 10000] 	 accuracy: 0.6009 	 cross_entropy: 0.001437817933037877
[epoch 1, iteration 12000] 	 accuracy: 0.6457 	 cross_entropy: 0.001186282024718821
[epoch 1, iteration 14000] 	 accuracy: 0.6523 	 cross_entropy: 0.0010444630170240998
[epoch 1, iteration 16000] 	 accuracy: 0.7225 	 cross_entropy: 0.0009443741291761398
[epoch 1, iteration 18000] 	 accuracy: 0.751 	 cross_entropy: 0.0008631504024378955
[epoch 1, iteration 20000] 	 accuracy: 0.7911 	 cross_entropy: 0.0007969893049448729
[epoch 1, iteration 22000] 	 accuracy: 0.7927 	 cross_entropy: 0.0007382668554

In [0]:
fc_train_cross_entropy = train_cross_entropy
fc_train_accuracy = train_accuracy
fc_validation_cross_entropy = validation_cross_entropy
fc_validation_accuracy = validation_accuracy

In [116]:
lenet5_mnist_dev = lenet5_mnist.to(device)
opt = O.SGD(lenet5_mnist_dev.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
train_cross_entropy = []
train_accuracy = []
validation_cross_entropy = []
validation_accuracy = []

for epoch in range(2):
    n_correct = 0
    n_total = 0
    for i, batch in enumerate(mnist_train_loader):
        x, labels = batch
        x, labels = x.to(device), labels.to(device)
        N = x.shape[0]
        
        # training mode (for things like dropout)
        fc_mnist_dev.train()
        
        # clear previous gradients
        opt.zero_grad()
        
        y_hat = lenet5_mnist_dev(x)
        loss = criterion(y_hat, labels)
        loss.backward()
        opt.step()
        
        train_cross_entropy.append(loss)
        
        n_correct += (torch.argmax(y_hat, dim=1) == labels).sum().item()
        n_total += N
        
        # evaluation mode (e.g. adds dropped neurons back in)
        fc_mnist_dev.eval()
        if i % validate_every == 0:
            n_val_correct = 0
            n_val_total = 0
            v_cross_entropy_sum = 0
            
            # don't calculate gradients here
            with torch.no_grad():
                for j, v_batch in enumerate(mnist_test_loader):
                    v_x, v_labels = v_batch
                    v_x, v_labels = v_x.to(device), v_labels.to(device)
                    v_N = v_x.shape[0]
                    
                    v_y_hat = lenet5_mnist_dev(v_x)
                    v_loss = criterion(v_y_hat, v_labels)
                    v_cross_entropy_sum += v_loss
                    n_val_correct += (torch.argmax(v_y_hat, dim=1) == v_labels).sum().item()
                    n_val_total += v_N

            print(f"[epoch {epoch + 1}, iteration {i}] \t accuracy: {n_val_correct / n_val_total} \t cross entropy: {v_cross_entropy_sum / n_val_total}")
            validation_accuracy.append(n_val_correct / n_val_total)
            validation_cross_entropy.append(v_cross_entropy_sum / n_val_total)
    
    print(f"epoch {epoch + 1} accumulated accuracy: {n_correct / n_total}")
    train_accuracy.append(n_correct / n_total)

[epoch 1, iteration 0] 	 accuracy: 0.747 	 cross entropy: 0.0007258318364620209
[epoch 1, iteration 2000] 	 accuracy: 0.9043 	 cross entropy: 0.0003084908239543438
[epoch 1, iteration 4000] 	 accuracy: 0.9358 	 cross entropy: 0.00020822379156015813
[epoch 1, iteration 6000] 	 accuracy: 0.9527 	 cross entropy: 0.00015231870929710567
[epoch 1, iteration 8000] 	 accuracy: 0.9513 	 cross entropy: 0.00015651079593226314
[epoch 1, iteration 10000] 	 accuracy: 0.9625 	 cross entropy: 0.00012553349370136857
[epoch 1, iteration 12000] 	 accuracy: 0.963 	 cross entropy: 0.00012981842155568302
[epoch 1, iteration 14000] 	 accuracy: 0.9698 	 cross entropy: 9.72873458522372e-05
[epoch 1, iteration 16000] 	 accuracy: 0.9375 	 cross entropy: 0.0001960140943992883
[epoch 1, iteration 18000] 	 accuracy: 0.9475 	 cross entropy: 0.00016879368922673166
[epoch 1, iteration 20000] 	 accuracy: 0.9741 	 cross entropy: 8.303784125018865e-05
[epoch 1, iteration 22000] 	 accuracy: 0.9756 	 cross entropy: 7.96822