<a href="https://colab.research.google.com/github/step-cheng/CIFAR10-pytorch/blob/main/CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch>=2.0.1
!pip install torchvision



In [1]:
import torch
from torch import nn
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

In [105]:

train_data = datasets.CIFAR10(root='./', train=True, download=True, transform=transforms.ToTensor(),
                              target_transform=transforms.Lambda(lambda y : torch.zeros(10,dtype=torch.float).scatter(dim=0,
                                                                                                                       index=torch.tensor(y),
                                                                                                                       value=1)))
# DataLoader wraps an iterable over dataset for automatic batching, sampling, and dataloading
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

test_data = datasets.CIFAR10(root='./', train=False, download=True, transform=transforms.ToTensor(),
                             target_transform=transforms.Lambda(lambda y : torch.zeros(10,dtype=torch.float).scatter(dim=0,
                                                                                                                       index=torch.tensor(y),
                                                                                                                       value=1)))
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)

classes = {
    0 : 'plane',    1 : 'car',    2 : 'bird',   3 : 'cat',    4 : 'deer',
    5 : 'dog',      6 : 'frog',   7 : 'horse',  8 : 'ship',   9 : 'truck'
}

Files already downloaded and verified
Files already downloaded and verified


In [108]:
# img, label = training_data[1]
train_iter = iter(train_loader)
# img, label = next(train_iter)
# print(img.shape)
# print(label.shape)

# shows image tensor
def show(tens, label):
  # make_grid makes a grid of images from a batch of tensors BxCxHxW
  label = torch.argmax(label,dim=1)
  print(label)
  img = torchvision.utils.make_grid(tens) # flattens B dimension to make a grid
  np_img = img.numpy()
  plt.imshow(np.transpose(np_img, (1,2,0)))     # seems each image is spaced by 2 pixels, channels goes last when plotting
  # title = classes[label]
  title = ' '.join([classes[t.item()] for t in label])
  plt.title(title)

# show(img, label)


In [None]:
# # sample custom dataset to illustrate a dataset and enumerate loader

# class sample():
#   def __init__(self):
#     numbers = range(0,100,2)
#     self.data = numbers

#   # need __len__ for shuffling
#   def __len__(self):
#     return len(self.data)

#   # need __getitem__ for subscripting
#   def __getitem__(self, ind):
#     return self.data[ind]

# sample = sample()
# loader = torch.utils.data.DataLoader(sample, batch_size = 5, shuffle = True)

# for i, batch in enumerate(loader):
#   print(i, batch)



In [122]:
# Make a class for your model

# nn is composed of a bunch of subclasses, myNN inherits from nn.Module
# within a subclass of torch.nn.Module, it's assumed we want to track gradients on the layer's weights
class myNN(nn.Module):

  def __init__(self):
    super().__init__()
    self.flat = nn.Flatten()

    # Sequential class stores modules that will be passed sequentially through constructor
    # input: 64x3x32x32;   output: 64x16x8x8
    self.conv_pool_stack = nn.Sequential(
      nn.Conv2d(3,16,kernel_size=3,padding=1, bias=False),
      nn.ReLU(),
      nn.Conv2d(16,16,kernel_size=3,padding=1, bias=False),
      nn.ReLU(),
      nn.MaxPool2d(2,stride=2),
      nn.BatchNorm2d(16),
      nn.Conv2d(16,32,kernel_size=3,padding=1,bias=False),
      nn.ReLU(),
      nn.Conv2d(32,32,kernel_size=3,padding=1,bias=False),
      nn.ReLU(),
      nn.MaxPool2d(2,stride=2),
      nn.BatchNorm2d(32)
    )

    # input: 64x32x8x8;    output: 16x10
    self.linear_relu_stack = nn.Sequential(
      self.flat,           # Flatten class flattens starting at dimension default 1 and ending at dimension default -1 --> 16x2048
      nn.Linear(2048, 512, bias=True),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(512, 512, bias=True),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(512, 10, bias=True),
      nn.Softmax(dim=1)
    )

  def forward(self,x):
    after_conv = self.conv_pool_stack(x)
    logits = self.linear_relu_stack(after_conv)
    return logits

device = ("gpu" if torch.cuda.is_available() else "cpu")

# to() creates an instance of the network and moves it to a device
model = myNN().to(device)
# print(myNN)
print(model)



myNN(
  (flat): Flatten(start_dim=1, end_dim=-1)
  (conv_pool_stack): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (7): ReLU()
    (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (linear_relu_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=2048, out_features=512, bias=True)
    (2): ReLU()

In [93]:
# print(model.conv_pool_stack[0].weight[0,0])
# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(),lr=0.1)

# input = torch.randn((4,3,32,32),requires_grad=False)
# target = torch.randint(0,10,size=(4,))
# logits = model(input)
# loss = loss_fn(logits,target)
# print(loss)
# loss.backward()
# print(model.conv_pool_stack[0].weight[0,0])
# print(model.conv_pool_stack[0].weight.grad[0,0])
# optimizer.step()
# print(model.conv_pool_stack[0].weight[0,0])
# print(model.conv_pool_stack[0].weight.grad[0,0])
# optimizer.zero_grad()

In [117]:
def preprocess(x):
  # find std and mean per channel
  x_stds = torch.sqrt(torch.var(x, dim=(0,2,3)))
  x_means = torch.sum(x, dim=(0,2,3)) / (x.shape[0]*x.shape[2]*x.shape[3])

  for channel in range(3):
    x[:,channel,:,:] -= x_means[channel]
    x[:,channel,:,:] /= x_stds[channel]
  return x

In [None]:
# loss = nn.CrossEntropyLoss()
# input = torch.randn(3,5,requires_grad=True)
# target = torch.empty(3, dtype=torch.long).random_(5)
# print(input)
# print(target)
# output = loss(input, target)
# print(output)

tensor([[ 0.3937, -1.0645, -0.1070, -0.5093,  0.4135],
        [-0.6726, -0.8393,  0.1113,  1.4253,  0.5300],
        [-0.0546, -1.6749,  1.4007, -1.8935,  1.1557]], requires_grad=True)
tensor([0, 4, 0])
tensor(1.6397, grad_fn=<NllLossBackward0>)


In [73]:
def accuracy(pred,y):
  guesses = torch.argmax(pred,dim=1)
  matches = sum(guesses==y)
  return matches/torch.numel(y), matches


In [118]:
def train(dataloader, model, loss_fn, optimizer):
  # sets the module to training mode, eval does the opposite
  model.train()

  accs = []
  matches = 0

  for batch, (X, y) in enumerate(dataloader):
    imgs = X.to(device)
    labels = y.to(device)

    # do not call model.foward(X), doing model(X) calls the __call__() function which does a few extra hooks...
    imgs = preprocess(imgs)
    pred = model(imgs)
    loss = loss_fn(pred, torch.argmax(labels,dim=1))
    acc, m = accuracy(pred,torch.argmax(labels,dim=1))
    matches += m

    if batch % 50 == 0:
      accs.append(acc)
      print(f"Progress: {(batch) * len(X)}, total accuracy: {matches/((batch+1)*len(X))}")

    # Backpropagation
    loss.backward()     # backpropagates prediction loss, deposits gradients of the loss w.r.t. for each parameter that has requires_grad=True
    optimizer.step()    # updates the parameter values, "learns"
    optimizer.zero_grad()       # zeros the grads because it is a dynamic graph



In [None]:
def test():

  # set module to testing mode, train does the opposite
  model.eval()

  # used to turn off gradient calculations, context manager
  with torch.no_grad():
    pass

In [120]:
# set cross entropy loss and Adam optimizer functions
loss_fn = nn.CrossEntropyLoss()     # THIS NEEDS TARGET VALUES, NOT ONE-HOT ENCODING
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

In [121]:
epochs = 5
for e in range(epochs):
  train(train_loader, model, loss_fn, optimizer)


Progress: 0, total accuracy: 0.203125
Progress: 3200, total accuracy: 0.3128063678741455
Progress: 6400, total accuracy: 0.34514233469963074
Progress: 9600, total accuracy: 0.3686879277229309
Progress: 12800, total accuracy: 0.38627177476882935
Progress: 16000, total accuracy: 0.40214142203330994
Progress: 19200, total accuracy: 0.41445183753967285
Progress: 22400, total accuracy: 0.4283742904663086
Progress: 25600, total accuracy: 0.44217580556869507
Progress: 28800, total accuracy: 0.4525013864040375
Progress: 32000, total accuracy: 0.4610154628753662
Progress: 35200, total accuracy: 0.46645304560661316
Progress: 38400, total accuracy: 0.4724677503108978
Progress: 41600, total accuracy: 0.47806259989738464
Progress: 44800, total accuracy: 0.4829261898994446
Progress: 48000, total accuracy: 0.4881408214569092
Progress: 0, total accuracy: 0.609375
Progress: 3200, total accuracy: 0.6011029481887817
Progress: 6400, total accuracy: 0.5943688154220581
Progress: 9600, total accuracy: 0.5880

In [None]:
import torchvision.models as models
# create instance of a model
vgg = models.vgg16()
print(vgg)
# torch.save(vgg.state_dict())
# vgg.load_state_dict(torch.load('model_weights.pth'))
# model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1