In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
#torch: tensor manipulation and GPU functionality
#torch.nn: building blocks for neural networks
#torch.optim: optimization algorithms such as Adam
#torch.utils.data: DataLoader batches and shuffles a dataset, Subset selects part of a dataset by index


import torchvision
import torchvision.transforms as transforms
#torchvision for loading and preprocessing dataset
#torchvision transforms to preprocess and convert images for neural networks

import matplotlib.pyplot as plt
import numpy as np
import time
#matplotlib to visualize data and training results
#numpy for basic matrix manipulation
#time to measure training time

In [32]:
transform = transforms.ToTensor()
#converts each image to a tensor and scales its pixel values
#no normalization yet: this version feeds the mean/std computation, and a normalizing transform replaces it later

In [33]:
#CIFAR-10 training split stored under ./data (downloaded if missing); `transform` is applied to every image
train_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

#human-readable class names, intended to be indexed by label id (index i names label i)
classes = [
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

In [34]:
train_dataloader = DataLoader(train_set, batch_size=128, shuffle=True)
#iterable over train_set in shuffled batches of 128
#in the visible cells this loader is only used for the mean/std pass; training uses train_loader instead

In [35]:
#one pass over the training data to get the per-channel mean and std used for normalization later
channel_sum = torch.zeros(3)     #running sum of pixel values per color channel (R,G,B)
channel_sq_sum = torch.zeros(3)  #running sum of squared pixel values per color channel
total_pixels = 0                 #number of pixels seen per channel

for images, _ in train_dataloader:
    batch_size, _channels, height, width = images.shape
    #each channel contributes batch_size*height*width pixels in this batch
    total_pixels += batch_size * height * width

    #reduce over batch, height and width so only the channel dimension is left
    channel_sum += images.sum(dim=[0, 2, 3])
    channel_sq_sum += images.pow(2).sum(dim=[0, 2, 3])

#mean = E[x], std = sqrt(E[x^2] - E[x]^2), computed per channel
mean = channel_sum / total_pixels
std = torch.sqrt(channel_sq_sum / total_pixels - mean ** 2)
print(mean, std, total_pixels)

tensor([0.4914, 0.4822, 0.4465]) tensor([0.2470, 0.2435, 0.2616]) 51200000


In [36]:
#updated transform: convert to tensor, then normalize each channel with the mean/std computed above
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
#load the dataset again so the normalizing transform is applied to each image

#NOTE(review): mean/std were measured on the full training set, so they also include the images
#that end up in the validation subset below

#seed a dedicated generator so the train/validation split is the same on every run
split_generator = torch.Generator().manual_seed(0)
total_indices = torch.randperm(len(train_dataset), generator=split_generator).tolist()

num_train = 40000 #images used for training; the remaining ones are held out for validation
train_subset = Subset(train_dataset, total_indices[:num_train])
validation_subset = Subset(train_dataset, total_indices[num_train:])
#both subsets take disjoint slices of the same random permutation, so they never overlap

train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)  #reshuffled every epoch
val_loader = DataLoader(validation_subset, batch_size=128, shuffle=False)  #order is irrelevant for evaluation

In [37]:
class CNN(nn.Module):
    """Small two-block convolutional classifier for 3-channel 32x32 images.

    Architecture: (conv 3x3 -> ReLU -> 2x2 max-pool) twice, then two fully
    connected layers. The first fully connected layer is sized for a 64x8x8
    feature map, i.e. a 32x32 input halved by each of the two pools.

    Args:
        num_classes: number of output logits (defaults to 10).
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1) #3 input channels (RGB) -> 32 feature maps, 3x3 kernel, padding keeps the spatial size
        self.pool = nn.MaxPool2d(2, 2) #2x2 window with stride 2 halves height and width; reused after both convs
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1) #32 -> 64 feature maps for more complex features
        self.fc1 = nn.Linear(64 * 8 * 8, 128) #flattened 64 channels of 8x8 (32x32 after two pools) -> 128 neurons
        self.fc2 = nn.Linear(128, num_classes) #128 neurons -> one logit per class

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x))) #conv - activation - pool, first block outputs
        x = self.pool(torch.relu(self.conv2(x))) #conv - activation - pool, second block outputs
        x = torch.flatten(x, 1) #keep the batch dimension, flatten the rest because fc1 expects a 2d input
        x = torch.relu(self.fc1(x)) #activation on each hidden output
        x = self.fc2(x) #no softmax here: the logits are returned as-is
        return x

In [38]:
#use the Nvidia GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#instantiate the network and move its parameters to that device
model = CNN().to(device)



In [39]:
#cross-entropy loss for classification, applied to the model's logits
criterion = nn.CrossEntropyLoss()
#Adam (adaptive moment optimizer) over all model parameters with a learning rate of 1e-3
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [63]:
num_epochs = 20 #number of full passes over the training subset

#model and optimizer are created in earlier cells, so re-running this cell continues training
#the existing weights; re-run those cells first to start from scratch
model.train() #back to training mode in case this cell is re-run after the evaluation cell called model.eval()

for epoch in range(num_epochs):
    epoch_start = time.perf_counter()
    running_loss = 0.0 #sum of per-sample losses seen this epoch
    samples_seen = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device) #send inputs to the same device as the model

        #forward step to find output guesses
        outputs = model(images) #goes through the CNN to get the final logits
        loss = criterion(outputs, labels) #calculates the loss for this batch

        #backprop to optimize weights
        optimizer.zero_grad() #resets gradients every batch, else they would keep accumulating
        loss.backward() #backward prop to find dL/dw
        optimizer.step() #changes weights accordingly

        #weight the batch loss by batch size so a smaller last batch does not skew the epoch average
        running_loss += loss.item() * labels.size(0)
        samples_seen += labels.size(0)

    #report progress once per epoch so a long run is not silent
    print("Epoch {}/{} - loss: {:.4f} - {:.1f}s".format(
        epoch + 1,
        num_epochs,
        running_loss / max(samples_seen, 1),
        time.perf_counter() - epoch_start,
    ))

In [65]:
model.eval()  #sets model to eval mode and turns off training-only features
correct = 0
total = 0

with torch.no_grad():  #no gradient tracking needed, only evaluating right now
    for images, labels in val_loader:  #held-out validation subset carved from the training data
        images, labels = images.to(device), labels.to(device) #move the batch to the model's device
        predicted = model(images).argmax(dim=1)  #forward pass, keep only the class with the highest score
        total += labels.size(0) #running count of evaluated images
        correct += (predicted == labels).sum().item() #running count of correct guesses

#this is validation accuracy: val_loader comes from the training data, not from the CIFAR-10 test split
print("Validation Accuracy: {:.2f}%".format(100 * correct / total)) #only looking at accuracy here

Test Accuracy: 69.14%
