In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import dataset, DataLoader
from torchvision import datasets 
import torchvision.transforms as transforms
import torch.nn as nn

In [2]:
batch_size = 64
momentum = 0.90
L2_decay = 5.0e-4
Learning_Rate = 1.0e-2
Epochs = 75

In [3]:
train_data = datasets.CIFAR10(root = './data', train = True, download = True)

valid_data = datasets.CIFAR10(root = './data', train = False, download = True)
                             

100%|██████████| 170M/170M [00:10<00:00, 15.8MB/s] 


In [4]:
data = train_data.data / 255.0 
mean = data.mean(axis=(0, 1, 2))
std = data.std(axis=(0, 1, 2))

print(f"Mean: {mean}, Std: {std}")

Mean: [0.49139968 0.48215841 0.44653091], Std: [0.24703223 0.24348513 0.26158784]


In [5]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),\
    transforms.ToTensor(),\
    transforms.RandomCrop((224, 224)),\
    transforms.RandomHorizontalFlip(p = 0.50),\
    transforms.Normalize((0.49139968, 0.48215841, 0.44653091), (0.24703223,0.24348513,0.26158784))
])

In [6]:
train_data = datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)

valid_data = datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)

In [7]:
train_loader = DataLoader(train_data, shuffle = True, batch_size = batch_size)

valid_loader = DataLoader(valid_data, shuffle = False, batch_size = batch_size)

In [8]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU(), 
                                   nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv2 = nn.Sequential(nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU(), 
                                   nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv3 = nn.Sequential(nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU())
        
        self.conv4 = nn.Sequential(nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU(), 
                                   nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv5 = nn.Sequential(nn.Conv2d(256, 512, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU())
        
        self.conv6 = nn.Sequential(nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv7 = nn.Sequential(nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU())
        
        self.conv8 = nn.Sequential(nn.Conv2d(512, 512, kernel_size = 3, stride = 1, padding = 1),
                                   nn.ReLU(), 
                                   nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.fc1 = nn.Sequential(nn.Dropout(0.50), 
                                 nn.Linear(512 * 7 * 7, 4096), 
                                 nn.ReLU())
        
        self.fc2 = nn.Sequential(nn.Dropout(0.50), 
                                 nn.Linear(4096, 4096), 
                                 nn.ReLU())
        
        self.fc3 = nn.Sequential(nn.Linear(4096, 10))
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
            
        x = x.view(x.size(0), -1)
            
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
            
        return x

In [9]:
shallow_model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(shallow_model.parameters(), lr = Learning_Rate, momentum = momentum, weight_decay = L2_decay)

In [None]:
train_losses, valid_losses = [], []
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

shallow_model.to(device)

for Epoch in range(Epochs):
    shallow_model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = shallow_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    train_loss = running_loss / len(train_loader.dataset)
    train_losses.append(train_loss)
    
    shallow_model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = shallow_model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
    valid_loss = running_loss /len(valid_loader.dataset)
    valid_losses.append(valid_loss)
        
    print(f'Epoch {Epoch + 1}/{Epochs} - Train loss: {train_loss}, ')

Epoch 1/75 - Train loss: 2.3030124544525146, 
Epoch 2/75 - Train loss: 1.9899833869171142, 
Epoch 3/75 - Train loss: 1.5176590591812134, 
Epoch 4/75 - Train loss: 1.2485292015457152, 
Epoch 5/75 - Train loss: 1.0249585094070435, 
Epoch 6/75 - Train loss: 0.8620086722183228, 
Epoch 7/75 - Train loss: 0.7592162500762939, 
Epoch 8/75 - Train loss: 0.6791021733665467, 
Epoch 9/75 - Train loss: 0.6256113459777832, 
Epoch 10/75 - Train loss: 0.5767996848106385, 
Epoch 11/75 - Train loss: 0.536299150762558, 
Epoch 12/75 - Train loss: 0.49691714475631715, 
Epoch 13/75 - Train loss: 0.4745468293190002, 
Epoch 14/75 - Train loss: 0.44859341942787173, 
Epoch 15/75 - Train loss: 0.42536570830345155, 
Epoch 16/75 - Train loss: 0.40002764031410215, 
Epoch 17/75 - Train loss: 0.3841819697237015, 
Epoch 18/75 - Train loss: 0.3595702667140961, 
Epoch 19/75 - Train loss: 0.346376233587265, 
Epoch 20/75 - Train loss: 0.3362429794692993, 
Epoch 21/75 - Train loss: 0.3188837728500366, 
Epoch 22/75 - Train 