## Train a simple model on the default CIFAR-10 dataset.

In [1]:
# Make the `vgg` package importable: if it cannot be resolved from the current
# import path, add the parent directory ('..') to `sys.path` so the imports in
# the next cell can find it.
from importlib.util import find_spec
if find_spec("vgg") is None:
    import sys
    # Relative to the notebook's working directory.
    sys.path.append('..')

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms 
from vgg.data_loader.data_loaders import DefaultCifar10DataLoader
from vgg.model.model import CNN, Vgg19, Vgg11

In [3]:
# Sanity check: does PyTorch see a CUDA device in this session?
torch.cuda.is_available()

True

In [4]:
# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

### Load model and dataset into GPU

In [12]:
# Build VGG-19 and move its parameters to `device` (`Module.to` returns the
# module itself). This re-binds `model`, so the optimizer must be (re)created
# after running this cell.
model = Vgg19().to(device)
model

Vgg19(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),

In [None]:
# Alternative model choice: build VGG-11 and move it to `device`. This re-binds
# `model`, so the optimizer must be (re)created after running this cell.
model = Vgg11().to(device)
model

In [None]:
# Alternative model choice: build the project's CNN and move it to `device`.
# This re-binds `model`, so the optimizer must be (re)created after running
# this cell.
model = CNN().to(device)
model

In [6]:
# Project data loader reading from '../data', holding out 10% of the data for
# validation (validation_split=0.1).
# NOTE(review): the positional 32 is presumably the batch size — confirm
# against the DefaultCifar10DataLoader signature.
dl = DefaultCifar10DataLoader('../data', 32, validation_split=0.1)

Files already downloaded and verified


In [37]:
class Net(nn.Module):
    """Small convolutional classifier.

    Two stages of (5x5 convolution -> ReLU -> 2x2 max-pool) followed by three
    fully connected layers. The first linear layer expects 16 * 5 * 5
    features, which is what the two conv/pool stages produce for 3x32x32
    inputs (32 -> 28 -> 14 -> 10 -> 5).

    Args:
        num_classes: size of the output layer (number of logits per sample).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Feature extractor; the same pooling module is reused after each conv.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Return the `num_classes` un-normalised scores for each sample in `x`."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# Alternative model choice: build the small `Net` defined in this notebook and
# move it to `device`. This re-binds `model`, so the optimizer must be
# (re)created after running this cell.
model = Net().to(device)
model

In [None]:
# Settings shared by the train and test pipelines, defined once so the two
# loaders cannot drift apart.
DATA_ROOT = "../data"
BATCH_SIZE = 32
NUM_WORKERS = 2

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and std 0.5.
tfms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Training split: reshuffled every epoch.
train_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                         download=True, transform=tfms)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=NUM_WORKERS)

# Test split: fixed order.
test_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                        download=True, transform=tfms)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                          shuffle=False, num_workers=NUM_WORKERS)

# Human-readable class names, indexed by CIFAR-10 label id.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

### Define a loss function and optimizer

In [13]:
# Scratch check of the literal used for the learning rate:
# 10e-4 means 10 * 10**-4, i.e. 0.001 (1e-3) — not 1e-4.
10e-4

0.001

In [14]:
# Classification loss. Per the PyTorch docs, nn.CrossEntropyLoss applies
# log-softmax internally, so the model should output raw (un-normalised) scores.
criterion = nn.CrossEntropyLoss()

In [20]:
# SGD with momentum over the parameters of the *current* `model`. The optimizer
# captures those parameters at creation time, so re-run this cell whenever
# `model` is re-bound to a new network.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-3,  # same value as the 10e-4 literal used before
    momentum=0.9,
)

### Training Loop

In [21]:
# Number of full passes over the training data.
epochs = 2

In [None]:
# Train `model` for `epochs` passes over `dl`, printing the mean loss of every
# completed window of `log_every` mini-batches.
#
# The window is 200 rather than 2000: one epoch over `dl` is only ~1400
# batches, so a 2000-batch window would never complete. The check is
# `== log_every - 1` so that a report is only emitted after a full window has
# been accumulated (a `== 0` check fires after the very first batch while
# still dividing by the window size).
log_every = 200

# Make sure layers such as dropout / batch-norm (if any) are in training mode.
model.train()

for epoch in range(epochs):
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(dl):
        # The batch must live on the same device as the model.
        data, target = data.to(device), target.to(device)

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if batch_idx % log_every == log_every - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, batch_idx + 1, running_loss / log_every))
            running_loss = 0.0

    # TODO: mean loss over the whole training set (e.g. total_loss / len(dl)).
    # TODO: calculate stats such as training accuracy (train_acc).
    

In [22]:
# Train `model` for `epochs` passes over `dl`, printing the mean loss of every
# completed window of `log_every` mini-batches.
log_every = 200  # mini-batches per progress report

for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dl):
        # each batch is an (inputs, labels) pair; move it to the model's device
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % log_every == log_every - 1:    # print every `log_every` mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('Finished Training')

[1,   200] loss: 2.305
[1,   400] loss: 2.304
[1,   600] loss: 2.304
[1,   800] loss: 2.305
[1,  1000] loss: 2.304
[1,  1200] loss: 2.304
[1,  1400] loss: 2.304
[2,   200] loss: 2.304
[2,   400] loss: 2.304
[2,   600] loss: 2.304
[2,   800] loss: 2.304
[2,  1000] loss: 2.304
[2,  1200] loss: 2.304
[2,  1400] loss: 2.304
Finished Training


#### NOTES:

Open question — which setting is the solution:
- either lr = 0.001,
- or batch_size = 4.

results:

-> Net Model:
- batch_size = 4 => loss = 1.088
- batch_size = 32 => loss = 1.726 (fewer iterations)
- using DefaultCifar10Dataloader => loss = 1.529

-> CNN Model:
- batch_size = 32 (dl) => loss = 1.110

-> Vgg19 Model: 
- batch_size = 32 (dl) => loss = 2.310
- batch_size = 32 (dl) + remove logsoftmax => loss = 2.309
  - decrease lr by a factor of 10 => loss = 2.305 (lr = 10e-5, i.e. 1e-4)

-> Vgg11 Model:
- batch_size = 32 (dl) => loss = 2.288
- batch_size = 32 (dl) + remove logsoftmax => loss = 2.291