In [1]:
import argparse
import os, sys
import time
import datetime
import random

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm

import matplotlib.pyplot as plt

# You cannot change this line.
from tools.dataloader import CIFAR10

In [2]:
# Create the neural network module: VGG16
class Vgg16(nn.Module):
    def __init__(self):
        super(Vgg16, self).__init__()
#         self.conv1 = nn.Conv2d(3,64,3) #(in, out, kernel)
#         self.conv2 = nn.Conv2d(64,64,3)
#         self.pool1 = nn.MaxPool2d(2,2) #(kernel, stride)
#         self.conv3 = nn.Conv2d(64,128,3)
#         self.conv4 = nn.Conv2d(128,128,3)
#         self.pool2 = nn.MaxPool2d(2,2)
#         self.conv5 = nn.Conv2d(128,256,3)
#         self.conv6 = nn.Conv2d(256,256,3)
#         self.fc1 = nn.Linear(1 * 1 * 256, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)
        
        
        self.conv1 = nn.Conv2d(3,64,3) #(in, out, kernel)
        self.conv1bn = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64,64,3)
        self.conv2bn = nn.BatchNorm2d(64)

        self.pool1 = nn.MaxPool2d(2,2) #(kernel, stride)
        self.conv3 = nn.Conv2d(64,128,3)
        self.conv3bn = nn.BatchNorm2d(128)

        self.pool2 = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(128,256,3)
        self.conv4bn = nn.BatchNorm2d(256)

        self.pool3 = nn.MaxPool2d(2,2)
        self.fc1 = nn.Linear(2 * 2 * 256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        

    def forward(self, x):
        x = F.relu(self.conv1bn(self.conv1(x)))
       
        x = F.relu(self.conv2bn(self.conv2(x)))
        x = self.pool1(x)
        
        x = F.relu(self.conv3bn(self.conv3(x)))
        
        x = self.pool2(x)
        x = F.relu(self.conv4bn(self.conv4(x)))
        x = self.pool3(x)

        
        #x = F.relu(self.conv6(x))
        
        
        #x = F.relu(self.fc1(x.view(-1, 1 * 1 * 256)))
        x = F.relu(self.fc1(x.view(-1, 2 * 2 * 256)))
        
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

In [3]:
# Print some parameters for checking

device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = Vgg16()
net = net.to(device)
testArray = torch.zeros([128,3,32,32])
x = testArray.to(device)
y = net(x)
for name, p in net.named_parameters():
    print(name, p.shape)

conv1.weight torch.Size([64, 3, 3, 3])
conv1.bias torch.Size([64])
conv1bn.weight torch.Size([64])
conv1bn.bias torch.Size([64])
conv2.weight torch.Size([64, 64, 3, 3])
conv2.bias torch.Size([64])
conv2bn.weight torch.Size([64])
conv2bn.bias torch.Size([64])
conv3.weight torch.Size([128, 64, 3, 3])
conv3.bias torch.Size([128])
conv3bn.weight torch.Size([128])
conv3bn.bias torch.Size([128])
conv4.weight torch.Size([256, 128, 3, 3])
conv4.bias torch.Size([256])
conv4bn.weight torch.Size([256])
conv4bn.bias torch.Size([256])
fc1.weight torch.Size([120, 1024])
fc1.bias torch.Size([120])
fc2.weight torch.Size([84, 120])
fc2.bias torch.Size([84])
fc3.weight torch.Size([10, 84])
fc3.bias torch.Size([10])


In [4]:
# Setting some hyperparameters

TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100
INITIAL_LR = 0.1
MOMENTUM = 0.9
REG = 1e-4
EPOCHS = 150
DATAROOT = "./data"
CHECKPOINT_PATH = "./saved_model_vgg"

In [5]:
# Preprocessing

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    #transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=2, hue=0.2), #contrast=2, saturation=2,
    transforms.RandomCrop([32, 32], padding=2),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010))
])

transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010))
])

In [6]:
# Call the dataset Loader
trainset = CIFAR10(root=DATAROOT, train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=1)
valset = CIFAR10(root=DATAROOT, train=False, download=True, transform=transform_val)
valloader = torch.utils.data.DataLoader(valset, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=1)

Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified


In [7]:
# Specify the device for computation
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# net = Vgg16()
# net = net.to(device)
if device =='cuda':
    print("Train on GPU...")
else:
    print("Train on CPU...")

Train on GPU...


In [8]:
# FLAG for loading the pretrained model
TRAIN_FROM_SCRATCH = False
# Code for loading checkpoint and recover epoch id.
CKPT_PATH = "./saved_model_vgg/model.h5"
def get_checkpoint(ckpt_path):
    try:
        ckpt = torch.load(ckpt_path)
    except Exception as e:
        print(e)
        return None
    return ckpt

ckpt = get_checkpoint(CKPT_PATH)
if ckpt is None or TRAIN_FROM_SCRATCH:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

Successfully loaded checkpoint: ./saved_model_vgg/model.h5
Starting from epoch 16 
Starting from learning rate 0.086813:


In [9]:
# Create loss function and specify regularization
criterion = nn.CrossEntropyLoss()
# Add optimizer
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)

In [10]:
# Create helper variables for graphing
val_losses = []
train_losses = []
validation_accuracies = []
epochs = []

In [None]:
# Start the training/validation process
# The process should take about 5 minutes on a GTX 1070-Ti
# if the code is written efficiently.
global_step = 0
best_val_acc = 0

for i in range(start_epoch, EPOCHS):
    #print(datetime.datetime.now())
    # Switch to train mode
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0

    train_loss = 0
    train_acc = 0
    # Train the training dataset for 1 epoch.
    print(len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy inputs to device
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # Printing initial loss 
        if i == 0 and batch_idx == 0:
            print("initial loss: ",  loss)
        
        # Now backward loss
        loss.backward()
        # Apply gradient
        optimizer.step()
        
        # Calculate predicted labels
        _, predicted = outputs.max(1)
        # Calculate accuracy
        total_examples += inputs.size(0)
        correct_examples += torch.eq(targets, predicted).sum().item()

        train_loss += loss

        global_step += 1
        if global_step % 100 == 0:
            avg_loss = train_loss / (batch_idx + 1)
        pass
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    
    train_losses.append(avg_loss)

    
    #print(datetime.datetime.now())
    # Validate on the validation dataset
    #print("Validation...")
    total_examples = 0
    correct_examples = 0
    
    net.eval()

    val_loss = 0
    val_acc = 0
    # Disable gradient during validation
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            # Copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Zero the gradient
            optimizer.zero_grad()
            # Generate output from the DNN.
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # Calculate predicted labels
            _, predicted = outputs.max(1)
            # Calculate accuracy
            total_examples += inputs.size(0)
            correct_examples += torch.eq(targets, predicted).sum().item()
            val_loss += loss

    avg_loss = val_loss / len(valloader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    
    # For graphing
    val_losses.append(avg_loss)
    epochs.append(i)
    validation_accuracies.append(avg_acc)
    
        
    """
    Assignment 4(b)
    Learning rate is an important hyperparameter to tune. Specify a 
    learning rate decay policy and apply it in your training process. 
    Briefly describe its impact on the learning curveduring your 
    training process.    
    Reference learning rate schedule: 
    decay 0.98 for every 2 epochs. You may tune this parameter but 
    minimal gain will be achieved.
    Assignment 4(c)
    As we can see from above, hyperparameter optimization is critical 
    to obtain a good performance of DNN models. Try to fine-tune the 
    model to over 70% accuracy. You may also increase the number of 
    epochs to up to 100 during the process. Briefly describe what you 
    have tried to improve the performance of the LeNet-5 model.
    """
    DECAY_EPOCHS = 2
    DECAY = 0.98
    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            # Assign the learning rate parameter
            param_group['lr'] = current_learning_rate
        print("Current learning rate has decayed to %f" %current_learning_rate)
    
    # Save for checkpoint
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        if not os.path.exists(CHECKPOINT_PATH):
            os.makedirs(CHECKPOINT_PATH)
        print("Saving ...")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'model.h5'))

print("Optimization finished.")

Epoch 16:
352
Training loss: 0.7592, Training accuracy: 0.7374
Validation loss: 0.6400, Validation accuracy: 0.7810
Current learning rate has decayed to 0.085076
Saving ...
Epoch 17:
352
Training loss: 0.7361, Training accuracy: 0.7465
Validation loss: 0.6501, Validation accuracy: 0.7860
Saving ...
Epoch 18:
352
Training loss: 0.7169, Training accuracy: 0.7546
Validation loss: 0.5855, Validation accuracy: 0.7974
Current learning rate has decayed to 0.083375
Saving ...
Epoch 19:
352
Training loss: 0.7011, Training accuracy: 0.7585
Validation loss: 0.6091, Validation accuracy: 0.7940
Epoch 20:
352
Training loss: 0.7001, Training accuracy: 0.7600
Validation loss: 0.5763, Validation accuracy: 0.8112
Current learning rate has decayed to 0.081707
Saving ...
Epoch 21:
352
Training loss: 0.6761, Training accuracy: 0.7658
Validation loss: 0.5713, Validation accuracy: 0.8040
Epoch 22:
352
Training loss: 0.6698, Training accuracy: 0.7703
Validation loss: 0.5801, Validation accuracy: 0.8026
Curren

In [None]:
# Plot
tmp = []
for i in val_losses:
    tmp.append(i)
val_losses = tmp

tmp = []
for i in train_losses:
    tmp.append(i)
train_losses = tmp

epochs = []
for i in range(len(val_losses)):
    epochs.append(i)

    
val, idx = max((val, idx) for (idx, val) in enumerate(validation_accuracies))
print("Max validation acc:", val, " at Epoch: ", idx)
    
#plt.plot(epochs, val_losses, label="val_losses") 
plt.plot(epochs, train_losses, label="train_losses") 
plt.legend()
plt.xlabel('Epochs') 
plt.ylabel('Loss') 
plt.title('Loss curve')
plt.show()

In [13]:
from tools.testloader import TEST_SET
import numpy as np


testset = TEST_SET(root=DATAROOT, train=False, transform=transform_train)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=1)

results = []

with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # Copy inputs to device         
        inputs = inputs.to(device)
        
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output from the DNN.
        outputs = net(inputs)
        
        # Calculate predicted labels
        _, predicted = outputs.max(1)
        
        predicted_np = predicted.data.cpu().numpy()
        for i in predicted_np:
            results.append(i)
# len(results)

In [14]:
header = ["Id", "Category"]

out_to_file = []

out_to_file.append(header)

for i in range(len(results)):
    tmp = [i, results[i]]
    out_to_file.append(tmp)

#print(out_to_file)

np_out = np.array(out_to_file)

np.savetxt('output.csv', np_out, fmt='%s', delimiter=",")