In [1]:
import torch
from torch.utils.data import sampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

import os
import copy
import matplotlib.pyplot as plt

In [2]:
NUM_TRAIN = 49000

transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])
cifar10_train = torchvision.datasets.CIFAR10(root='../data/CIFAR10', train=True,
                                           download=True, transform=transform)
loader_train = torch.utils.data.DataLoader(cifar10_train, batch_size=64,
                                      sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))
cifar10_val = torchvision.datasets.CIFAR10(root='../data/CIFAR10', train=False,
                                         download=True, transform=transform)
loader_val = torch.utils.data.DataLoader(cifar10_train, batch_size=64,
                                    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))
cifar10_test = torchvision.datasets.CIFAR10(root='../data/CIFAR10', train=False,
                                         download=True, transform=transform)
loader_test = torch.utils.data.DataLoader(cifar10_train, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [3]:
train_images, train_labels = iter(loader_train).next()
print("train_X:", train_images.size(), "train_y:", train_labels.size())
val_images, val_labels = iter(loader_val).next()
print("val_X:", val_images.size(), "val_y:", val_labels.size())
test_images, test_labels = iter(loader_test).next()
print("test_X:", test_images.size(), "test_y:", test_labels.size())

train_X: torch.Size([64, 3, 32, 32]) train_y: torch.Size([64])
val_X: torch.Size([64, 3, 32, 32]) val_y: torch.Size([64])
test_X: torch.Size([64, 3, 32, 32]) test_y: torch.Size([64])


In [None]:
USE_GPU = True

dtype = torch.float32

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    
print('using device', device)

using device cuda


In [None]:
class Model(nn.Module):
    def __init__(self, in_channel, channel_1, channel_2, 
                 channel_3, channel_4, num_classes):
        """
        Default initialization:
        model = Model(in_channel=3, channel_1=96, channel_2=192, 
                 channel_3=192, channel_4=192, num_classes=10)
        """
        super().__init__()
        self.conv1_1 = nn.Conv2d(in_channel, channel_1, 5, padding=2)  #(3, 96, 5)
        self.conv1_2 = nn.Conv2d(channel_1, channel_1, 1)   #(96, 96, 1)
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv2_1 = nn.Conv2d(channel_1, channel_2, 5, padding=2)   #(96, 192, 5)
        self.conv2_2 = nn.Conv2d(channel_2, channel_2, 1)   #(192, 192, 1)
        self.pool2 = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(channel_2, channel_3, 3)   #(192, 192, 3)
        self.conv4 = nn.Conv2d(channel_3, channel_4, 1)   #(192, 192, 1)
        self.conv5 = nn.Conv2d(channel_4, num_classes, 1) #(192, 10, 1)
        
    def forward(self, x):
        """
        x.shape: (64, 3, 32, 32)
        """
        N = x.shape[0]
        x_drop = F.dropout(x, .2)
        conv1_1_out = F.relu(self.conv1_1(x_drop))                       # (64, 96, 32, 32)
        conv1_2_out = F.relu(self.conv1_2(conv1_1_out))             # (64, 96, 32, 32)
        layer1_out = self.pool1(conv1_2_out)                        # (64, 96, 16, 16)
        layer1_out_drop = F.dropout(layer1_out, .5)
        conv2_1_out = F.relu(self.conv2_1(layer1_out_drop))              # (64, 192, 16, 16)
        conv2_2_out = F.relu(self.conv2_2(conv2_1_out))             # (64, 192, 16, 16)        
        layer2_out = self.pool2(conv2_2_out)                        # (64, 192, 8, 8)
        layer2_out_drop = F.dropout(layer2_out, .5)
        layer3_out = F.relu(self.conv3(layer2_out_drop))                 # (64, 192, 6, 6)
        layer4_out = F.relu(self.conv4(layer3_out))                 # (64, 192, 6, 6)
        layer5_out = F.relu(self.conv5(layer4_out))                 # (64, 10, 6, 6)
#         print(x.size(), conv1_1_out.size(), conv1_2_out.size(), layer1_out.size(),layer5_out.size())
        out = layer5_out.view(N, 10, -1).mean(dim=2)
        return out
        
def test_Model():
    x = torch.zeros((64, 3, 32, 32), dtype=dtype)
    model = Model(in_channel=3, channel_1=96, channel_2=192, 
                 channel_3=192, channel_4=192, num_classes=10)
    scores = model(x)
    print(scores.size())
test_Model()

torch.Size([64, 10])


In [None]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set') 
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        return acc

In [None]:
def plot(x_lst, y_lst, path, name, xlabel, ylabel):
    plt.plot(x_lst, y_lst)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
    plt.savefig(path + name)

In [None]:
print_every = 100

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
    
def train(model, optimizer, scheduler, weight_decay=0.001, epochs=1, best_acc=0.0):
    loss_his = []
    epoch_lst = []
    acc_his = []
    
    model = model.to(device=device)
    model = nn.DataParallel(model,device_ids=[0,1])  
    
    for e in range(epochs):
        scheduler.step()
        for step, (x, y) in enumerate(loader_train):
            model.train()
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if step % print_every == 0:
                print('Epoch %d, Iteration %d, loss = %.4f' % (e, step, loss.item()))
                acc = check_accuracy(loader_val, model)
                if acc > best_acc:
                    best_acc = acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                fo = open("model_b.log", "a")
                fo.write("Epoch: {}\t Step: {}\t loss: {:.4f}\t accuracy: {:.2f}\n"\
                         .format(e, step, loss.item(), acc))
                fo.close()
                print()
        epoch_lst.append(e)
        loss_his.append(loss.item())
        acc_his.append(acc)
    
    plot(epoch_lst, loss_his, path="../results figure/", name="Model B_Loss accross Epoch", 
        xlabel="epoch", ylabel="loss")
    plot(epoch_lst, acc_his, path="../results figure/", name="Model B_Accuracy accross Epoch",
        xlabel="epoch", ylabel="accuracy")
                
    print("Best accuracy:", best_acc)
    fo = open("model_b.log", "a")
    fo.write("Best accuracy: {}".format(best_acc))
    fo.close()
    model.load_state_dict(best_model_wts)
    return model

In [None]:
lr_lst = [0.01]

fo = open("model_b.log", "w")
fo.write("Model B Trainning log\n")
fo.close()

for learning_rate in lr_lst:
    fo = open("model_b.log", "a")
    fo.write("\nOriginal Learning rate: {}\n".format(learning_rate))
    fo.close()
    print("Learning_rate:", learning_rate)
    model = Model(in_channel=3, channel_1=96, channel_2=192, 
             channel_3=192, channel_4=192, num_classes=10) 
    optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9, weight_decay=0.001)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, 
                                               milestones=[200, 250, 300], gamma=0.1)
    best_model = train(model, optimizer, scheduler, epochs=350, best_acc=0.0)
    print()

Learning_rate: 0.01
Epoch 0, Iteration 0, loss = 2.3002
Checking accuracy on validation set
Got 79 / 1000 correct (7.90)

Epoch 0, Iteration 100, loss = 2.2980
Checking accuracy on validation set
Got 137 / 1000 correct (13.70)

Epoch 0, Iteration 200, loss = 2.3042
Checking accuracy on validation set
Got 164 / 1000 correct (16.40)

Epoch 0, Iteration 300, loss = 2.1751
Checking accuracy on validation set
Got 201 / 1000 correct (20.10)

Epoch 0, Iteration 400, loss = 2.0565
Checking accuracy on validation set
Got 217 / 1000 correct (21.70)

Epoch 0, Iteration 500, loss = 2.0960
Checking accuracy on validation set
Got 234 / 1000 correct (23.40)

Epoch 0, Iteration 600, loss = 2.0926
Checking accuracy on validation set
Got 233 / 1000 correct (23.30)

Epoch 0, Iteration 700, loss = 1.9043
Checking accuracy on validation set
Got 269 / 1000 correct (26.90)

Epoch 1, Iteration 0, loss = 1.8776
Checking accuracy on validation set
Got 294 / 1000 correct (29.40)

Epoch 1, Iteration 100, loss = 1

Got 748 / 1000 correct (74.80)

Epoch 9, Iteration 600, loss = 0.3698
Checking accuracy on validation set
Got 764 / 1000 correct (76.40)

Epoch 9, Iteration 700, loss = 0.5923
Checking accuracy on validation set
Got 750 / 1000 correct (75.00)

Epoch 10, Iteration 0, loss = 0.8160
Checking accuracy on validation set
Got 738 / 1000 correct (73.80)

Epoch 10, Iteration 100, loss = 0.4059
Checking accuracy on validation set
Got 749 / 1000 correct (74.90)

Epoch 10, Iteration 200, loss = 0.7857
Checking accuracy on validation set
Got 758 / 1000 correct (75.80)

Epoch 10, Iteration 300, loss = 0.6366
Checking accuracy on validation set
Got 758 / 1000 correct (75.80)

Epoch 10, Iteration 400, loss = 0.5695
Checking accuracy on validation set
Got 777 / 1000 correct (77.70)

Epoch 10, Iteration 500, loss = 0.7259
Checking accuracy on validation set
Got 781 / 1000 correct (78.10)

Epoch 10, Iteration 600, loss = 0.7429
Checking accuracy on validation set
Got 757 / 1000 correct (75.70)

Epoch 10,

Got 795 / 1000 correct (79.50)

Epoch 19, Iteration 300, loss = 0.4251
Checking accuracy on validation set
Got 812 / 1000 correct (81.20)

Epoch 19, Iteration 400, loss = 0.2533
Checking accuracy on validation set
Got 813 / 1000 correct (81.30)

Epoch 19, Iteration 500, loss = 0.4943
Checking accuracy on validation set
Got 786 / 1000 correct (78.60)

Epoch 19, Iteration 600, loss = 0.2874
Checking accuracy on validation set
Got 795 / 1000 correct (79.50)

Epoch 19, Iteration 700, loss = 0.2990
Checking accuracy on validation set
Got 808 / 1000 correct (80.80)

Epoch 20, Iteration 0, loss = 0.3558
Checking accuracy on validation set
Got 795 / 1000 correct (79.50)

Epoch 20, Iteration 100, loss = 0.1650
Checking accuracy on validation set
Got 798 / 1000 correct (79.80)

Epoch 20, Iteration 200, loss = 0.4803
Checking accuracy on validation set
Got 804 / 1000 correct (80.40)

Epoch 20, Iteration 300, loss = 0.2551
Checking accuracy on validation set
Got 798 / 1000 correct (79.80)

Epoch 2

In [None]:
PATH = "../best_model/"
model_name = "best_model_b.pt"
if not os.path.exists(PATH):
    os.mkdir(PATH)
torch.save(best_model.state_dict(), PATH + model_name)