In [1]:
# Step 1: write a short paragraph to describe your understanding of 
# 　　image classification task. Also please include a plan (e.g., 
# 　　overall　system flowchart), expected outputs and anything you 
# 　　think it should be included.

# Step 2: build a basic Alex net using tensorflow or pytorch from 
# 　　scratch. Train the net using CIFAR training data. Use Tensorboard
# 　　to help visualize and analyze the training and testing procedure.

# Step 3: Use pre-trained deep model (VGG or ResNet-18) to build image
# 　　classification model, i.e., fine-tune with CIFAR dataset.  Use 
# 　　Tensorboard to help visualize and analyze the training and 
# 　　testing procedure.

# Step 4: Analyse training/testing error and prepare the report for 
# 　　your observation and conclusion.

# Again, the purpose of this mini-project is not the final results, 
# 　　but the whole procedure you work on this project. Please jog down
# 　any problems you met, and your solutions or analysis if applicable.

In [2]:
import numpy as np
import torch.nn as nn
import torch

import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T

import torch.nn.functional as F

import matplotlib.pyplot as plt

from tensorboardX import SummaryWriter

In [2]:
import torch
dtype = torch.float32

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    
print('using device:', device)

using device: cpu


In [4]:
from torch.utils.data import sampler

NUM_TRAIN = 48000

transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

def getData(batch_size):
    cifar10_train = dset.CIFAR10('../datasets/', train=True, download=True, transform=transform)
    loader_train = DataLoader(cifar10_train, batch_size=batch_size,
#                               sampler=sampler.SubsetRandomSampler(list(range(NUM_TRAIN, len(cifar10_train)))), 
                              num_workers=4)

    cifar10_val = dset.CIFAR10('../datasets/', train=False, download=True, transform=transform)
    loader_val = DataLoader(cifar10_val, batch_size=batch_size, 
#                             sampler=sampler.SubsetRandomSampler(list(range(7500,len(cifar10_val)))), 
                            num_workers=4)
    
#     print("batch_size", batch_size)
    print("loader_train:", len(loader_train), "loader_val:", len(loader_val))
    return (loader_train, loader_val)
# getData(100)

In [5]:
def flatten(x):
    N = x.shape[0]
    return x.view(N, -1)

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

In [6]:
channel_1 = 96
channel_2 = 256
channel_3 = 384 
channel_4 = 384
channel_5 = 256

num_feature_1 = 96
num_feature_2 = 256
num_feature_3 = 384

feature_1 = 6*6*256
feature_2 = 2048
feature_3 = 10

def getModel():
    return nn.Sequential(
        
    nn.Conv2d(in_channels=3, out_channels=96, kernel_size=3, stride=1, padding=2, bias=True),
    nn.ReLU(),
    nn.BatchNorm2d(96),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=96, out_channels=256, kernel_size=3, stride=1, padding=2, bias=True),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1, bias=True),
    nn.ReLU(),
#     nn.BatchNorm2d(384),

    nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1, bias=True),
    nn.ReLU(),
#     nn.BatchNorm2d(384),

    nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=True),
    nn.ReLU(),
    nn.BatchNorm2d(256),
    nn.MaxPool2d(kernel_size=3, stride=2),

    Flatten(),

    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=4096, out_features=feature_2),
    nn.ReLU(),

    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=feature_2, out_features=feature_2),
    nn.ReLU(),

    nn.Linear(in_features=feature_2, out_features=feature_3)
)

# model = nn.Sequential(

#     nn.Conv2d(in_channels=3, out_channels=channel_1, kernel_size=(11, 11), stride=1, padding=(2, 2), bias=True),
#     nn.ReLU(),
# #     nn.BatchNorm2d(num_features=num_feature_1),
#     nn.MaxPool2d(kernel_size=(3, 3), stride=2),

#     nn.Conv2d(in_channels=channel_1, out_channels=channel_2, kernel_size=(5, 5), stride=1, padding=(2, 2), bias=True),
#     nn.ReLU(),
# #     nn.BatchNorm2d(num_features=num_feature_2),
#     nn.MaxPool2d(kernel_size=(3, 3), stride=2),

#     nn.Conv2d(in_channels=channel_2, out_channels=channel_3, kernel_size=(3, 3), stride=1, padding=(1, 1), bias=True),
#     nn.ReLU(),
# #     nn.BatchNorm2d(num_features=num_feature_3),

#     nn.Conv2d(in_channels=channel_3, out_channels=channel_4, kernel_size=(3, 3), stride=1, padding=(1, 1), bias=True),
#     nn.ReLU(),
# #     nn.BatchNorm2d(num_features=num_feature_3),

#     nn.Conv2d(in_channels=channel_4, out_channels=channel_5, kernel_size=(3, 3), stride=1, padding=(1, 1), bias=True),
#     nn.ReLU(),
# #     nn.BatchNorm2d(num_features=num_feature_2),
#     nn.MaxPool2d(kernel_size=(3, 3), stride=2),

#     Flatten(),
    
#     nn.Linear(in_features=feature_1, out_features=feature_2),
#     nn.ReLU(),
#     nn.Dropout(p=0.5, inplace=False),

#     nn.Linear(in_features=feature_2, out_features=feature_2),
#     nn.ReLU(),
#     nn.Dropout(p=0.5, inplace=False),

#     nn.Linear(in_features=feature_2, out_features=feature_3)
# )
# print(getModel())

In [7]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    num_samples = 0
    num_correct = 0
    model.eval()
    for x, y in loader:
        x = x.to(device=device)
        y = y.to(device=device)
        scores = model(x)
        _, preds = torch.max(scores, dim=1)
        num_correct += (preds == y).sum()
        num_samples += x.shape[0]
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f%%)\n' % (num_correct, num_samples, 100 * acc))
    return acc

In [8]:
def train(lr, bs, mo, epochs=100):
    print("lr:",lr, " bs:", bs, " mo:", mo)

    loader_train, loader_val = getData(bs)

    model = getModel()
    optimizer = setOptim(model, learning_rate=lr, momentum=mo)

    model = model.to(device=device)

    writer = SummaryWriter(comment="1_epoch")
    train_len = len(loader_train)

    images, labels = next(iter(loader_train))
    
    for e in range(epochs):
        epoch_loss = 0
        epoch_count = 0
        for t, (x, y) in enumerate(loader_train):
            x = x.to(device=device)
            y = y.to(device=device)
            optimizer.zero_grad() # 梯度归零
            model.train()
            scores = model(x)
            loss = F.cross_entropy(scores, y)  # 计算loss
            loss.backward() #　反向传播
            optimizer.step() # 更新参数

            epoch_count += 1
            epoch_loss += loss.item()
            
        writer.add_scalar('train', epoch_loss/epoch_count, e)
#         writer.add_scalar('train', loss.item(), e*len(loader_train)+t)
#         writer.add_graph(model, x)

        print('epoch %d, loss = %.4f' %(e, epoch_loss/epoch_count))

        acc = check_accuracy(loader_val, model)

        if e % 20 == 0:
            lr = lr/3.0
            optimizer = setOptim(model, learning_rate=lr, momentum=mo)

In [9]:
def setOptim(model, learning_rate, momentum):
    return optim.SGD(model.parameters(), lr=learning_rate, momentum = momentum)

In [10]:
learning_rates = [0.01, 0.003]
batch_sizes = [64, 128, 256, 512]
momentum = [0.9, 0.95, 0.99]
# drop_out = [0.5, 0.75]

train(0.001, 64, 0.9)

# for lr in learning_rates:
#     for bs in batch_sizes:
#         for mo in momentum:
#             train(lr, bs, mo)

lr: 0.001  bs: 64  mo: 0.9
Files already downloaded and verified
Files already downloaded and verified
loader_train: 782 loader_val: 157
epoch 0, loss = 1.5172
Checking accuracy on test set
Got 5958 / 10000 correct (59.58%)

epoch 1, loss = 1.0383
Checking accuracy on test set
Got 6467 / 10000 correct (64.67%)

epoch 2, loss = 0.9134
Checking accuracy on test set
Got 6957 / 10000 correct (69.57%)

epoch 3, loss = 0.8248
Checking accuracy on test set
Got 7168 / 10000 correct (71.68%)

epoch 4, loss = 0.7455
Checking accuracy on test set
Got 7362 / 10000 correct (73.62%)

epoch 5, loss = 0.6889
Checking accuracy on test set
Got 7468 / 10000 correct (74.68%)

epoch 6, loss = 0.6384
Checking accuracy on test set
Got 7649 / 10000 correct (76.49%)

epoch 7, loss = 0.5920
Checking accuracy on test set
Got 7672 / 10000 correct (76.72%)

epoch 8, loss = 0.5421
Checking accuracy on test set
Got 7654 / 10000 correct (76.54%)

epoch 9, loss = 0.5142
Checking accuracy on test set
Got 7798 / 10000 c

Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/rsp/anaconda3/envs/ml/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/hom

KeyboardInterrupt: 