In [1]:
import torch
from torch.utils.data import DataLoader 
from torch.nn import init
import torch.optim as optim 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time 

In [2]:
def load_data_fashion_mnist(batch_size, resize = None, path = './Dataset/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory"""
    trans = []
    if resize:
        trans.append(transforms.Resize(size = resize))
    trans.append(transforms.ToTensor())
    transform = transforms.Compose(trans)
    
    mnist_train = torchvision.datasets.FashionMNIST(root = path, train = True, download = True, transform = transform) 
    mnist_test = torchvision.datasets.FashionMNIST(root = path,  train = False, download = True, transform = transform)
    
    train_iter = DataLoader(mnist_train, batch_size= batch_size, shuffle = True, num_workers = 4) 
    test_iter = DataLoader(mnist_test, batch_size = batch_size, shuffle = True, num_workers = 4)
    
    return train_iter, test_iter

In [3]:
def evaluate_accuracy(data_iter, network, device = None):
    if device is None:
        device = list(network.parameters())[0].device
    with torch.no_grad():
        acc_num, n = 0.0, 0 
        for X, y in data_iter:
            network.eval()
            y_hat = network(X.to(device))
            acc_num  += (y_hat.argmax(dim = 1)==y.to(device)).float().sum().cpu().item()
            network.train()
            n += y.shape[0]
    acc = acc_num/n
    return acc 

In [4]:
def train_network(network, train_iter, test_iter, optimizer, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
    network = network.to(device)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(num_epochs): 
        train_loss_sum, train_acc_num, batch_count, n, start_time = 0.0, 0, 0, 0, time.time() 
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = network(X)
            loss = loss_fn(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss_sum += loss.cpu().item()
            train_acc_num += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, network)
        print('epoch: %d, train_loss: %.3f, train_acc: %.3f, test_acc: %.3f, time: %.2f'
              %(epoch+1, train_loss_sum/batch_count, train_acc_num/n, test_acc, (time.time()-start_time)))

In [20]:
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x):
        return F.avg_pool2d(x, kernel_size = x.shape[2:])

In [8]:
class Inception(nn.Module):
    def __init__(self,in_c,c1,c2,c3,c4):
        super().__init__()
        #line one
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1) 
        #line two
        self.p2_1 = nn.Conv2d(in_c, c2[0], kernel_size=1 )
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        #line three
        self.p3_1 = nn.Conv2d(in_c, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        #line four d
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)
    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        return torch.cat((p1,p2,p3,p4), dim=1)

In [23]:
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                  nn.ReLU(),
                  nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                  nn.Conv2d(64, 192, kernel_size=3, padding=1),
                  nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b3 = nn.Sequential(Inception(192, 64, (96,128), (16,32),32),
                  Inception(256,128,(128,192),(32,96),64),
                  nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b5 = nn.Sequential(Inception(832,256,(160,320),(32,128),128),
                  Inception(832,384,(192,384),(48,128),128),
                  GlobalAvgPool2d(),
                  nn.Flatten(),
                  nn.Linear(1024,10))
google_net = nn.Sequential(b1,b2,b3,b4,b5)


In [24]:
X = torch.rand(1,1,96,96)
for name, blk in google_net.named_children():
    X = blk(X)
    print(name,'outshape:',X.shape)

0 outshape: torch.Size([1, 64, 24, 24])
1 outshape: torch.Size([1, 192, 12, 12])
2 outshape: torch.Size([1, 480, 6, 6])
3 outshape: torch.Size([1, 832, 3, 3])
4 outshape: torch.Size([1, 10])


In [28]:
batch_size = 64
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=88)

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(google_net.parameters(), lr=lr)
train_network(google_net, train_iter, test_iter, optimizer, num_epochs)

epoch: 1, train_loss: 0.207, train_acc: 0.923, test_acc: 0.903, time: 36.64
epoch: 2, train_loss: 0.199, train_acc: 0.925, test_acc: 0.906, time: 37.82
epoch: 3, train_loss: 0.186, train_acc: 0.930, test_acc: 0.911, time: 37.83
epoch: 4, train_loss: 0.172, train_acc: 0.935, test_acc: 0.911, time: 37.46
epoch: 5, train_loss: 0.165, train_acc: 0.938, test_acc: 0.913, time: 37.21


In [30]:
torch.cuda.empty_cache()
torch.cuda.memory_allocated()
torch.cuda.max_memory_allocated()