In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.datasets import FashionMNIST

import numpy as np
import matplotlib.pyplot as plt
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [39]:
#Add Torch with no grad to eval sptep

def train(model,
          train_dataloader,
          val_dataloader,
          optimizer,
          criterion,
          num_epochs: int,
          is_gpu: bool = torch.cuda.is_available(),
          ) -> dict:
    
    losses = []
    acc = []
    val_losses = []
    val_acc = []

    # Epoch process
    for i in range(num_epochs):
        epoch_loss = 0.0
        epoch_correct = 0
        val_epoch_loss = 0.0
        val_epoch_correct = 0

        model.train()
        #Batch process
        for inputs, labels in train_dataloader:
            inputs, labels = (inputs.to(device), labels.to(device)) if is_gpu else (inputs, labels)
            optimizer.zero_grad()
            with torch.set_grad_enabled(True):
                outputs = model(inputs.float())
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item() * inputs.size(0)
            epoch_correct += torch.sum(preds == labels.data)

        # Validation step
        model.eval()
        for inputs, labels in val_dataloader:
            inputs, labels = (inputs.to(device), labels.to(device)) if is_gpu else (inputs, labels)
            with torch.no_grad():
                val_outputs = model(inputs.float())
                val_loss = criterion(val_outputs, labels)
            _, val_preds = torch.max(val_outputs, 1)
            val_epoch_loss += val_loss.item() * inputs.size(0)
            val_epoch_correct += torch.sum(val_preds == labels.data)
        
        epoch_loss = epoch_loss / len(train_dataloader.dataset)
        epoch_acc = epoch_correct.double() / len(train_dataloader.dataset)
        losses.append(epoch_loss)
        acc.append(epoch_acc)

        val_epoch_loss = val_epoch_loss / len(val_dataloader.dataset)
        val_epoch_acc = val_epoch_correct.double() / len(val_dataloader.dataset)
        val_losses.append(val_epoch_loss)
        val_acc.append(val_epoch_acc)

        if (i+1) % 5 == 0:
            print(f'epoch{i+1}')
            print()
            print(f'Loss: {epoch_loss} Acc: {epoch_acc*100} Val Loss: {val_epoch_loss} Val Acc: {val_epoch_acc*100}')
            print('='*100)

    history = {
        'loss': losses,
        'val_loss': val_losses,
        'acc': acc,
        'val_acc': val_acc,
        }
    return history

# Evaluation
def eval(model, test_dataloader):
    test_loss = 0.0
    test_correct = 0

    model.eval()
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        test_outputs = model(inputs.float())
        test_loss = criterion(test_outputs, labels)
        _, test_preds = torch.max(test_outputs, 1)
        test_loss += test_loss.item() * inputs.size(0)
        test_correct += torch.sum(test_preds == labels.data)
    test_loss = test_loss / len(test_dataloader.dataset)
    test_acc = test_correct.double() / len(test_dataloader.dataset)
    return test_loss, test_acc

In [34]:

def get_dataloaders(batch_size=128, resize=224):
    data_transform = transforms.Compose([
                                        transforms.Resize(resize),
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.5), (0.5))
                                        ])


    train_dataset = datasets.FashionMNIST(
        root="datasets", train=True, transform=data_transform, download=True,
    )

    nb_train = int(0.8 * len(train_dataset))
    nb_valid =  int(0.2 * len(train_dataset))

    train_dataset, val_dataset = torch.utils.data.dataset.random_split(train_dataset, [nb_train, nb_valid])

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    val_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True
    )


    test_dataset = datasets.FashionMNIST(
        root="datasets",
        train=False,
        transform=data_transform,
        download=True,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    return train_dataloader, val_dataloader, test_dataloader


In [None]:
# Plot learning curve
def plot_history(history: dict):
    plt.figure(figsize=(20, 10))
    plt.subplot(1, 2, 1)
    plt.title('Loss')
    plt.plot(history['loss'], label='loss')
    plt.plot(history['val_loss'], label='val_loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.title('Accuracy')
    plt.plot(history['acc'], label='acc')
    plt.plot(history['val_acc'], label='val_acc')
    plt.legend()

In [23]:
class NiNet(nn.Module):
 
    def __init__(self):
        super(NiNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.Conv2d(96, 96, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(96, 96, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(384, 10, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(10, 10, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(10, 10, kernel_size=1, stride=1, padding=0),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=6, stride=1),
            nn.Softmax()
        )
 
    def forward(self, x):
        x = self.features(x)
        #x = nn.Flatten()(x)
        #x = self.classifier(x)
        return x

In [24]:
model = NiNet()

num_epochs = 100
lr=0.1
optimizer = optim.SGD(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
h = train(model, dataloader_train, dataloader_valid, optimizer, criterion, num_epochs)

RuntimeError: Given input size: (10x5x5). Calculated output size: (10x0x0). Output size is too small

In [25]:
class Inception(nn.Module):
    def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super().__init__()
 
        #1x1conv branch
        self.b1 = nn.Sequential(
            nn.Conv2d(input_channels, n1x1, kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(inplace=True)
        )
 
        #1x1conv -> 3x3conv branch
        self.b2 = nn.Sequential(
            nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1),
            nn.BatchNorm2d(n3x3_reduce),
            nn.ReLU(inplace=True),
            nn.Conv2d(n3x3_reduce, n3x3, kernel_size=3, padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(inplace=True)
        )
 
        #1x1conv -> 5x5conv branch
        #we use 2 3x3 conv filters stacked instead
        #of 1 5x5 filters to obtain the same receptive
        #field with fewer parameters
        self.b3 = nn.Sequential(
            nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1),
            nn.BatchNorm2d(n5x5_reduce),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5_reduce, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5, n5x5),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(inplace=True)
        )
 
        #3x3pooling -> 1x1conv
        #same conv
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(input_channels, pool_proj, kernel_size=1),
            nn.BatchNorm2d(pool_proj),
            nn.ReLU(inplace=True)
        )
 
    def forward(self, x):
        return torch.cat([self.b1(x), self.b2(x), self.b3(x), self.b4(x)], dim=1)

In [28]:
class GoogleNet(nn.Module):
 
    def __init__(self, num_class=10):
        super().__init__()
        self.prelayer = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
        )
 
        #although we only use 1 conv layer as prelayer,
        #we still use name a3, b3.......
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
 
        ##"""In general, an Inception network is a network consisting of
        ##modules of the above type stacked upon each other, with occasional
        ##max-pooling layers with stride 2 to halve the resolution of the
        ##grid"""
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
 
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
 
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
 
        #input feature size: 8*8*1024
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout2d(p=0.4)
        self.linear = nn.Linear(1024, num_class)
 
    def forward(self, x):
        x = self.prelayer(x)
        x = self.maxpool(x)
        x = self.a3(x)
        x = self.b3(x)
 
        x = self.maxpool(x)
 
        x = self.a4(x)
        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        x = self.e4(x)
 
        x = self.maxpool(x)
 
        x = self.a5(x)
        x = self.b5(x)
 
        #"""It was found that a move from fully connected layers to
        #average pooling improved the top-1 accuracy by about 0.6%,
        #however the use of dropout remained essential even after
        #removing the fully connected layers."""
        x = self.avgpool(x)
        x = self.dropout(x)
        x = x.view(x.size()[0], -1)
        x = self.linear(x)
 
        return x

In [29]:
model = GoogleNet(10)

num_epochs = 50
lr=0.1
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
h = train(model, dataloader_train, dataloader_valid, optimizer, criterion, num_epochs)

KeyboardInterrupt: 

In [40]:
data_transform = transforms.Compose([
                                    transforms.ToTensor(),
                                    ])


train_dataset = datasets.FashionMNIST(
    root="datasets", train=True, transform=data_transform, download=True
)

nb_train = int(0.8 * len(train_dataset))
nb_valid = int(0.2 * len(train_dataset))

train_dataset, val_dataset = torch.utils.data.dataset.random_split(train_dataset, [nb_train, nb_valid])

dataloader_train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=512,
    shuffle=True,
)
dataloader_valid = torch.utils.data.DataLoader(
    train_dataset, batch_size=512, shuffle=True
)


test_dataset = datasets.FashionMNIST(
    root="datasets",
    train=False,
    transform=data_transform,
    download=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=256,
    shuffle=True,
)

In [41]:
class LeNet(nn.Module):
 
    def __init__(self, input_size, output_size):
        super(LeNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_size, 6, kernel_size=5, padding=2),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Tanh(),
            nn.BatchNorm2d(6),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Tanh(),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 120, kernel_size=5),
            nn.Tanh(),
            nn.BatchNorm2d(120),
        )
        self.classifier = nn.Sequential(
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, 10),
            nn.Softmax(),
        )
 
    def forward(self, x):
        x = self.features(x)
        x = nn.Flatten()(x)
        x = self.classifier(x)
        return x

    
model = LeNet(1, 10)

num_epochs = 100
lr=0.1
optimizer = optim.SGD(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
h = train(model, dataloader_train, dataloader_valid, optimizer, criterion, num_epochs)

epoch5

Loss: 1.6773725372950237 Acc: 80.11666666666667 Val Loss: 1.6709065656661988 Val Acc: 80.61458333333333
epoch10

Loss: 1.6512355321248373 Acc: 81.9625 Val Loss: 1.6515842320124308 Val Acc: 81.9875
epoch15

Loss: 1.64047118918101 Acc: 82.75833333333334 Val Loss: 1.6392014706929525 Val Acc: 82.81875
epoch20

Loss: 1.6250689061482748 Acc: 84.52708333333334 Val Loss: 1.625179575284322 Val Acc: 85.02708333333334
epoch25

Loss: 1.599633267402649 Acc: 86.95416666666667 Val Loss: 1.5999348478317261 Val Acc: 86.9875
epoch30

Loss: 1.588101835568746 Acc: 88.00625 Val Loss: 1.5842829777399698 Val Acc: 88.42916666666667
epoch35

Loss: 1.5788849395116171 Acc: 88.97916666666667 Val Loss: 1.5778144731521606 Val Acc: 89.01458333333333
epoch40

Loss: 1.5725830513636272 Acc: 89.45833333333333 Val Loss: 1.5753291152318318 Val Acc: 89.1875
epoch45

Loss: 1.5667213538487752 Acc: 90.02291666666666 Val Loss: 1.571346619606018 Val Acc: 89.51875
epoch50

Loss: 1.5617999776204428 Acc: 90.48125 Val Loss: