### 1. Fashion MNIST Data Read

Use the `load_mnist` function from https://github.com/zalandoresearch/fashion-mnist

In [1]:
"""
Define Load MNIST
"""
import os
import gzip
import numpy as np

def load_mnist(path, kind='train'):
    """Read one split of gzip-compressed IDX files found under `path`.

    Args:
        path: Directory holding the ``*-ubyte.gz`` files.
        kind: File-name prefix selecting the split (e.g. ``'train'``).

    Returns:
        A tuple ``(images, labels)``. ``labels`` is a 1-D uint8 array and
        ``images`` is a uint8 array reshaped to ``(len(labels), 784)``.
    """
    def _read_payload(file_name, header_bytes):
        # Decompress the whole file and view everything after the header
        # as unsigned bytes.
        with gzip.open(os.path.join(path, file_name), 'rb') as stream:
            return np.frombuffer(stream.read(), dtype=np.uint8,
                                 offset=header_bytes)

    # label file: 8 header bytes are skipped
    labels = _read_payload('%s-labels-idx1-ubyte.gz' % kind, 8)

    # image file: 16 header bytes are skipped, then one row per label
    pixels = _read_payload('%s-images-idx3-ubyte.gz' % kind, 16)
    images = pixels.reshape(labels.shape[0], 784)

    return images, labels

In [3]:
"""
Read Train Dataset and Divide Train/Val Dataset
"""
img_root_path = 'fashion-mnist/data/fashion'
num_class = 10

# fraction of every class that goes to the training split
train_ratio = 0.8

# A dedicated, seeded generator makes the train/val split identical on every
# run without touching numpy's global random state.
split_seed = 0
split_rng = np.random.RandomState(split_seed)

img, label = load_mnist(img_root_path, kind='train')

# Per-class pieces are collected in lists and concatenated once after the
# loop instead of re-allocating the growing arrays on every iteration.
train_img_parts, train_label_parts = [], []
val_img_parts, val_label_parts = [], []

for i in range(num_class):
    # Boolean-mask indexing returns a fresh copy, so the in-place shuffle
    # below never writes into the array returned by load_mnist.
    class_img = img[label == i]
    class_img = class_img.reshape(class_img.shape[0], 28, 28)

    print('Class {} Number: {}'.format(i, class_img.shape[0]))

    # shuffle the images of this class along the first axis
    split_rng.shuffle(class_img)

    train_num = int(class_img.shape[0] * train_ratio)
    val_num = class_img.shape[0] - train_num

    # first `train_num` shuffled images -> train, the remainder -> val;
    # labels stay float64 as before (the Dataset class casts them to int)
    train_img_parts.append(class_img[:train_num])
    train_label_parts.append(np.full(train_num, i, dtype=np.float64))
    val_img_parts.append(class_img[train_num:])
    val_label_parts.append(np.full(val_num, i, dtype=np.float64))

train_img_total = np.concatenate(train_img_parts, axis=0)
train_label_total = np.concatenate(train_label_parts, axis=0)
val_img_total = np.concatenate(val_img_parts, axis=0)
val_label_total = np.concatenate(val_label_parts, axis=0)

Class 0 Number: 6000
Class 1 Number: 6000
Class 2 Number: 6000
Class 3 Number: 6000
Class 4 Number: 6000
Class 5 Number: 6000
Class 6 Number: 6000
Class 7 Number: 6000
Class 8 Number: 6000
Class 9 Number: 6000


In [4]:
"""
Read Test Dataset
"""
# load the 't10k' split, then view every flat 784-value row as a 28x28 image
test_img, test_label = load_mnist(img_root_path, kind='t10k')
test_img = test_img.reshape((test_img.shape[0], 28, 28))

### 2. Define Image Dataset Class

In [5]:
from torch.utils import data
from PIL import Image

# define dataset class
class Dataset(data.Dataset):
    """Array-backed dataset that yields ``(image, label)`` pairs.

    Args:
        img_array: Array of images, indexed along its first axis.
        label_array: Labels, indexed in step with `img_array`.
        transform: Optional callable applied to the PIL image before it
            is returned.
    """

    def __init__(self, img_array, label_array, transform=None):
        self.img_array = img_array
        self.label_array = label_array
        self.transform = transform

    def __len__(self):
        """Return the number of images (size of the first axis)."""
        num_samples = self.img_array.shape[0]
        return num_samples

    def __getitem__(self, idx):
        """Return sample `idx` as an 8-bit grayscale image and an int label."""
        # array -> PIL image, forced to single-channel 'L' mode
        sample = Image.fromarray(self.img_array[idx]).convert('L')

        target = int(self.label_array[idx])

        # without a transform the PIL image is handed back unchanged
        if self.transform is None:
            return sample, target

        return self.transform(sample), target

### 3. Set Transform and Data Loader

In [6]:
from torchvision import transforms
import torch

batch_size = 32

# per-phase image arrays
img_array = dict(train=train_img_total, val=val_img_total)

# per-phase label arrays
label_array = dict(train=train_label_total, val=val_label_total)

# preprocessing pipelines; only the training one augments (random flip)
data_transform = dict(
    train=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
    val=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

# one Dataset per phase, wired to its own arrays and pipeline
image_dataset = {
    phase: Dataset(img_array[phase], label_array[phase], data_transform[phase])
    for phase in ('train', 'val')
}

# shuffled mini-batch loaders, each using 4 worker processes
data_loader = {
    phase: torch.utils.data.DataLoader(
        image_dataset[phase],
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for phase in ('train', 'val')
}

# number of samples per phase
dataset_size = {phase: len(dataset) for phase, dataset in image_dataset.items()}

# first CUDA device when available, otherwise the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

### 4. Train Function

Find the model parameters that give the minimum validation loss.

The optimizer's learning rate is adjusted by the scheduler based on the validation loss.

In [7]:
import copy

def train(model, criterion, optimizer, scheduler, save_file_path,
          num_epoch=100, max_patience=10):
    """Train `model` and checkpoint it whenever the validation loss improves.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level `data_loader` dict; batches are moved to the module-level
    `device`. The mean validation loss is fed to `scheduler.step` and, when
    it is a new minimum, the weights are written to `save_file_path`.
    Training stops early once the validation loss has failed to improve for
    `max_patience` consecutive epochs. On return, `model` holds the weights
    of the best validation epoch.

    Args:
        model: Network to optimize (modified in place).
        criterion: Loss called as ``criterion(output, label)``.
        optimizer: Optimizer updating the parameters of `model`.
        scheduler: Scheduler stepped with the validation loss once per epoch.
        save_file_path: Destination of the best ``state_dict`` checkpoint.
        num_epoch: Maximum number of epochs.
        max_patience: Number of consecutive non-improving epochs tolerated
            before stopping.

    Raises:
        ValueError: If a phase's data loader yields no samples.
    """
    best_model_wt = copy.deepcopy(model.state_dict())
    best_loss = np.inf

    # count how many epochs the validation loss has not improved
    patience = 0

    for epoch in range(num_epoch):
        # early stopping
        if patience >= max_patience:
            break

        print('Epoch {}/{}'.format(epoch+1, num_epoch))

        # each epoch has a train and a validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'

            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            num_samples = 0

            # iterate over mini-batches
            for img, label in data_loader[phase]:
                img = img.to(device)
                label = label.to(device)

                # gradients only need resetting when they will be recomputed
                if is_train:
                    optimizer.zero_grad()

                # track gradients in the train phase only
                with torch.set_grad_enabled(is_train):
                    output = model(img)
                    loss = criterion(output, label)

                    # back propagation and optimizer step only when training
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # weight the batch-mean loss by the batch size so the epoch
                # average stays exact when the last batch is smaller
                running_loss += loss.item() * img.size(0)
                num_samples += img.size(0)

            if num_samples == 0:
                raise ValueError(
                    "data_loader['{}'] yielded no samples".format(phase))

            epoch_loss = running_loss / num_samples

            if phase == 'val':
                scheduler.step(epoch_loss)

                if epoch_loss < best_loss:
                    # new minimum: remember and checkpoint the weights
                    best_loss = epoch_loss
                    best_model_wt = copy.deepcopy(model.state_dict())
                    torch.save(best_model_wt, save_file_path)

                    # reset patience count
                    patience = 0
                else:
                    patience += 1

                print('Learning Rate: {}'.format(optimizer.param_groups[0]['lr']))
                print('Best Validation Loss: {}'.format(best_loss))

    # leave the model with its best weights rather than the last epoch's
    model.load_state_dict(best_model_wt)

### 5. Test Function

Run Model Prediction on the Test Set and Evaluate the Result

Check Processing time

In [8]:
import time

def test(model, save_file_path):
    """Evaluate `model` on the test set using the weights at `save_file_path`.

    Loads the checkpoint, classifies every image in the module-level
    `test_img` one at a time, and prints the confusion matrix (rows are true
    labels, columns are predictions), the overall accuracy and the mean
    wall-clock time per image. The timing covers the per-image
    preprocessing as well as the forward pass.

    Args:
        model: Network whose architecture matches the checkpoint.
        save_file_path: Path of the ``state_dict`` file to load.
    """
    # map_location lets a checkpoint saved on one device load on another
    # (e.g. saved on GPU, evaluated on a CPU-only machine)
    model.load_state_dict(torch.load(save_file_path, map_location=device))
    model.eval()

    # confusion matrix
    cm = np.zeros((num_class, num_class), dtype = np.int32)

    # transform for prediction
    transform_tensor = transforms.ToTensor()
    transform_normalize = transforms.Normalize((0.1307,), (0.3081,))

    num_test = test_img.shape[0]
    correct = 0

    since = time.time()

    # inference only: skip autograd bookkeeping for every forward pass
    with torch.no_grad():
        for idx in range(num_test):
            img = Image.fromarray(test_img[idx])
            img = img.convert('L')
            img = transform_tensor(img)
            img = transform_normalize(img)

            # add a leading batch dimension, as the model expects batches
            img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
            img = img.to(device)

            label = int(test_label[idx])

            # forward network
            output = model(img)
            _, pred = torch.max(output, 1)
            pred_label = int(pred)

            # row = true label, column = predicted label
            cm[label, pred_label] += 1

            correct += int(pred_label == label)

    # max(..., 1) avoids a division by zero on an empty test set
    processing_time = (time.time() - since) / max(num_test, 1)
    accuracy = float(correct) / max(num_test, 1)

    for i in range(num_class):
        print(' '.join('{:4d}'.format(cm[i, j]) for j in range(num_class)))

    print('Accuracy: {:.4f}'.format(accuracy))
    print('Time per Image: {:.4f}'.format(processing_time))

### 6. Model Selection

Find Effective Model using torchsummary package

The candidate models are Resnet18, Mobilenetv2, and Condensenet

Resnet18 and Mobilenetv2 from https://github.com/pytorch/vision/blob/master/torchvision/models

Condensenet from https://github.com/ShichenLiu/CondenseNet

Among them, I chose Mobilenetv2 and Condensenet

In [10]:
from torchsummary import summary
import torch
import torchvision
import torch.nn as nn

"""
Resnet18
"""

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# stock torchvision architecture
model = torchvision.models.resnet18()

# replace the first convolution so the network takes 1-channel (gray) input
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7,
                        stride=2, padding=3, bias=False)

# replace the final linear layer so it outputs one score per class
num_ftr = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftr, out_features=num_class)

model = model.to(device)

# layer-by-layer output shapes and parameter counts for a 1x28x28 input
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 14, 14]           3,136
       BatchNorm2d-2           [-1, 64, 14, 14]             128
              ReLU-3           [-1, 64, 14, 14]               0
         MaxPool2d-4             [-1, 64, 7, 7]               0
            Conv2d-5             [-1, 64, 7, 7]          36,864
       BatchNorm2d-6             [-1, 64, 7, 7]             128
              ReLU-7             [-1, 64, 7, 7]               0
            Conv2d-8             [-1, 64, 7, 7]          36,864
       BatchNorm2d-9             [-1, 64, 7, 7]             128
             ReLU-10             [-1, 64, 7, 7]               0
       BasicBlock-11             [-1, 64, 7, 7]               0
           Conv2d-12             [-1, 64, 7, 7]          36,864
      BatchNorm2d-13             [-1, 64, 7, 7]             128
             ReLU-14             [-1, 6

In [16]:
"""
MobilenetV2
"""
# local mobilenetv2 module: the model is already adapted to gray images
from mobilenetv2 import mobilenet_v2

# build the network and place it on the selected device in one step
model = mobilenet_v2().to(device)

# layer-by-layer output shapes and parameter counts for a 1x28x28 input
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 14, 14]             288
       BatchNorm2d-2           [-1, 32, 14, 14]              64
             ReLU6-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]             288
       BatchNorm2d-5           [-1, 32, 14, 14]              64
             ReLU6-6           [-1, 32, 14, 14]               0
            Conv2d-7           [-1, 16, 14, 14]             512
       BatchNorm2d-8           [-1, 16, 14, 14]              32
  InvertedResidual-9           [-1, 16, 14, 14]               0
           Conv2d-10           [-1, 96, 14, 14]           1,536
      BatchNorm2d-11           [-1, 96, 14, 14]             192
            ReLU6-12           [-1, 96, 14, 14]               0
           Conv2d-13             [-1, 96, 7, 7]             864
      BatchNorm2d-14             [-1, 9

In [17]:
"""
Condensenet
"""
# local condensenet module: the model is already adapted to gray images
from condensenet import condensenet

# build the network and place it on the selected device in one step
model = condensenet().to(device)

# layer-by-layer output shapes and parameter counts for a 1x28x28 input
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             144
       BatchNorm2d-2           [-1, 16, 28, 28]              32
              ReLU-3           [-1, 16, 28, 28]               0
  LearnedGroupConv-4           [-1, 32, 28, 28]               0
       BatchNorm2d-5           [-1, 32, 28, 28]              64
              ReLU-6           [-1, 32, 28, 28]               0
            Conv2d-7            [-1, 8, 28, 28]             576
       _DenseLayer-8           [-1, 24, 28, 28]               0
       BatchNorm2d-9           [-1, 24, 28, 28]              48
             ReLU-10           [-1, 24, 28, 28]               0
 LearnedGroupConv-11           [-1, 32, 28, 28]               0
      BatchNorm2d-12           [-1, 32, 28, 28]              64
             ReLU-13           [-1, 32, 28, 28]               0
           Conv2d-14            [-1, 8,

### 7. Set Loss Function and Optimizer

Loss Function : Cross Entropy for Classification

Optimizer : SGD

Scheduler : Reduce Learning Rate by Validation Loss

In [18]:
import torch.optim as optim
from torch.optim import lr_scheduler

# cross entropy loss for multi-class classification
criterion = nn.CrossEntropyLoss()

# SGD with momentum over the parameters of the current `model`
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# lower the learning rate once the monitored (validation) loss stops
# decreasing for more than 5 epochs
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)

### 8. Train and Test Mobilenet v2

In [21]:
# fresh MobileNetV2 on the selected device
model = mobilenet_v2()
model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)

# The scheduler must wrap the optimizer created just above; reusing the
# earlier `scheduler` would adjust the learning rate of a different
# optimizer and leave this run's learning rate untouched.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

# best weights (lowest validation loss) are written to this file
save_file_path = 'mobilenetv2.pth'

train(model, criterion, optimizer, scheduler, save_file_path)

Epoch 1/100
Learning Rate: 0.01
Best Validation Loss: 0.43746373959382373
Epoch 2/100
Learning Rate: 0.01
Best Validation Loss: 0.4163805858095487
Epoch 3/100
Learning Rate: 0.01
Best Validation Loss: 0.36118652508656185
Epoch 4/100
Learning Rate: 0.01
Best Validation Loss: 0.33984858204921087
Epoch 5/100
Learning Rate: 0.01
Best Validation Loss: 0.305906698624293
Epoch 6/100
Learning Rate: 0.01
Best Validation Loss: 0.305906698624293
Epoch 7/100
Learning Rate: 0.01
Best Validation Loss: 0.2874024902184804
Epoch 8/100
Learning Rate: 0.01
Best Validation Loss: 0.2874024902184804
Epoch 9/100
Learning Rate: 0.01
Best Validation Loss: 0.2696236774722735
Epoch 10/100
Learning Rate: 0.01
Best Validation Loss: 0.2696236774722735
Epoch 11/100
Learning Rate: 0.01
Best Validation Loss: 0.2696236774722735
Epoch 12/100
Learning Rate: 0.01
Best Validation Loss: 0.2696236774722735
Epoch 13/100
Learning Rate: 0.01
Best Validation Loss: 0.25607051078478493
Epoch 14/100
Learning Rate: 0.01
Best Validat

In [22]:
# test() loads the weights stored at save_file_path, then prints the
# confusion matrix, accuracy and time per image
test(model, save_file_path)

 870    0   15    8    2    0  100    0    5    0
   2  982    0   11    0    0    3    0    2    0
  18    1  881    9   33    0   58    0    0    0
  25    7   11  897   40    1   18    0    1    0
   1    0   87   21  834    0   57    0    0    0
   1    0    0    0    0  963    0   27    3    6
  99    1   62   18   61    1  749    0    9    0
   0    0    0    0    0    6    0  974    0   20
   5    0    0    4    1    1    6    1  982    0
   0    0    0    0    0    5    0   31    2  962
Accuracy: 0.9094
Time per Image: 0.0054


### 9. Train and Test Condensenet

In [23]:
# fresh CondenseNet on the selected device
model = condensenet()
model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)

# The scheduler must wrap the optimizer created just above; reusing the
# earlier `scheduler` would adjust the learning rate of a different
# optimizer and leave this run's learning rate untouched.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

# best weights (lowest validation loss) are written to this file
save_file_path = 'condensenet.pth'

train(model, criterion, optimizer, scheduler, save_file_path)

Epoch 1/100
Learning Rate: 0.01
Best Validation Loss: 0.42809804852803546
Epoch 2/100
Learning Rate: 0.01
Best Validation Loss: 0.3083564596970876
Epoch 3/100
Learning Rate: 0.01
Best Validation Loss: 0.292668576002121
Epoch 4/100
Learning Rate: 0.01
Best Validation Loss: 0.2914826430678368
Epoch 5/100
Learning Rate: 0.01
Best Validation Loss: 0.2544105856815974
Epoch 6/100
Learning Rate: 0.01
Best Validation Loss: 0.24198614865541457
Epoch 7/100
Learning Rate: 0.01
Best Validation Loss: 0.24198614865541457
Epoch 8/100
Learning Rate: 0.01
Best Validation Loss: 0.24198614865541457
Epoch 9/100
Learning Rate: 0.01
Best Validation Loss: 0.21633389941851297
Epoch 10/100
Learning Rate: 0.01
Best Validation Loss: 0.20506977492570877
Epoch 11/100
Learning Rate: 0.01
Best Validation Loss: 0.20506977492570877
Epoch 12/100
Learning Rate: 0.01
Best Validation Loss: 0.20506977492570877
Epoch 13/100
Learning Rate: 0.01
Best Validation Loss: 0.20506977492570877
Epoch 14/100
Learning Rate: 0.01
Best V

In [24]:
# test() loads the weights stored at save_file_path, then prints the
# confusion matrix, accuracy and time per image
test(model, save_file_path)

 863    2   25   11    4    1   84    0    9    1
   0  988    0    8    0    0    2    0    2    0
  13    2  915    9   22    0   38    0    1    0
  12    6    8  923   18    0   33    0    0    0
   1    1   35   18  900    0   43    0    2    0
   0    0    0    0    0  981    0   16    0    3
  87    3   38   19   64    0  777    0   12    0
   0    0    0    0    0    4    0  977    0   19
   2    0    1    2    0    2    2    0  991    0
   0    0    0    0    0    5    0   29    1  965
Accuracy: 0.9280
Time per Image: 0.0167


### 10. Random Erasing for Augmentation

Add Transform from https://github.com/zhunzhong07/Random-Erasing

Model : Condensenet

In [25]:
# Imported under an alias so the global name `transforms` keeps referring to
# torchvision.transforms; rebinding it here would silently change what
# test() and any re-run of the earlier transform cell resolve.
from random_erasing import transforms as re_transforms

# random erasing hyperparameters, passed to RandomErasing as
# probability / sh / r1
p = 0.5
sh = 0.4
r1 = 0.3

# data augmentation
data_transform = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
        # applied last, i.e. on the already normalized tensor
        re_transforms.RandomErasing(probability = p, sh = sh, r1 = r1, mean = [0.4914]),
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
}

# image dataset
image_dataset = {x: Dataset(img_array[x], label_array[x], data_transform[x]) for x in ['train', 'val']}

# data loader
data_loader = {x: torch.utils.data.DataLoader(image_dataset[x], batch_size=batch_size, shuffle=True, num_workers=4)
                for x in ['train', 'val']}

dataset_size = {x: len(image_dataset[x]) for x in ['train', 'val']}

In [26]:
# best weights (lowest validation loss) are written to this file
save_file_path = 'condensenet_re.pth'

# fresh CondenseNet on the selected device
model = condensenet().to(device)

# optimizer and scheduler are both rebuilt for the new model
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)

train(model, criterion, optimizer, scheduler, save_file_path)

Epoch 1/100
Learning Rate: 0.01
Best Validation Loss: 0.49947190219163895
Epoch 2/100
Learning Rate: 0.01
Best Validation Loss: 0.3785525686542193
Epoch 3/100
Learning Rate: 0.01
Best Validation Loss: 0.29215894744793575
Epoch 4/100
Learning Rate: 0.01
Best Validation Loss: 0.27510109134515126
Epoch 5/100
Learning Rate: 0.01
Best Validation Loss: 0.27333004744847617
Epoch 6/100
Learning Rate: 0.01
Best Validation Loss: 0.26767446305354436
Epoch 7/100
Learning Rate: 0.01
Best Validation Loss: 0.26767446305354436
Epoch 8/100
Learning Rate: 0.01
Best Validation Loss: 0.25031330053011575
Epoch 9/100
Learning Rate: 0.01
Best Validation Loss: 0.22323818025986353
Epoch 10/100
Learning Rate: 0.01
Best Validation Loss: 0.22323818025986353
Epoch 11/100
Learning Rate: 0.01
Best Validation Loss: 0.21881425712505975
Epoch 12/100
Learning Rate: 0.01
Best Validation Loss: 0.21881425712505975
Epoch 13/100
Learning Rate: 0.01
Best Validation Loss: 0.2065012404123942
Epoch 14/100
Learning Rate: 0.01
Bes

In [27]:
# test() loads the weights stored at save_file_path, then prints the
# confusion matrix, accuracy and time per image
test(model, save_file_path)

 893    2   13    8    3    0   76    0    4    1
   0  992    0    6    0    0    0    0    1    1
  18    1  900    7   34    0   39    0    1    0
   9    2    6  944   22    1   16    0    0    0
   1    1   18   12  931    0   37    0    0    0
   0    0    0    0    0  991    0    9    0    0
  77    1   34   20   61    0  802    0    5    0
   0    0    0    0    0    4    0  985    0   11
   1    1    1    2    0    1    2    0  992    0
   0    0    0    0    0    7    0   35    0  958
Accuracy: 0.9388
Time per Image: 0.0183
