In [1]:
import logging
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
import torch
from torch import nn

In [2]:
logger = logging.getLogger(__name__)

# Seed every RNG the notebook draws from: NumPy drives the stratified split,
# torch drives weight initialisation and dropout.
np.random.seed(2019)
torch.manual_seed(2019)

log_path = datetime.now().strftime('./logs/%Y-%m-%d-%H-%M-%S.log')
# basicConfig(filename=...) opens the log file immediately and raises
# FileNotFoundError when the target directory is missing, so create it first.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(filename=log_path, level=logging.INFO)

# Mirror log records into the notebook output. The guard keeps a re-run of this
# cell from stacking duplicate console handlers (FileHandler subclasses
# StreamHandler, hence the exact type comparison).
root_logger = logging.getLogger()
if not any(type(handler) is logging.StreamHandler for handler in root_logger.handlers):
    root_logger.addHandler(logging.StreamHandler())

In [3]:
# Training images. NOTE: unpickling executes arbitrary code, so only load
# pickle files that come from a trusted source.
x_tr = pd.read_pickle('./dataset/train_images.pkl')
num_samples, height, width = x_tr.shape[0], x_tr.shape[1], x_tr.shape[2]
# Insert a singleton channel axis: (N, H, W) -> (N, 1, H, W).
x_tr = x_tr.reshape((num_samples, 1, height, width))

# Labels are taken from the second column of the CSV.
y_tr = pd.read_csv('./dataset/train_labels.csv').values[:, 1]

num_classes = 10

In [4]:
# Quick sanity check of the loaded arrays: image tensor shape, then label shape.
print(x_tr.shape, y_tr.shape, sep='\n')

(40000, 1, 64, 64)
(40000,)


In [5]:
%matplotlib widget

# Histogram of the training labels, drawn through the explicit Axes interface.
fig, ax = plt.subplots()
ax.set_title('Classes Histogram')
ax.set_xlabel('Class')
ax.set_ylabel('Count')

# Integer edges 0..num_classes give one unit-wide bin per class index.
class_edges = range(num_classes + 1)
ax.set_xticks(class_edges)
ax.hist(y_tr, bins=class_edges, histtype='bar', rwidth=0.9)
plt.show()

FigureCanvasNbAgg()

In [6]:
def make_layers(cfg, in_channels=1, batch_norm=False):
    """Assemble the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of layer specs processed in order. The string ``'M'``
            appends a 2x2 max-pool with stride 2; any other entry is used as
            the output-channel count of a 3x3 convolution (padding 1) that is
            followed by an in-place ReLU.
        in_channels: Number of channels fed into the first convolution.
        batch_norm: If true, a ``BatchNorm2d`` is placed between every
            convolution and its ReLU.

    Returns:
        An ``nn.Sequential`` holding the generated layers.
    """
    modules = []
    channels = in_channels
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        conv_block = [nn.Conv2d(channels, spec, kernel_size=3, padding=1)]
        if batch_norm:
            conv_block.append(nn.BatchNorm2d(spec))
        conv_block.append(nn.ReLU(inplace=True))

        modules.extend(conv_block)
        # The next convolution consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)

In [7]:
class VGG(nn.Module):
    """VGG-style classifier: conv features -> adaptive average pool -> MLP head.

    Args:
        features: Module producing the convolutional feature maps (for example
            the result of ``make_layers``).
        avgpool_size: Output size handed to ``nn.AdaptiveAvgPool2d`` (an int or
            a ``(height, width)`` pair).
        num_classes: Number of output logits.
        init_weights: When true, re-initialise all Conv2d / BatchNorm2d / Linear
            parameters with ``_initialize_weights``.

    The width of the first fully connected layer is derived from ``features``
    and ``avgpool_size`` instead of being fixed at ``128 * 4 * 4``, so shallower
    or deeper feature stacks and other pooling sizes work without editing the
    class. For a 128-channel extractor with a 4x4 pool the result is unchanged.
    """

    def __init__(self, features, avgpool_size, num_classes, init_weights=True):
        super(VGG, self).__init__()  # Initialize
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d(avgpool_size)
        self.classifier = nn.Sequential(
            nn.Linear(self._classifier_in_features(features, avgpool_size), 1024),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(1024, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    @staticmethod
    def _classifier_in_features(features, avgpool_size, default_channels=128, default_side=4):
        """Return the flattened feature count entering the classifier.

        The channel count is the ``out_channels`` of the last ``nn.Conv2d``
        found in ``features``. Anything that cannot be determined statically
        (no convolution present, or a ``None`` pooling dimension, which means
        "keep the input size") falls back to the previously hard-coded values
        of 128 channels and a side of 4.
        """
        channels = default_channels
        if isinstance(features, nn.Module):
            for module in features.modules():
                if isinstance(module, nn.Conv2d):
                    channels = module.out_channels

        if isinstance(avgpool_size, (tuple, list)):
            height, width = avgpool_size
        else:
            height = width = avgpool_size
        height = default_side if height is None else height
        width = default_side if width is None else width
        return channels * height * width

    def forward(self, x):
        """Compute class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view((x.size(0), -1))
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise parameters of every Conv2d, BatchNorm2d and Linear submodule.

        Convolutions use Kaiming-normal (fan-out, ReLU) weights with zero bias,
        batch-norm layers start at weight 1 / bias 0, and linear layers use
        N(0, 0.01) weights with zero bias.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [9]:
# --- Evaluation protocol -----------------------------------------------------
num_tests = 1        # how many independent train/validation runs to perform
test_size = 0.2      # fraction of the data held out by the stratified split
test_results = []

# --- Network architecture ----------------------------------------------------
# Ints are conv output channels, 'M' is a max-pool. The deeper stages
# (256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M') are currently
# left out of the configuration.
vgg_cfg = [64, 64, 'M', 128, 128, 'M']
vgg_avgpool_size = (4, 4)

# --- Optimisation schedule ---------------------------------------------------
epochs = 10
mini_batch_size = 256       # training mini-batch size
mini_batch_size_cv = 128    # validation mini-batch size
mini_batch_logs = 25        # log the training loss every N mini-batches

# --- Checkpointing and validation cadence (both in epochs) -------------------
checkpoint_epochs = 5
checkpoint_path = './checkpoints/vgg_e{epoch}.pt'
cross_validation_epochs = 5

# --- Loss histories filled in by the training loop ---------------------------
tr_losses, cv_losses = [], []

In [None]:
# torch.save does not create missing directories; make sure the checkpoint
# folder exists before training starts rather than failing several epochs in.
checkpoint_dir = os.path.dirname(checkpoint_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

for _ in range(num_tests):
    # One stratified hold-out split per run (class proportions preserved).
    tr_idx, cv_idx = next(StratifiedShuffleSplit(n_splits=1, test_size=test_size).split(x_tr, y_tr))

    vgg = VGG(make_layers(vgg_cfg, in_channels=1, batch_norm=True),
              num_classes=num_classes,
              avgpool_size=vgg_avgpool_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(vgg.parameters())

    for epoch in range(epochs):
        # Validation below switches to eval mode, so re-enable dropout and
        # batch-norm statistic updates at the start of every training epoch.
        vgg.train()

        mini_batch_losses = []
        # Integer batch count; array_split tolerates a set size that is not an
        # exact multiple of the batch size (np.split would raise).
        num_mini_batches = int(np.ceil(tr_idx.shape[0] / mini_batch_size))
        for mini_batch, mini_batch_idx in enumerate(np.array_split(tr_idx, num_mini_batches)):
            # .float() is a no-op for float32 input and guards against other
            # dtypes. NOTE(review): the pickled image dtype is not visible here.
            x_mini_batch = torch.from_numpy(x_tr[mini_batch_idx]).float()
            y_mini_batch = torch.from_numpy(y_tr[mini_batch_idx]).long()

            y_pred = vgg(x_mini_batch)  # Calculate output of train
            loss = criterion(y_pred, y_mini_batch)  # Calculate loss on train

            mini_batch_losses.append(loss.item())
            if (mini_batch + 1) % mini_batch_logs == 0:
                logger.info(f'Epoch : {epoch + 1}  Mini Batch : {mini_batch + 1}  Loss : {loss.item()}')

            optimizer.zero_grad()  # Reset gradients so it wouldn't be accumulated
            loss.backward()  # Calculate gradients
            optimizer.step()  # Update the weights

        # Epoch loss is the sum of the per-mini-batch mean losses.
        tr_loss = sum(mini_batch_losses)
        logger.info(f'Epoch : {epoch + 1}  Train Loss : {tr_loss}')
        tr_losses.append(tr_loss)

        if (epoch + 1) % checkpoint_epochs == 0:
            # NOTE: the file name carries the 0-based epoch index (e.g. "e4" after
            # the 5th epoch); kept as-is so existing checkpoint names stay valid.
            torch.save(vgg, checkpoint_path.format(epoch=epoch))

        if (epoch + 1) % cross_validation_epochs == 0:
            # Evaluate with dropout disabled and batch-norm using its running
            # statistics, and without building autograd graphs.
            vgg.eval()
            mini_batch_losses = []
            num_mini_batches = int(np.ceil(cv_idx.shape[0] / mini_batch_size_cv))
            with torch.no_grad():
                for mini_batch_idx in np.array_split(cv_idx, num_mini_batches):
                    x_mini_batch = torch.from_numpy(x_tr[mini_batch_idx]).float()
                    y_mini_batch = torch.from_numpy(y_tr[mini_batch_idx]).long()

                    y_pred = vgg(x_mini_batch)  # Calculate output of cross validation
                    loss = criterion(y_pred, y_mini_batch)  # Calculate loss on cross validation

                    mini_batch_losses.append(loss.item())

            cv_loss = sum(mini_batch_losses)
            logger.info(f'Epoch : {epoch + 1}  Cross Validation Loss : {cv_loss}')
            cv_losses.append(cv_loss)