In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, OneHotEncoder

import matplotlib.pyplot as plt
from matplotlib import cm

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# The input data files live in the "../input/" directory.
# Executing this cell (Run button or Shift+Enter) prints the entries of that directory.

import os
print(os.listdir(path="../input"))


In [None]:
# Import the data: column 0 of train.csv is used as the label, the remaining
# columns as the pixel values; test.csv is used as pixel values only.
train = pd.read_csv("../input/train.csv")
X_train_full = np.asarray(train.iloc[:, 1:], dtype=np.float32)
y_train_full = np.asarray(train.iloc[:, 0], dtype=np.float32).reshape(train.shape[0], 1)
test = pd.read_csv("../input/test.csv")
X_test = np.asarray(test, dtype=np.float32)

# Rescale every image (row) to unit L2 norm for the NN. This is a per-sample
# rescaling, not a division by the maximum pixel value; for non-negative pixels
# every resulting value still lies in [0, 1].
# Normalizer is stateless, so transform() can be called without fit();
# copy=False lets it work in place on the float32 arrays.
normalizer = Normalizer(norm="l2", copy=False)
X_train_full = normalizer.transform(X_train_full)
X_test = normalizer.transform(X_test)

# Put in 2d shape: one 28x28 image per sample
X_train_full = X_train_full.reshape((train.shape[0], 28, 28))
X_test = X_test.reshape((test.shape[0], 28, 28))

# One-hot encoding of the output.
# The encoder is built without `n_values` (removed from scikit-learn) and with
# np.int64 instead of the `np.int` alias (removed from NumPy), so this cell runs
# on both old and current library versions; the categories are inferred from
# the labels in either case.
one_hot_enc = OneHotEncoder(dtype=np.int64)
y_train_number = y_train_full.flatten().astype(np.int64) # integer labels: used for plotting and for the split below
y_train_full = one_hot_enc.fit_transform(y_train_full)
print(y_train_full.shape)

# Define a validation set for the training.
# A fixed random_state makes the split (and therefore every result that follows)
# reproducible under Restart & Run All.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_number, test_size=0.25, random_state=42)
print(X_train.shape, X_valid.shape, X_test.shape)


In [None]:
# Visualize the data: one figure per digit class, with the pixel-wise mean image
# in the left panel and the pixel-wise median image in the right panel.
for digit in range(10):
    # All training images whose label equals the current digit
    images_of_digit = X_train_full[y_train_number == digit]
    avg_digit = images_of_digit.mean(axis=0)
    med_digit = np.median(images_of_digit, axis=0)
    fig = plt.figure(digit)
    for panel_position, panel_image in enumerate((avg_digit, med_digit), start=1):
        # add_subplot makes the new axes current, so plt.imshow draws into it
        fig.add_subplot(1, 2, panel_position)
        plt.imshow(panel_image, cmap=cm.Greys)
# fig.show()


In [None]:
# Prepare inputs for PyTorch
class MnistDataset(Dataset):
    """Dataset serving (image, label) pairs from in-memory NumPy arrays.

    Args:
        X: array indexed as ``X[index, :, :]``; each item is reshaped to
            ``(1, 28, 28)`` (a single-channel 28x28 image).
        y: array-like of labels, indexed in parallel with ``X``.
        transforms: optional sequence of callables applied, in order, to the
            image tensor. ``None`` (the default) means no transforms.
    """

    def __init__(self, X, y, transforms=None):
        self.X = X
        self.y = y#.toarray()
        # A fresh list per instance: a mutable default argument would be shared
        # by every dataset created without an explicit `transforms`.
        self.transforms = [] if transforms is None else transforms

    def __len__(self):
        """Return the number of samples, taken from the first axis of ``X``."""
        return self.X.shape[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index`` as a pair of tensors.

        The image tensor has shape ``(1, 28, 28)`` before the transforms run;
        the label is the tensor form of ``y[index]``.
        """
        image = torch.from_numpy(self.X[index, :, :].reshape((1, 28, 28)))
        for trans in self.transforms:
            image = trans(image)
        return image, torch.from_numpy(np.array(self.y[index]))


# Settings shared by the training and validation loaders
data_params = {'batch_size': 128,
              'shuffle': True,
              'num_workers': 4}

transforms_list = []
# Each split's images must be paired with the labels returned by the same
# train_test_split call. Using the full, unsplit label vector here would attach
# unrelated labels to the training images.
train_ds = MnistDataset(X_train, y_train, transforms_list)
valid_ds = MnistDataset(X_valid, y_valid, transforms_list)
train_loader = DataLoader(train_ds, **data_params)
valid_loader = DataLoader(valid_ds, **data_params)


In [None]:
class MnistNN(nn.Module):
    """Small CNN for 1x28x28 inputs: two conv/pool stages, then two linear layers.

    ``forward`` returns raw class scores (logits) of shape ``(batch, 10)``.
    ``F.cross_entropy`` applies log-softmax itself, so the network must not
    normalise its output; use ``F.softmax(logits, dim=1)`` when per-sample
    probabilities are needed. ``argmax(dim=1)`` gives the predicted class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        # 32 channels * 4 * 4 spatial positions = 512 flattened features
        self.fc1 = nn.Linear(512, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Compute class logits for a batch ``x`` of shape ``(batch, 1, 28, 28)``."""
        # conv 5x5: 1 x 28 x 28 -> 16 x 24 x 24   (28 - 5 + 1 = 24)
        x = self.conv1(x)
        # max-pool 2x2: 16 x 24 x 24 -> 16 x 12 x 12
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        # conv 5x5: 16 x 12 x 12 -> 32 x 8 x 8   (12 - 5 + 1 = 8)
        x = self.conv2(x)
        # Channel dropout must follow the module's mode; without `training=`
        # the functional form stays active even after model.eval().
        x = F.dropout2d(x, training=self.training)
        # max-pool 2x2: 32 x 8 x 8 -> 32 x 4 x 4 = 512 values per sample
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        # Flatten layer
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # Return logits: a softmax over dim=0 would normalise across the batch
        # instead of across the classes, and the loss expects unnormalised scores.
        return self.fc2(x)

# Instantiate the network (CPU); the commented line is the GPU alternative.
model = MnistNN()
# model = MnistNN().cuda()

# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)


In [None]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Relies on the notebook-level ``model``, ``optimizer`` and ``train_loader``.
    The model is put in training mode, and for every batch the cross-entropy
    loss is back-propagated followed by one optimizer step. A progress line is
    printed every 50 batches.

    Args:
        epoch: epoch number; only used in the progress message.
    """
    model.train()
    for train_batch_indx, (Xt, yt) in enumerate(train_loader):
        optimizer.zero_grad()
        # Tensors are passed to the model directly; the deprecated
        # torch.autograd.Variable wrapper is not needed.
        # Xt, yt = Xt.cuda(non_blocking=True), yt.cuda(non_blocking=True) # On GPU
        train_pred = model(Xt)
        train_loss = F.cross_entropy(train_pred, yt)
        # Start backwards step of backprop
        train_loss.backward()
        optimizer.step()
        if train_batch_indx % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tTLoss: {:.6f}'.format(
                epoch,
                train_batch_indx * len(Xt), len(train_loader.dataset),
                100. * train_batch_indx / len(train_loader),
                train_loss.item()))


In [None]:
# torch.cuda.get_device_name(0)

from sklearn.metrics import confusion_matrix

# Train for 10 epochs; after each one, evaluate on the validation set and print
# its confusion matrix (rows: true digit, columns: predicted digit).
for epoch in range(10):
    train(epoch)
    # Start from an all-zero matrix: np.ndarray((10, 10)) returns uninitialised
    # memory, so accumulating into it with += gives arbitrary counts.
    conf_mat = np.zeros((10, 10), dtype=np.int64)
    # Evaluation mode switches dropout off; train() switches it back on at the
    # start of the next epoch.
    model.eval()
    # No gradients are needed for validation.
    with torch.no_grad():
        for Xv, yv in valid_loader:
            # Xv, yv = Xv.cuda(non_blocking=True), yv.cuda(non_blocking=True) # On GPU
            valid_out = model(Xv)
            # Predicted class = index of the largest score along the class axis
            valid_pred = valid_out.argmax(dim=1)
            conf_mat += confusion_matrix(yv.numpy(), valid_pred.numpy(),
                                         labels=list(range(10)))
    print(conf_mat)
    print("--")

# Confusion matrix of the final epoch
print("---------------------")
print(conf_mat)
