In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import autograd
from torch.utils import data
from torch.backends import cudnn

import pandas as pd
import numpy as np
import random as rd

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

#### HYPERPARAMETERS & CONFIGURATION

In [2]:
# Step size handed to the SGD optimiser.
learning_rate = 0.01
# Number of passes over the training data (single definition; the value that
# was effectively in use before was also 100).
max_epochs = 100
# Settings for manual mini-batching of the training split.
n_batches = 42
batch_size = 800

# Keyword arguments that are splatted into data.DataLoader(...).
# Only real DataLoader options may live here: an extra key such as
# 'learning_rate' makes DataLoader raise a TypeError, so the learning rate is
# kept in the module-level `learning_rate` variable instead.
# NOTE(review): this batch size (420) differs from `batch_size` above (800) -
# confirm which one is intended.
params = {'batch_size': 420,
          'shuffle': True,
          'num_workers': 6}

In [3]:
# Pick the first GPU when CUDA is usable, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Switch on cuDNN's benchmark mode.
cudnn.benchmark = True

#### PRINTING DIGITS

In [4]:
# PRINTING HANDLING HERE
def print_digit(image):
    """Render one digit as a 28x28 grayscale image.

    Args:
        image: tensor holding exactly 28 * 28 values in any shape. It may
            live on the GPU and may require gradients.

    Returns:
        None. The figure is shown via matplotlib.
    """
    # detach() + cpu() make .numpy() legal for tensors that track gradients or
    # sit on a CUDA device; reshape() also copes with non-contiguous inputs.
    pixels = image.detach().cpu().reshape(28, 28)
    plt.figure(1, figsize=(3, 3))
    plt.imshow(pixels.numpy(), cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
    
def random_digit(X, y):
    """Show a uniformly chosen sample from ``X`` and print its label.

    Args:
        X: 2-D tensor with one flattened image per row.
        y: labels, indexable with the same row index as ``X``.

    Returns:
        None. The image is displayed and the label printed.
    """
    # randrange excludes the upper bound; randint(0, n) would include n and
    # occasionally index one row past the end.
    idx = rd.randrange(X.shape[0])

    print_digit(X[idx, :])
    print(y[idx])

#### DATASET CLASS

In [11]:
class MNIST(data.IterableDataset):
    """Custom MNIST dataset for the Kaggle challenge, loaded from a CSV file.

    The CSV must contain a ``label`` column; every other column is treated as
    a pixel feature. After construction:

    * ``self.X`` - float32 tensor, one row per sample.
    * ``self.y`` - tensor of labels, one entry per sample.

    NOTE(review): this is an IterableDataset, so ``data.DataLoader`` will not
    accept ``shuffle=True`` for it - confirm how it is meant to be batched.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` and keep features and labels as tensors."""
        self.X, self.y = self.__preprocessData(path)

    def __preprocessData(self, path):
        """Load the CSV and return ``(features, labels)`` tensors."""
        digits_train = pd.read_csv(path)

        # Features are stored as float32 so they can be fed to nn.Linear
        # directly; integer inputs are rejected by floating-point layers.
        train_tensor = torch.tensor(digits_train.drop('label', axis=1).to_numpy(), dtype=torch.float32)
        labels_tensor = torch.tensor(digits_train['label'].to_numpy())

        return train_tensor, labels_tensor

    def __iter__(self, start=0, end=None):
        """Iterate over ``(sample, label)`` pairs in ``[start, end)``.

        ``end=None`` (the default) runs through the last sample; an ``end`` of
        ``-1`` would stop one sample early.
        """
        return iter(zip(self.X[start:end], self.y[start:end]))

    def __len__(self):
        """Number of samples in the dataset."""
        return self.X.shape[0]

In [12]:
# Load the labelled digits from the CSV (path is relative to the notebook's
# working directory) into an MNIST dataset object.
a = MNIST('data/train.csv')

In [None]:
# Sanity check: pull the first (sample, label) pair out of the dataset loaded
# above and show the tensor shapes. (`validation` was never defined anywhere in
# the notebook, so the loop now iterates over `a`.)
for x, y in a:
    print(x.shape, y.shape)
    break

#### NEURAL NET

In [None]:
class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> 250 -> 250 -> 10 log-probabilities."""

    def __init__(self):
        super().__init__()
        # layer widths: input (784) -> hidden (250) -> hidden (250) -> output (10)
        self.linear1 = nn.Linear(in_features=784, out_features=250)
        self.linear2 = nn.Linear(in_features=250, out_features=250)
        self.linear3 = nn.Linear(in_features=250, out_features=10)

    def forward(self, x):
        """Map a ``(batch, 784)`` float tensor to ``(batch, 10)`` log-probabilities."""
        hidden = x
        # ReLU follows each of the two hidden layers
        for layer in (self.linear1, self.linear2):
            hidden = F.relu(layer(hidden))
        # the output layer is left linear; log-softmax is taken over the class axis
        logits = self.linear3(hidden)
        return F.log_softmax(logits, dim=1)

#### READ DATA

In [None]:
# Datasets
# Split the samples loaded in `a` into a training and a validation partition.
VALIDATION_FRACTION = 0.2  # share of samples held out for validation

n_samples = len(a)
n_validation = int(VALIDATION_FRACTION * n_samples)
shuffled_ids = torch.randperm(n_samples)

# IDs: row indices into a.X / a.y for each partition
partition = {'validation': shuffled_ids[:n_validation],
             'train': shuffled_ids[n_validation:]}
# Labels
labels = a.y

# DataLoader only understands its own keyword arguments, so anything else that
# may be stored in `params` (e.g. 'learning_rate') is filtered out first.
loader_params = {key: value for key, value in params.items() if key != 'learning_rate'}

# Generators
# Features are cast to float so they can be fed straight into the network.
training_set = data.TensorDataset(a.X[partition['train']].float(), labels[partition['train']])
training_generator = data.DataLoader(training_set, **loader_params)

validation_set = data.TensorDataset(a.X[partition['validation']].float(), labels[partition['validation']])
validation_generator = data.DataLoader(validation_set, **loader_params)

In [None]:
# One training pass followed by one validation pass per epoch
for epoch in range(max_epochs):
    # --- training pass ---
    for local_batch, local_labels in training_generator:
        # move the mini-batch and its labels onto the selected device
        local_batch = local_batch.to(device)
        local_labels = local_labels.to(device)

        # Model computations (placeholder - nothing is computed yet)
        [...]

    # --- validation pass: autograd is switched off for the whole block ---
    with torch.no_grad():
        for local_batch, local_labels in validation_generator:
            # move the mini-batch and its labels onto the selected device
            local_batch = local_batch.to(device)
            local_labels = local_labels.to(device)

            # Model computations (placeholder - nothing is computed yet)
            [...]

In [None]:
# # data overview
# print(digits_train.shape)
# print(digits_submission.shape)
# print(digits_train.loc[:,'label'].value_counts())


# # Returns ((n_samples, pixels), n_sample)
# def preprocess_labeled_data_into_tensors(df, label='label'):
#     train_tensor = torch.tensor(df.drop(label, axis=1).to_numpy(), dtype=torch.float)
#     labels_tensor = torch.tensor(df[label].to_numpy()) 
    
#     return train_tensor, labels_tensor

# # Returns ((n_samples, pixels), n_sample)
# def preprocess_unlabeled_data_into_tensor(df):
#     test = torch.tensor(df.to_numpy())
    
#     return test

# # training data: nr_batches x (data, target)
# # data : tensor (elements_in_batch, 28, 28)
# # target : tensor (elements_in_batch, 1)
# def batch_data(X, y):
#     training = []

#     for i in range(n_batches):
#         # Local batches and labels
#         local_X, local_y = X_train[i * batch_size : (i + 1) * batch_size, ], y_train[i * batch_size : (i + 1) * batch_size, ]
#         training.append((local_X, local_y))
        
#     return training

In [None]:
def train_nn(training_data):
    """Train the module-level ``net`` for ``max_epochs`` passes over ``training_data``.

    Relies on the notebook globals ``net``, ``optimizer``, ``criterion`` and
    ``max_epochs`` being defined before the function is called.

    Args:
        training_data: sized, re-iterable collection of ``(inputs, target)``
            mini-batches - for example a list of tensor pairs or a
            ``DataLoader``. ``inputs`` has the batch on its first axis and
            ``target`` holds one class index per sample.

    Returns:
        None. The parameters of ``net`` are updated in place and a progress
        line is printed every 10th batch.
    """
    num_batches = len(training_data)
    # A DataLoader exposes the sample count through `.dataset`; for a plain
    # list of batches the sizes of the individual batches are summed instead.
    if hasattr(training_data, 'dataset'):
        num_samples = len(training_data.dataset)
    else:
        num_samples = sum(len(inputs) for inputs, _ in training_data)

    for epoch in range(max_epochs):
        for batch_idx, (inputs, target) in enumerate(training_data):
            # flatten every sample to a vector and make sure the dtype is one
            # the linear layers accept (no-op for (batch, n) float tensors)
            inputs = inputs.reshape(inputs.shape[0], -1).float()
            # zero gradients to prevent accumulation
            optimizer.zero_grad()
            # nn output
            net_out = net(inputs)
            # get loss
            loss = criterion(net_out, target)
            # backpropagate
            loss.backward()
            # optimization step
            optimizer.step()

            if batch_idx % 10 == 0:
                # loss is a 0-dim tensor: .item() extracts the Python float
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(inputs), num_samples,
                               100. * batch_idx / num_batches, loss.item()))
                
# train_nn(train_batched)

In [None]:
# Build the network, then the loss and the optimiser that act on it
net = Net()
# negative log-likelihood loss
criterion = nn.NLLLoss()
# SGD with momentum over all parameters of the network
optimizer = torch.optim.SGD(params=net.parameters(), lr=learning_rate, momentum=0.9)
print(net)

In [None]:
# Features and labels come from the dataset object loaded earlier; the helper
# and data frame this cell used to call are not defined anywhere in the
# notebook. Features are cast to float for the network.
X, y = a.X.float(), a.y
print(X.shape)
print(y.shape)

In [None]:
# partition data
X_train, X_test,y_train,y_test = train_test_split(X, y, test_size=0.2)
# Cut the training split into consecutive mini-batches of `batch_size` samples
# (the last batch may be shorter). Each entry is a (features, labels) pair.
train_batched = [(X_train[first : first + batch_size], y_train[first : first + batch_size])
                 for first in range(0, len(X_train), batch_size)]
print(train_batched[0][0].shape)
print(train_batched[0][1].shape)
print(len(train_batched))
# train_nn(X_train)

In [None]:
# Run the training loop over the list of (features, labels) mini-batches.
train_nn(train_batched)