In [108]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import autograd
from torch.utils import data
from torch.backends import cudnn

import pandas as pd
import numpy as np
import random as rd

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

#### HYPERPARAMETER & CONFIGURATIONS

In [None]:
learning_rate = 0.01
max_epochs = 10
n_batches = 42
batch_size = 800

params = {'batch_size': 6,
          'shuffle': True,
          'num_workers': 6}
max_epochs = 100

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
cudnn.benchmark = True

#### PRINTING DIGITS

In [None]:
# PRINTING HANDLING HERE
def print_digit(image):
    # digits_train.head()
    image = image.view(28, 28)
    plt.figure(1, figsize=(3, 3))
    plt.imshow(image.numpy(), cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
    
def random_digit(X, y):
    x = rd.randint(0, X[:,0].shape[0])
    
    print_digit(X[x,:])
    print(y[x])

#### DATASET CLASS

In [None]:
class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, list_IDs, labels):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = torch.load('data/' + ID + '.pt')
        y = self.labels[ID]

        return X, y

In [None]:
#### NEURAL NET

#### READ DATA

In [2]:
# import data
digits_train = pd.read_csv('data/train.csv')
digits_submission = pd.read_csv('data/test.csv')

In [112]:
len(digits_train)

42000

In [None]:
# Datasets
partition = # IDs
labels = # Labels

# Generators
training_set = Dataset(partition['train'], labels)
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset(partition['validation'], labels)
validation_generator = data.DataLoader(validation_set, **params)

In [None]:
# Loop over epochs
for epoch in range(max_epochs):
    # Training
    for local_batch, local_labels in training_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        # Model computations
        [...]

    # Validation
    with torch.set_grad_enabled(False):
        for local_batch, local_labels in validation_generator:
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)

            # Model computations
            [...]

In [4]:
# data overview
print(digits_train.shape)
print(digits_submission.shape)
print(digits_train.loc[:,'label'].value_counts())

(42000, 785)
(28000, 784)
1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64


In [69]:
# Returns ((n_samples, pixels), n_sample)
def preprocess_labeled_data_into_tensors(df, label='label'):
    train_tensor = torch.tensor(df.drop(label, axis=1).to_numpy(), dtype=torch.float)
    labels_tensor = torch.tensor(df[label].to_numpy()) 
    
    return train_tensor, labels_tensor

# Returns ((n_samples, pixels), n_sample)
def preprocess_unlabeled_data_into_tensor(df):
    test = torch.tensor(df.to_numpy())
    
    return test

In [86]:
# NEURAL NETWORK IMPLEMENTATION

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # three layers, input -> 250 -> 250 -> 10
        self.linear1 = nn.Linear(784, 250)
        self.linear2 = nn.Linear(250, 250)
        self.linear3 = nn.Linear(250, 10)

    def forward(self, x):
#         print(x.shape)
        # activation functions between layers
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
#         print(x.shape)
        return F.log_softmax(x, dim=)

In [87]:
# training data: nr_batches x (data, target)
# data : tensor (elements_in_batch, 28, 28)
# target : tensor (elements_in_batch, 1)
def batch_data(X, y):
    training = []

    for i in range(n_batches):
        # Local batches and labels
        local_X, local_y = X_train[i * batch_size : (i + 1) * batch_size, ], y_train[i * batch_size : (i + 1) * batch_size, ]
        training.append((local_X, local_y))
        
    return training

In [95]:
def train_nn(training_data):
    for epoch in range(max_epochs):
        # training data: nr_batches x (data, target)
        # data : tensor (elements_in_batch, 28, 28) NOPE
        # target : tensor (elements_in_batch, 1)
        for batch_idx, (data, target) in enumerate(training_data):
            data, target = autograd.Variable(data), autograd.Variable(target)            
            # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
            # data = data.view(-1, 28*28)
            print(data.shape)
            # zero gradients to prevent accumulation
            optimizer.zero_grad()
            # nn output
            # nSamples * nChannels? * nPixels
            
            # input = torch.randn(1, 784)
            # out = net(input)
            # print(out)
            # print(out.shape)
            print(data.shape)
            net_out = net(data)
            # get loss
            loss = criterion(net_out, target)
            # backpropagate
            loss.backward()
            # optimization step
            optimizer.step()
            
            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data), len(training_data.dataset),
                               100. * batch_idx / len(train_loader), loss.data[0]))
                
# train_nn(train_batched)

In [96]:
# create NNet instance and initialize optimizer and criterion
net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
criterion = nn.NLLLoss()
print(net)

Net(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=250, bias=True)
  (linear3): Linear(in_features=250, out_features=10, bias=True)
)


In [102]:
# params = list(net.parameters())
# print(len(params))
# print(net.parameters())
# print(params[0].size())  # conv1's .weight

# # nSamples * nChannels? * nPixels
# input = torch.randn(1, 784)
# out = net(input)
# print(out)
# print(out.shape)

# net.zero_grad()
# out.backward(torch.randn(1, 10))

# output = net(input)
# target = torch.randn(10)  # a dummy target, for example
# target = target.view(1, -1)  # make it the same shape as output
# criterion = nn.MSELoss()

# loss = criterion(output, target)
# print(loss)

# print(loss.grad_fn)  # MSELoss
# print(loss.grad_fn.next_functions[0][0])  # Linear
# print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

# net.zero_grad()     # zeroes the gradient buffers of all parameters

# print('conv1.bias.grad before backward')
# print(net.linear1.bias.grad)

# loss.backward()

# print('conv1.bias.grad after backward')
# print(net.linear1.bias.grad)
# print(net.linear1.bias.grad)

In [98]:
X, y = preprocess_labeled_data_into_tensors(digits_train)    
print(X.shape)
print(y.shape)

torch.Size([42000, 784])
torch.Size([42000])


In [99]:
# partition data
X_train, X_test,y_train,y_test = train_test_split(X, y, test_size=0.2)
train_batched = batch_data(X_train, y_train)
print(train_batched[0][0].shape)
print(train_batched[0][1].shape)
print(len(train_batched))
# train_nn(X_train)

torch.Size([800, 784])
torch.Size([800])
42


In [100]:
train_nn(train_batched)

torch.Size([800, 784])
torch.Size([800, 784])


AttributeError: 'list' object has no attribute 'dataset'