## Import Libraries

In [1]:
import torch
from torch.utils.data import TensorDataset ,DataLoader
from torch import nn,optim
import torch.nn.functional as F

## Data Preparation

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

/kaggle/input/test.csv
/kaggle/input/train.csv
/kaggle/input/sample_submission.csv


In [3]:
# Read the training and test CSV files from the Kaggle input directory.
train = pd.read_csv('/kaggle/input/train.csv')
test = pd.read_csv('/kaggle/input/test.csv')

# Report the (rows, columns) shape of each frame.
print('Training Data shape: ', train.shape)
print('Test Data shape: ', test.shape)

Training Data shape:  (42000, 785)
Test Data shape:  (28000, 784)


In [4]:
# Separate the feature columns (everything except "label") from the target column.
x = train.drop(columns="label")
y = np.array(train['label'])

In [5]:
# Seed PyTorch's global RNG so the random train/validation split below (and any
# later random draws such as weight initialisation and batch shuffling) are
# reproducible across "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# Convert the features to float32 tensors divided by 255 (assumes 8-bit pixel
# intensities -- TODO confirm) and the labels to int64 tensors, which is the
# target dtype the classification losses expect.
torch_X_train = torch.from_numpy(x.values).type(torch.FloatTensor) / 255
torch_y_train = torch.from_numpy(y).type(torch.LongTensor)

myDataset = TensorDataset(torch_X_train, torch_y_train)

# Hold out 20% of the labelled data for validation.
valid_no = int(0.2 * len(myDataset))

# Randomly divide the labelled data into a training subset and a held-out subset.
trainSet, testSet = torch.utils.data.random_split(
    myDataset, (len(myDataset) - valid_no, valid_no)
)
print(f"len of trainSet {len(trainSet)} , len of testSet {len(testSet)}")
batch_size = 64

# Wrap both subsets in mini-batch iterators. Only the training batches are
# shuffled; evaluation does not depend on sample order, so the held-out loader
# keeps a fixed order.
train_loader = DataLoader(trainSet, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(testSet, batch_size=batch_size, shuffle=False)

len of trainSet 33600 , len of testSet 8400


## Building ANN Architecture

In [6]:
from torch import nn, optim
class Network(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 32 -> 10.

    Each hidden layer is followed by ReLU and dropout (p=0.1). The output
    layer is followed by ``log_softmax``, so ``forward`` returns
    log-probabilities over the 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Four hidden layers followed by the 10-way output layer.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 10)

        # A single dropout module (drop probability 0.1) shared by all hidden layers.
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``."""
        # Collapse every dimension after the batch dimension into one.
        x = x.view(x.size(0), -1)

        # Hidden stack: linear -> ReLU -> dropout, applied layer by layer.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.dropout(F.relu(hidden(x)))

        # Output layer: no dropout, log-softmax across the class dimension.
        return F.log_softmax(self.fc5(x), dim=1)
        
# Instantiate the network and show its layer layout.
model = Network()
print(model)

# Loss function: Network.forward already applies log_softmax, so the matching
# criterion is the negative log-likelihood loss. nn.CrossEntropyLoss would apply
# log_softmax a second time on top of the model's log-probabilities.
criterion = nn.NLLLoss()

# Optimizer: plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1)

Network(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=32, bias=True)
  (fc5): Linear(in_features=32, out_features=10, bias=True)
  (dropout): Dropout(p=0.1)
)


## Training Model

In [7]:
# Train for a fixed number of epochs; after each epoch, evaluate on the held-out
# loader and record the average training / held-out loss as plain Python floats.
epochs = 20
train_losses, test_losses = [], []

for e in range(epochs):
    # ---- training pass ----
    model.train()  # make sure dropout is active while fitting
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- evaluation pass ----
    model.eval()  # disable dropout for evaluation
    test_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            log_ps = model(images)
            n_batch = labels.size(0)
            # The criterion returns the batch mean (default reduction); weight it
            # by the batch size so a smaller final batch is not over-represented.
            test_loss += criterion(log_ps, labels).item() * n_batch
            # The predicted class is the one with the highest log-probability.
            correct += (log_ps.argmax(dim=1) == labels).sum().item()
            seen += n_batch

    # Per-batch average for training, per-sample averages for evaluation.
    train_loss = running_loss / len(train_loader)
    test_loss = test_loss / max(seen, 1)
    accuracy = correct / max(seen, 1)

    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_loss),
          "Test Loss: {:.3f}.. ".format(test_loss),
          "Test Accuracy: {:.3f}".format(accuracy))

Epoch: 1/20..  Training Loss: 1.601..  Test Loss: 0.527..  Test Accuracy: 0.853
Epoch: 2/20..  Training Loss: 0.431..  Test Loss: 0.226..  Test Accuracy: 0.934
Epoch: 3/20..  Training Loss: 0.242..  Test Loss: 0.179..  Test Accuracy: 0.947
Epoch: 4/20..  Training Loss: 0.171..  Test Loss: 0.138..  Test Accuracy: 0.961
Epoch: 5/20..  Training Loss: 0.135..  Test Loss: 0.132..  Test Accuracy: 0.963
Epoch: 6/20..  Training Loss: 0.109..  Test Loss: 0.114..  Test Accuracy: 0.968
Epoch: 7/20..  Training Loss: 0.093..  Test Loss: 0.113..  Test Accuracy: 0.968
Epoch: 8/20..  Training Loss: 0.081..  Test Loss: 0.121..  Test Accuracy: 0.969
Epoch: 9/20..  Training Loss: 0.071..  Test Loss: 0.116..  Test Accuracy: 0.969
Epoch: 10/20..  Training Loss: 0.061..  Test Loss: 0.111..  Test Accuracy: 0.973
Epoch: 11/20..  Training Loss: 0.054..  Test Loss: 0.111..  Test Accuracy: 0.970
Epoch: 12/20..  Training Loss: 0.049..  Test Loss: 0.105..  Test Accuracy: 0.974
Epoch: 13/20..  Training Loss: 0.046.

In [8]:
# Show the architecture, followed by the names of the tensors stored in the state dict.
state_keys = model.state_dict().keys()
print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", state_keys)

Our model: 

 Network(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=32, bias=True)
  (fc5): Linear(in_features=32, out_features=10, bias=True)
  (dropout): Dropout(p=0.1)
) 

The state dict keys: 

 odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias', 'fc5.weight', 'fc5.bias'])


In [9]:
# Write the model's state dict (its learned tensors) to disk.
torch.save(obj=model.state_dict(), f='checkpoint.pth')

In [10]:
# Read the saved state dict back from disk and list the tensors it contains.
state_dict = torch.load(f='checkpoint.pth')
print(state_dict.keys())

odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias', 'fc5.weight', 'fc5.bias'])


In [11]:
# Copy the loaded tensors into the model; strict matching means every key must line up.
model.load_state_dict(state_dict, strict=True)

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [12]:
# Bundle a description of the architecture together with the weights.
# The hidden-layer widths are read from the model itself so the metadata cannot
# drift away from the real architecture (fc1..fc4 are the hidden layers; fc5 is
# the output layer).
checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [layer.out_features
                                for layer in (model.fc1, model.fc2, model.fc3, model.fc4)],
              'state_dict': model.state_dict()}

# NOTE: this writes a dict (metadata + weights) to 'checkpoint.pth', replacing any
# existing file of that name; load the weights back via checkpoint['state_dict'].
torch.save(checkpoint, 'checkpoint.pth')

## Testing Model

In [13]:
# Load the competition test set from the same absolute Kaggle input path that the
# data-loading cell uses, so this cell does not depend on the current working directory.
test_images = pd.read_csv('/kaggle/input/test.csv')

# Keep every column except "label" (a no-op when the file has no such column).
test_image = test_images.loc[:, test_images.columns != "label"].values

# Same preprocessing as the training features: float32 tensor divided by 255.
test_dataset = torch.from_numpy(test_image).type(torch.FloatTensor) / 255
print(test_dataset.shape)

# Iterate in file order (no shuffling) so predictions line up with the row order.
new_test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

torch.Size([28000, 784])


In [14]:
# Run the trained model over the test loader and collect the predicted class of
# every row as a list of single-element lists.
results = []
model.eval()  # switch dropout off for inference
with torch.no_grad():
    for images in new_test_loader:
        output = model(images)
        # Turn log-probabilities into probabilities and keep the top class per row.
        ps = torch.exp(output)
        top_p, top_class = ps.topk(1, dim=1)
        results.extend(top_class.tolist())


In [15]:
# Collapse the collected per-row predictions into a flat 1-D array.
predictions = np.array(results).reshape(-1)

# Quick sanity check: first few labels and the total count.
print(predictions[:5])
print(predictions.shape)

[2 0 9 0 3]
(28000,)


In [16]:
# Build the submission table: 1-based image ids paired with the predicted labels.
image_ids = range(1, len(predictions) + 1)
submissions = pd.DataFrame({"ImageId": image_ids, "Label": predictions})

# Write it out as CSV with a header row and without the DataFrame index.
submissions.to_csv("submission_1.csv", index=False, header=True)