# Kaggle - Multilayer Perceptron (MLP) implementation on the MNIST dataset

Until now we were using the MNIST dataset that is available in `torchvision.datasets`. Let us now load the dataset from the Kaggle repo and train our model.

In [13]:
import os
from pathlib import Path
import torch
from torch.utils.data import TensorDataset ,DataLoader
from torch import nn,optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Directory holding the Kaggle MNIST CSV files. Set the MNIST_DATA_DIR
# environment variable to point at another location; when it is unset the
# original author's local folder is used, so existing behaviour is unchanged.
PATH = Path(os.environ.get(
    "MNIST_DATA_DIR",
    r"C:\Users\hp\Documents\Praxis_Docs\THIRD TERM\Deeplearning\Assignment",
))
print(os.listdir(PATH))

['sample_submission.csv', 'test.csv', 'test.csv.zip', 'train.csv', 'train.csv.zip']


In [14]:
# Load data: read the labelled training CSV and the unlabelled test CSV.
train = pd.read_csv(PATH / 'train.csv')
test = pd.read_csv(PATH / 'test.csv')

# Display both shapes as the cell output.
train.shape, test.shape

((42000, 785), (28000, 784))

In [15]:
# Separate the feature columns (inputs) from the "label" column (target).
x = train.drop(columns=["label"])
y = np.array(train["label"])

# Display both shapes as the cell output.
(x.shape, y.shape)

((42000, 784), (42000,))

In [16]:
# Train / validation split using PyTorch

# Fix the global RNG so the random split (and everything drawn from the
# default generator afterwards) is the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Inputs become float32 and are divided by 255 (assumes 8-bit pixel
# intensities); labels are cast to int64.
torch_X_train = torch.from_numpy(x.values).type(torch.FloatTensor) / 255
torch_y_train = torch.from_numpy(y).type(torch.LongTensor)
myDataset = torch.utils.data.TensorDataset(torch_X_train, torch_y_train)

# 20% of the samples are kept aside for test/validation.
valid_no = int(0.2 * len(myDataset))
trainSet, testSet = torch.utils.data.random_split(
    myDataset, (len(myDataset) - valid_no, valid_no)
)
print(f"len of trainSet {len(trainSet)} , len of testSet {len(testSet)}")

batch_size = 64
train_loader = DataLoader(trainSet, batch_size=batch_size, shuffle=True)
# Evaluation metrics do not depend on sample order, so the held-out loader
# is left unshuffled.
test_loader = DataLoader(testSet, batch_size=batch_size, shuffle=False)

len of trainSet 33600 , len of testSet 8400


In [17]:
# Show how many samples were held out for validation (20% of the dataset).
print(valid_no)

8400


In [59]:
#Network :

class Network(nn.Module):
    """Fully connected classifier: three hidden layers with ReLU + dropout, log-softmax output.

    Args:
        input_size: number of features per flattened sample (default 784).
        hidden_sizes: output widths of ``fc1``, ``fc2`` and ``fc3``; must hold
            exactly three values (default ``(70, 70, 64)``).
        output_size: number of output classes (default 10).
        drop_p: dropout probability applied after each hidden activation
            (default 0.2).

    Raises:
        ValueError: if ``hidden_sizes`` does not contain exactly three widths.
    """

    def __init__(self, input_size=784, hidden_sizes=(70, 70, 64), output_size=10, drop_p=0.2):
        super().__init__()
        if len(hidden_sizes) != 3:
            raise ValueError(
                "hidden_sizes must contain exactly three layer widths, got {}".format(
                    len(hidden_sizes)
                )
            )
        h1, h2, h3 = hidden_sizes

        # Layer names (fc1..fc4) are kept fixed so state-dict keys stay stable.
        self.fc1 = nn.Linear(input_size, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, output_size)

        # A single dropout module shared by all hidden layers.
        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, x):
        """Return per-class log-probabilities with shape ``(batch, output_size)``.

        ``x`` is flattened to ``(batch, -1)`` first, so any trailing shape whose
        product equals ``input_size`` is accepted.
        """
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Hidden layers: linear -> ReLU -> dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))

        # Output layer: no dropout, log-softmax over the class dimension
        x = F.log_softmax(self.fc4(x), dim=1)

        return x
        
# Instantiate the network with its default layer sizes.
model = Network()

# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Stochastic gradient descent with momentum
# (an Adam optimizer with lr=0.001 was the earlier alternative).
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001225,
    momentum=0.9925,
)

In [60]:
# Training:
# Each epoch runs one optimisation pass over train_loader followed by one
# evaluation pass over test_loader, then records and prints the metrics.

epochs = 10
train_losses, test_losses = [], []
for e in range(epochs):
    # --- training pass -------------------------------------------------
    # Set train mode explicitly so dropout is active even if an earlier cell
    # left the model in eval mode.
    model.train()
    running_loss = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # --- evaluation pass -----------------------------------------------
    test_loss = 0
    correct = 0
    seen = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            log_ps = model(images)
            # .item() keeps the running total (and test_losses) as plain
            # floats instead of 0-dim tensors.
            test_loss += criterion(log_ps, labels).item()
            # The most likely class is the arg-max of the log-probabilities.
            top_class = log_ps.argmax(dim=1)
            correct += (top_class == labels).sum().item()
            seen += labels.size(0)
    model.train()

    # Accuracy is computed over samples, not averaged over batches, so a
    # smaller final batch does not skew the result.
    accuracy = correct / max(seen, 1)

    # Both losses are means of per-batch losses, so the two curves are comparable.
    train_losses.append(running_loss / len(train_loader))
    test_losses.append(test_loss / len(test_loader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1]),
          "Test Accuracy: {:.3f}".format(accuracy))

Epoch: 1/10..  Training Loss: 1.578..  Test Loss: 0.454..  Test Accuracy: 0.865
Epoch: 2/10..  Training Loss: 0.433..  Test Loss: 0.249..  Test Accuracy: 0.927
Epoch: 3/10..  Training Loss: 0.291..  Test Loss: 0.184..  Test Accuracy: 0.946
Epoch: 4/10..  Training Loss: 0.236..  Test Loss: 0.163..  Test Accuracy: 0.952
Epoch: 5/10..  Training Loss: 0.206..  Test Loss: 0.150..  Test Accuracy: 0.955
Epoch: 6/10..  Training Loss: 0.188..  Test Loss: 0.143..  Test Accuracy: 0.958
Epoch: 7/10..  Training Loss: 0.174..  Test Loss: 0.132..  Test Accuracy: 0.961
Epoch: 8/10..  Training Loss: 0.165..  Test Loss: 0.126..  Test Accuracy: 0.963
Epoch: 9/10..  Training Loss: 0.153..  Test Loss: 0.113..  Test Accuracy: 0.967
Epoch: 10/10..  Training Loss: 0.143..  Test Loss: 0.117..  Test Accuracy: 0.966


In [56]:
# Inspect the model: print its architecture, then the names of the tensors
# held in its state dict.
print("Our model: \n\n", model, '\n')
print(
    "The state dict keys: \n\n",
    model.state_dict().keys(),
)

Our model: 

 Network(
  (fc1): Linear(in_features=784, out_features=60, bias=True)
  (fc2): Linear(in_features=60, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
) 

The state dict keys: 

 odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])


In [28]:
# Write only the model's state dict (its parameter tensors) to 'checkpoint.pth'.
torch.save(model.state_dict(), 'checkpoint.pth')

In [29]:
# Read the saved state dict back from disk and list the parameter names it holds.
state_dict = torch.load('checkpoint.pth')
print(state_dict.keys())

odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])


In [30]:
# Copy the loaded tensors into the model; the displayed return value reports
# whether the keys matched.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [31]:
# Bundle the weights with the layer sizes needed to rebuild the network.
# The sizes are read from the live model so the metadata always matches the
# tensors stored under 'state_dict' (hand-written sizes can drift from the
# architecture that was actually trained).
checkpoint = {'input_size': model.fc1.in_features,
              'output_size': model.fc4.out_features,
              'hidden_layers': [model.fc1.out_features,
                                model.fc2.out_features,
                                model.fc3.out_features],
              'state_dict': model.state_dict()}

# NOTE: this overwrites 'checkpoint.pth'; the file now holds this dict rather
# than a bare state dict.
torch.save(checkpoint, 'checkpoint.pth')

In [32]:
# Load test data: read the CSV, keep every column except "label" (if present),
# and divide by 255 exactly as was done for the training inputs.
test_images = pd.read_csv(PATH / 'test.csv')
test_image = test_images.drop(columns="label", errors="ignore").values
test_dataset = torch.from_numpy(test_image).type(torch.FloatTensor) / 255
print(test_dataset.shape)

# The tensor is handed to the DataLoader directly, so each batch is a plain
# tensor of rows; order is preserved so predictions line up with row numbers.
new_test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=100,
    shuffle=False,
)

torch.Size([28000, 784])


In [33]:
# Run the model over the test batches and collect the top-1 class per row.
results = []
model.eval()  # disable dropout for inference
with torch.no_grad():
    for images in new_test_loader:
        output = model(images)
        ps = torch.exp(output)  # log-probabilities -> probabilities
        top_p, top_class = ps.topk(1, dim=1)
        # top_class has shape (batch, 1), so each appended entry is a one-element list.
        results.extend(top_class.numpy().tolist())

In [34]:
# Check the results: collapse the nested list into a 1-D array, then show the
# first few predictions and the overall shape.
predictions = np.array(results).reshape(-1)
print(predictions[:5])
print(predictions.shape)

[2 0 9 4 3]
(28000,)


In [35]:
# Build the submission table (1-based ImageId alongside the predicted Label)
# and write it to CSV without the DataFrame index.
submissions = pd.DataFrame(
    {
        "ImageId": list(range(1, len(predictions) + 1)),
        "Label": predictions,
    }
)
submissions.to_csv("my_submissions.csv", header=True, index=False)

In [36]:
# NOTE(review): there are no call parentheses, so this displays the bound
# method's repr (which embeds the module's repr) rather than the parameters.
model.parameters

<bound method Module.parameters of Network(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)>

In [61]:
# Total number of scalar entries across all of the model's parameter tensors.
pytorch_total_params = sum(param.numel() for param in model.parameters())
pytorch_total_params

65114