In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
import torchvision
import torch
from torch import nn

## Loading the Datasets

In [None]:
# Read the digit-recognizer input files. For the train and test tables every
# column is parsed as float32; the sample submission keeps pandas' inferred dtypes.
df_train = pd.read_csv(
    "../input/digit-recognizer/train.csv",
    dtype=np.float32,
)
df_test = pd.read_csv(
    "../input/digit-recognizer/test.csv",
    dtype=np.float32,
)
df_sample = pd.read_csv(
    "../input/digit-recognizer/sample_submission.csv",
)

In [None]:
# Peek at the first five training rows.
df_train.head(n=5)

## Normalize and Train-Val split

In [None]:
# Features: columns 1..784 of the training table, divided by 255.
# (The slice skips column 0 - presumably the label column; TODO confirm.)
X = df_train.iloc[:, 1:785].to_numpy() / 255.0
# Targets: the `label` column as a NumPy array.
Y = df_train['label'].to_numpy()

In [None]:
# Sanity check: feature and label array shapes, one per line.
print(X.shape, Y.shape, sep="\n")

In [None]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scale the test pixels by 1/255, the same factor applied to the training
# features. The cell rebinds `df_test` from a DataFrame to a NumPy array, so it
# is guarded: running it a second time is a no-op instead of failing on
# `.values` (or scaling twice).
if isinstance(df_test, pd.DataFrame):
    df_test = df_test.to_numpy() / 255.0

In [None]:
# Turn each flat 784-value row into a single-channel 28x28 image:
# (n_samples, 784) becomes (n_samples, 1, 28, 28). -1 lets the sample count
# be inferred.
image_shape = (-1, 1, 28, 28)
X_train = X_train.reshape(image_shape)
X_val = X_val.reshape(image_shape)
X_test = df_test.reshape(image_shape)

In [None]:
# Shapes of the train / validation / test image arrays, in that order.
for split in (X_train, X_val, X_test):
    print(split.shape)

## Converting to Tensors and DataLoaders

In [None]:
# Feature tensors for the training and validation splits.
X_train = torch.tensor(X_train)
X_val = torch.tensor(X_val)

# Target tensors for the same two splits, cast to 64-bit integers.
y_train = torch.tensor(y_train).long()
y_val = torch.tensor(y_val).long()

In [None]:
# Training configuration.
n_epochs, batch_size = 16, 256        # passes over the data / samples per batch
learning_rate, momentum = 0.01, 0.5   # optimizer settings
log_interval = 10

In [None]:
# Wrap the (features, labels) tensor pairs as datasets for the training and
# validation splits.
train = torch.utils.data.TensorDataset(X_train, y_train)
val = torch.utils.data.TensorDataset(X_val, y_val)

# Both loaders use the configured batch size and reshuffle on every pass.
loader_options = dict(batch_size=batch_size, shuffle=True)
train_loader = torch.utils.data.DataLoader(train, **loader_options)
val_loader = torch.utils.data.DataLoader(val, **loader_options)

## Sample Visualization

In [None]:
# Show a 5x5 grid of randomly chosen training digits, each titled with its label.
# Samples are read from X_train / y_train directly instead of through the
# `train` dataset: the name `train` is rebound to the training function further
# down the notebook, which would make this cell fail when re-run afterwards.
cols, rows = 5, 5
figure, axes = plt.subplots(rows, cols, figsize=(10, 8))
for ax in axes.flat:
    sample_idx = torch.randint(len(X_train), size=(1,)).item()
    ax.imshow(X_train[sample_idx].squeeze())   # drop the channel axis, leaving 28x28
    ax.set_title(str(int(y_train[sample_idx])))
    ax.axis("off")
figure.tight_layout()
plt.show()

In [None]:
# Preview the first six images of one validation batch.
# Use the built-in next(): the DataLoader iterator's `.next()` method is not
# available in current PyTorch releases, while next(iterator) works everywhere.
examples = iter(val_loader)
example_data, example_targets = next(examples)
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(example_data[i].reshape(28, 28), cmap='gray')
plt.show()

## Model Architecture

In [None]:
class MNIST_CNN(nn.Module):
    """Two-stage convolutional classifier with a 10-way log-softmax output.

    Each stage is a 5x5 convolution (stride 1, padding 2), a ReLU and a 2x2
    max-pool; the channel count goes 1, 16, 32. The flattened feature map
    (32 * 7 * 7 values per sample) feeds one linear layer with 10 outputs.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(channels_in, channels_out):
            # Convolution + ReLU + pooling. Padding 2 with a 5x5 kernel keeps
            # the spatial size through the convolution; the pool halves it.
            return nn.Sequential(
                nn.Conv2d(channels_in, channels_out, kernel_size=5, stride=1, padding=2),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            )

        self.conv1 = conv_stage(1, 16)
        self.conv2 = conv_stage(16, 32)

        # Fully connected head producing one score per class (10 classes).
        self.out = nn.Linear(32 * 7 * 7, 10)
        # Log-softmax across the class dimension.
        self.sig1 = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return ``(log_probs, features)`` for a batch ``x``.

        ``features`` is the second stage's output flattened to
        ``(batch_size, 32 * 7 * 7)``; ``log_probs`` is the log-softmax of the
        linear head applied to those features.
        """
        feature_maps = self.conv2(self.conv1(x))
        features = feature_maps.view(feature_maps.size(0), -1)
        log_probs = self.sig1(self.out(features))
        return log_probs, features



In [None]:
# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation - and the DataLoader shuffling that happens afterwards -
# repeats on every Restart & Run All. 42 matches the random_state already
# used for the train/validation split.
torch.manual_seed(42)
cnn = MNIST_CNN()
print(cnn)

## Loss Function

In [None]:
# Negative log-likelihood loss, averaged over the batch. It consumes the
# log-probabilities that the model's LogSoftmax layer produces.
loss_func = nn.NLLLoss(reduction="mean")
loss_func

## Adam Optimizer

In [None]:
# Adam over all of the model's parameters. The step size comes from the
# `learning_rate` setting in the configuration cell instead of a second
# hard-coded 0.01, and the optimizer is reached through the existing
# `import torch`, so no import is needed in the middle of the notebook.
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
optimizer

## Train Function

In [None]:
def train(epoch, model=None, loader=None, criterion=None, opt=None):
    """Run one optimisation pass over the training batches.

    NOTE: defining this function rebinds the notebook-level name ``train``,
    which the data-loading cell uses for the training ``TensorDataset``.

    Args:
        epoch: Current epoch number. Not used by the computation; kept so
            existing ``train(epoch)`` calls keep working.
        model: Network whose forward pass returns ``(log_probs, features)``.
            Defaults to the notebook-level ``cnn``.
        loader: Iterable of ``(data, target)`` batches. Defaults to the
            notebook-level ``train_loader``.
        criterion: Loss callable taking ``(output, target)``. Defaults to the
            notebook-level ``loss_func``.
        opt: Optimizer that updates ``model``. Defaults to the notebook-level
            ``optimizer``.

    Returns:
        float: Sum of the per-batch loss values seen during the pass.
    """
    # Fall back to the notebook globals only when the caller passes nothing.
    model = cnn if model is None else model
    loader = train_loader if loader is None else loader
    criterion = loss_func if criterion is None else criterion
    opt = optimizer if opt is None else opt

    model.train()
    net_loss = 0.0
    for data, target in loader:
        opt.zero_grad()
        output, _ = model(data)   # the flattened features are not needed here
        loss = criterion(output, target)
        loss.backward()
        opt.step()
        net_loss += loss.item()
    return net_loss

## Test Function

In [None]:
def test():
  cnn.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in val_loader:
      output,x = cnn(data)
      test_loss += loss_func(output, target).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(val_loader.dataset)
  acc = correct / len(val_loader.dataset)
  return test_loss,acc

## Driver Code

In [None]:
# Train for n_epochs, recording the training loss and validation accuracy of
# every epoch for the plot that follows.
LOSS = []   # per-epoch value returned by train()
ACC = []    # per-epoch validation accuracy as a 0-1 fraction
for epoch in range(1, n_epochs + 1):
    print(f"--- Epoch {epoch} ---")
    epoch_loss = train(epoch)
    LOSS.append(epoch_loss)
    # The criterion is NLLLoss, so label it as such.
    print(f"\tNLL Loss (Training) : {epoch_loss} ")
    tloss, tacc = test()
    # test() evaluates on the validation loader and returns a fraction;
    # float() accepts both a plain number and a 0-dim tensor.
    tacc = float(tacc)
    print(f"\tValidation Accuracy : {tacc * 100:.2f} % ")
    ACC.append(tacc)

## Accuracy vs Loss Plot

In [None]:
# Plot the recorded history. The x axes are derived from the lists themselves
# (1-based epochs), so the cell still works when training stopped early and
# fewer than n_epochs values were recorded.
acc = np.asarray([float(a) for a in ACC])   # float() also unwraps 0-dim tensors
lss = np.asarray([float(l) for l in LOSS])
xx = np.arange(1, len(lss) + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
fig.suptitle('ACC vs LOSS')
ax1.plot(np.arange(1, len(acc) + 1), acc, c='green')
ax1.set(title='Validation accuracy', xlabel='Epoch', ylabel='Accuracy (fraction)')
ax2.plot(xx, lss, c='red')
ax2.set(title='Training loss', xlabel='Epoch', ylabel='Loss returned by train()')
plt.show()

## Saving the model 

In [None]:
# Persist only the model's state_dict (not the module object) to disk.
trained_weights = cnn.state_dict()
torch.save(trained_weights, 'mnist-cnn.pt')

## Loading the Model

In [None]:
# Build a fresh network, then restore the weights written by the save cell.
model = MNIST_CNN()
# Despite its name, `modelpath` holds the loaded state_dict, not a path.
modelpath = torch.load('mnist-cnn.pt')
model.load_state_dict(modelpath)
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

## Preparing Test Set

In [None]:
# Shape of the reshaped test images.
np.shape(X_test)

In [None]:
# Placeholder targets (all zeros, 64-bit integers), one per test image, so the
# test images can be wrapped in a TensorDataset alongside a label tensor.
fake_labels = torch.zeros(X_test.shape[0], dtype=torch.long)

In [None]:
# Wrap the test images for batched inference. The tensor gets its own name:
# binding it to `test` would overwrite the test() evaluation function defined
# earlier and break any re-run of the training loop.
X_test_tensor = torch.tensor(X_test)
test_data = torch.utils.data.TensorDataset(X_test_tensor, fake_labels)
# shuffle=False keeps predictions in the same order as the rows of the test file.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

## Testing on sample Test images 

In [None]:
# Show the first test image and the class the reloaded model predicts for it.
# Use the built-in next(): the DataLoader iterator's `.next()` method is not
# available in current PyTorch releases.
examples = iter(test_loader)
example_data, example_targets = next(examples)

img = example_data[0][0]          # first image of the batch, channel axis removed
plt.imshow(img)
plt.axis("off")
plt.show()

model.eval()
with torch.no_grad():
    # Add batch and channel axes back in front, giving a (1, 1, H, W) input.
    output, features = model(img[None, None])
    index = output.argmax(dim=1).item()
    print("PREDICTED CLASS = ", index)

## Submission

In [None]:
# Collect [ImageId, Label] rows for every test image. The first entry is the
# header row, which the following cell turns into column names.
# Predictions come from `model`, the network restored from disk that is also
# used for the single-image prediction, and eval mode is set once up front.
submission = [['ImageId', 'Label']]
model.eval()
image_id = 1   # ids are 1-based and follow the (unshuffled) loader order
with torch.no_grad():
    for images, labels in test_loader:
        output, features = model(images)
        # The index of the largest log-probability is the predicted class;
        # exponentiating first would not change which index that is.
        for prediction in output.argmax(dim=1).tolist():
            submission.append([image_id, prediction])
            image_id += 1

In [None]:
# Build the frame with the first row as the header and the rest as data.
# Promoting row 0 to column names after construction leaves object-dtype
# columns, a stray name on the columns index and a 1-based row index; passing
# `columns=` up front gives integer columns and a clean default index.
submission_df = pd.DataFrame(submission[1:], columns=submission[0])

In [None]:
# Preview the first rows only, as done for the training table, rather than
# displaying the whole submission frame.
submission_df.head()

In [None]:
# Write the predictions to disk without the row index column.
submission_df.to_csv(path_or_buf="submission.csv", index=False)