In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm, tqdm_notebook

In [None]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Read both CSV files from the current working directory into DataFrames.
train_images = pd.read_csv(filepath_or_buffer='train.csv')
test_images = pd.read_csv(filepath_or_buffer='test.csv')

In [None]:
# Pull the `label` column out as the target, then keep only the remaining columns as features.
train_target = train_images['label']
train_images = train_images.drop(columns=['label'])

### Normalize and Reshape

In [None]:
# Divide by 255.0 (presumably mapping 8-bit pixel intensities to [0, 1] — confirm against the data)
# and reshape each row into a (1, 28, 28) array, giving NumPy arrays of shape (N, 1, 28, 28).
train_images = (train_images / 255.0).values.reshape(-1, 1, 28, 28)
test_images = (test_images / 255.0).values.reshape(-1, 1, 28, 28)

### Create Dataset

In [None]:
class MNISTDataset(Dataset):
  """Map-style dataset over a collection of images and, optionally, their labels.

  In ``'test'`` mode each item is a bare image; in any other mode each item is
  an ``(image, label)`` pair.
  """

  def __init__(self, x_train, y_train, mode):
    """Store the samples without copying them.

    Args:
      x_train: collection of images supporting ``len()`` and integer indexing.
      y_train: labels aligned with ``x_train`` by position; may be ``None``
        when ``mode`` is ``'test'``.
      mode: ``'test'`` to yield images only; any other value yields pairs.
    """
    self.x_train=x_train
    self.y_train=y_train
    self.mode=mode

  def __len__(self, ):
    """Return the number of images."""
    return len(self.x_train)

  def __getitem__(self, index):
    """Return the image at position ``index`` (plus its label outside test mode)."""
    if self.mode=='test':
      return self.x_train[index]
    labels=self.y_train
    # A pandas Series indexed with [] performs a *label* lookup, which raises
    # KeyError (or silently returns the wrong row) as soon as the Series does
    # not carry the default 0..N-1 index. `.iloc` keeps the lookup positional,
    # matching how the image is fetched.
    label=labels.iloc[index] if hasattr(labels, 'iloc') else labels[index]
    return self.x_train[index], label

### CNN

In [None]:
class DigitRecognize(nn.Module):
  """Convolutional classifier for single-channel 28x28 images.

  Five convolution stages (three of them followed by 2x2 max-pooling) feed a
  two-layer fully connected head that emits one raw score per class.
  """

  def __init__(self, n_classes):
    """Build the layers.

    Args:
      n_classes: number of scores produced by the final linear layer.
    """
    super().__init__()
    self.conv1=nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=2, stride=1),
        nn.ReLU(),
    )
    self.conv2=nn.Sequential(
        nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2, stride=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.conv3=nn.Sequential(
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=2, stride=1),
        nn.ReLU()
    )
    self.conv4=nn.Sequential(
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=2, stride=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.conv5=nn.Sequential(
        nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=2, stride=1),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.dropOut=nn.Dropout2d(0.33)
    # For 28x28 inputs the spatial size goes 28 -> 14 -> 16 -> 9 -> 5 through
    # conv2..conv5, so the flattened feature vector has 64 * 5 * 5 = 1600 values.
    self.fc1=nn.Linear(1600, 1000)
    self.dropOut_2=nn.Dropout()
    # The output width follows n_classes instead of a hard-coded 10.
    self.fc2=nn.Linear(1000, n_classes)

  def forward(self, x):
    """Return raw class scores of shape ``(batch, n_classes)``.

    Args:
      x: batch of images shaped ``(batch, 1, H, W)``, as a tensor or anything
        ``torch.as_tensor`` accepts (e.g. a NumPy array). ``fc1`` expects
        1600 flattened features, which 28x28 inputs produce.
    """
    # torch.tensor(x) on an existing tensor emits a UserWarning, always copies
    # and cuts the autograd link to the input, so only non-tensor input is
    # converted; tensors are merely cast to float32.
    if not torch.is_tensor(x):
      x=torch.as_tensor(x)
    x=x.to(dtype=torch.float32)
    x=self.conv1(x)
    x=self.conv2(x)
    x=self.conv3(x)
    x=self.dropOut(x)
    x=self.conv4(x)
    x=self.conv5(x)
    x=self.dropOut(x)
    x=x.view(x.size(0),-1)
    x=self.fc1(x)
    # NOTE(review): there is no activation between fc1 and fc2, so outside of
    # dropout the head is a composition of two linear maps — confirm intended.
    x=self.dropOut_2(x)
    x=self.fc2(x)
    return x

### Train Epoch

In [None]:
def train_epoch(model, train_data, optimizer, criterion):
  """Run one optimisation pass over ``train_data``.

  Args:
    model: module to train; it is switched to training mode.
    train_data: iterable yielding ``(x_batch, y_batch)`` tensor pairs.
    optimizer: optimizer stepped once per batch.
    criterion: loss callable invoked as ``criterion(output, y_batch)``.

  Returns:
    ``(train_loss, train_acc)``: the sample-weighted mean loss as a Python
    float and the fraction of correctly classified samples.

  Raises:
    ZeroDivisionError: if ``train_data`` yields no batches.
  """
  running_loss=0.0
  running_correct=0
  running_total=0
  model.train()
  for x_batch, y_batch in train_data:
    x_batch=x_batch.to(DEVICE)
    y_batch=y_batch.to(DEVICE, dtype=torch.long)
    n_samples=x_batch.size(0)
    optimizer.zero_grad()
    output=model(x_batch)
    loss=criterion(output, y_batch)
    loss.backward()
    optimizer.step()
    # Accumulate a plain float: adding the loss tensor itself keeps every
    # batch attached to the autograd graph and returns a tensor that requires
    # grad (and may live on the GPU), which cannot be converted for plotting.
    # Weighting by batch size assumes the criterion averages over the batch.
    running_loss+=loss.item()*n_samples
    running_correct+=(torch.argmax(output, 1)==y_batch).sum().item()
    running_total+=n_samples
  train_loss=running_loss/running_total
  train_acc=running_correct/running_total
  return train_loss, train_acc

### Validation Epoch (Loss and Accuracy)

In [None]:
def val_epoch(model, test_data, criterion):
  """Evaluate ``model`` on ``test_data`` without updating its weights.

  Args:
    model: module to evaluate; it is switched to evaluation mode.
    test_data: iterable yielding ``(x_batch, y_batch)`` tensor pairs.
    criterion: loss callable invoked as ``criterion(output, y_batch)``.

  Returns:
    ``(test_loss, test_acc)``: the sample-weighted mean loss as a 0-dim CPU
    tensor (kept as a tensor so callers using ``.item()`` keep working) and
    the fraction of correctly classified samples as a float.

  Raises:
    ZeroDivisionError: if ``test_data`` yields no batches.
  """
  running_loss=0.0
  running_correct=0
  running_total=0
  # Evaluation mode only needs to be set once, not once per batch.
  model.eval()
  with torch.no_grad():
    for x_batch, y_batch in test_data:
      x_batch=x_batch.to(DEVICE)
      y_batch=y_batch.to(DEVICE, dtype=torch.long)
      n_samples=x_batch.size(0)
      output=model(x_batch)
      # Accumulate on the host as a float so the result never stays on the
      # GPU, where it could not be converted to NumPy for plotting.
      # Weighting by batch size assumes the criterion averages over the batch.
      running_loss+=criterion(output, y_batch).item()*n_samples
      running_correct+=(torch.argmax(output, 1)==y_batch).sum().item()
      running_total+=n_samples
  test_loss=torch.tensor(running_loss/running_total)
  test_acc=running_correct/running_total
  return test_loss, test_acc

### Train

In [None]:
def train(model,train_dataset, batch_size, epochs, lr=0.001, checkpoint_path="MNISTModel"):
  """Train ``model`` and checkpoint it whenever validation accuracy improves.

  Args:
    model: module to optimise in place.
    train_dataset: indexable dataset of ``(image, label)`` items; 10% of it is
      held out for validation.
    batch_size: batch size for both the training and validation loaders.
    epochs: number of passes over the training split.
    lr: learning rate for the Adam optimizer.
    checkpoint_path: file that receives the ``state_dict`` of the best model.

  Returns:
    A list with one ``(train_loss, train_acc, val_loss, val_acc)`` tuple of
    plain numbers per epoch.
  """
  max_acc=0
  history=[]
  criterion=nn.CrossEntropyLoss()
  # Build the optimizer once: re-creating Adam every epoch discards its
  # running moment estimates and restarts the adaptive step sizes.
  optimizer=torch.optim.Adam(model.parameters(), lr=lr)
  with tqdm(desc="epochs", total=epochs) as tbar:
    for i in range(epochs):
      if i%10==0:
        # A fresh random train/validation split is drawn every 10 epochs.
        # NOTE(review): after the first re-split the validation samples may
        # have been trained on earlier, so later validation metrics (and the
        # "best" checkpoint choice) are optimistic — confirm this is intended.
        train_data, test_data=train_test_split(train_dataset, test_size=0.1)
        test_loader=DataLoader(test_data, batch_size=batch_size, shuffle=False)
        train_loader=DataLoader(train_data, batch_size=batch_size, shuffle=True)
      train_loss, train_acc=train_epoch(model, train_loader, optimizer, criterion)
      test_loss, test_acc=val_epoch(model, test_loader, criterion)
      # Store plain floats: float() accepts both Python numbers and 0-dim
      # tensors, and keeps graph-attached or GPU tensors out of the history.
      train_loss=float(train_loss)
      test_loss=float(test_loss)
      print()
      if test_acc>max_acc:
        max_acc=test_acc
        torch.save(model.state_dict(), checkpoint_path)
        print(f'Model with accuracy: {test_acc} is saved')
      print("Loss:", test_loss)
      print("Accuracy:",test_acc)
      history.append((train_loss, train_acc,  test_loss, test_acc))
      tbar.update(1)
      print(f'Epoch {i+1} train_loss: {train_loss:.3}  val_loss: {test_loss:.3} train_acc: {train_acc:.5} val_acc: {test_acc:.5}')
  return history


In [None]:
# Instantiate the classifier with 10 output classes and move it to the selected device.
model = DigitRecognize(n_classes=10).to(DEVICE)

In [None]:
# Labelled training set yields (image, label) pairs; the test set has no labels and yields images only.
train_dataset = MNISTDataset(x_train=train_images, y_train=train_target, mode='train')
test_dataset = MNISTDataset(x_train=test_images, y_train=None, mode='test')

In [None]:
# Batch size and epoch count are free to tune.
history = train(model, train_dataset, batch_size=128, epochs=100)

In [None]:
# Transpose the list of per-epoch 4-tuples into one tuple per metric.
train_loss, train_acc, test_loss, test_acc=zip(*history)

In [None]:
# Per-epoch loss curves. `test_loss` holds the values val_epoch computed on the
# split held out from the training data, so it is labelled as validation loss.
fig, ax = plt.subplots(figsize=(15,9))
ax.plot(train_loss, label="Train loss")
ax.plot(test_loss, label="Validation loss")
ax.set_title("Loss per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
plt.show()

In [None]:
# Per-epoch accuracy curves. `test_acc` holds the values val_epoch computed on the
# split held out from the training data, so it is labelled as validation accuracy.
fig, ax = plt.subplots(figsize=(15,9))
ax.plot(train_acc, label="Train accuracy")
ax.plot(test_acc, label="Validation accuracy")
ax.set_title("Accuracy per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
plt.show()

In [None]:
def predict(model, test_data,batch_size):
  """Return the predicted class index for every sample in ``test_data``.

  Args:
    model: trained module; it is switched to evaluation mode.
    test_data: dataset whose items are bare inputs (no labels).
    batch_size: number of samples per forward pass.

  Returns:
    A list of Python ints, one per sample, in dataset order.
  """
  loader=DataLoader(test_data, batch_size=batch_size, shuffle=False)
  predicted=[]
  with torch.no_grad():
    model.eval()
    for batch in loader:
      # Scores are moved to the CPU before taking the arg-max over classes.
      scores=model(batch.to(DEVICE)).cpu()
      predicted.extend(torch.argmax(scores, 1).tolist())
  return predicted

In [None]:
# Rebuild the architecture and restore the best weights saved by train().
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU still loads on a CPU-only machine.
load_model=DigitRecognize(10).to(DEVICE)
load_model.load_state_dict(torch.load("MNISTModel", map_location=DEVICE))

In [None]:
# Predict a class index for every unlabeled test image with the restored model.
predict_res = predict(load_model, test_dataset, batch_size=64)