In [None]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torchvision
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise run everything on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

# Load and Process Data

In [None]:
# Read the two competition CSV files from the notebook's input directory.
# train.csv provides a 'label' column next to the pixel columns (both are read further down);
# test.csv is only ever used as pixel data, in the prediction cell at the end.
dataframe_train_combined = pd.read_csv('../input/digit-recognizer/train.csv')
dataframe_test = pd.read_csv('../input/digit-recognizer/test.csv')

### Train_Validation_Split

In [None]:
# Hold out 20% of the labelled rows for validation.
# random_state pins the shuffle so the same rows land in the validation set on every
# Restart & Run All, which keeps the reported validation accuracy comparable between runs.
dataframe_train, dataframe_validation = train_test_split(
    dataframe_train_combined,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

### Converting data to the appropriate format and dimensions
The x data is reshaped to 28*28 because those are the image dimensions, and that is what the model expects

In [None]:
def _frame_to_images(frame):
    """Return the non-label columns of `frame` as a float32 array of shape (N, 28, 28), divided by 255.0."""
    pixels = frame.drop(columns='label').to_numpy().astype(np.float32)
    return (pixels / 255.0).reshape(-1, 28, 28)


# Every labelled row: used for the final training run.
x_train_combined = _frame_to_images(dataframe_train_combined)
y_train_combined = dataframe_train_combined['label'].to_numpy()

# Training part of the split: used for the preliminary training run.
x_train = _frame_to_images(dataframe_train)
y_train = dataframe_train['label'].to_numpy()

# Held-out part of the split: used to calculate accuracy.
# The images are moved to `device` as one tensor; the labels stay a NumPy array
# because get_accuracy compares them against predictions on the CPU.
x_validation = torch.from_numpy(_frame_to_images(dataframe_validation)).to(device)
y_validation = dataframe_validation['label'].to_numpy()

### Transforms
This is used for image augmentation. Basically we will randomly alter the image a bit every time.

In [None]:
# Augmentation steps applied, in order, to one image tensor at a time:
# tensor -> PIL image, random rotation within +/-20 degrees, random affine transform
# (again limited to +/-20 degrees of rotation), random perspective with its default
# settings, then back to a tensor.
augmentation_steps = [
    transforms.ToPILImage(),
    transforms.RandomRotation(degrees=20),
    transforms.RandomAffine(degrees=20),
    transforms.RandomPerspective(),
    transforms.ToTensor(),
]
my_transform = transforms.Compose(augmentation_steps)

### Dataset Class

In [None]:
class DigitDataset(Dataset):
    """Digit images and labels, with a transform applied every time a sample is read.

    Args:
        x: array of images. It is converted to a float32 tensor; no copy is made when
            `x` is already a float32 NumPy array. Each transformed image is reshaped
            to (28, 28) in `__getitem__`.
        y: NumPy array of labels, one per image.
        transform: optional callable applied to a single image tensor. When omitted,
            the notebook-level `my_transform` pipeline is used.
    """

    def __init__(self, x, y, transform=None):
        # Convert the array directly. Going through x.tolist() first built a nested
        # Python list holding every pixel as a Python float, which is slow and
        # memory-hungry for tens of thousands of images.
        self.x = torch.as_tensor(x, dtype=torch.float32)
        self.y = torch.from_numpy(y)
        self.len = len(y)
        self.transform = transform

    def __getitem__(self, index):
        """Return `(image, label)` for `index`, where `image` is the transformed sample reshaped to (28, 28)."""
        # The fallback is resolved at call time, so a dataset built without an explicit
        # transform keeps using whatever `my_transform` currently is.
        transform = my_transform if self.transform is None else self.transform
        return transform(self.x[index]).reshape(28, 28), self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

# Dataset over the training part of the split (preliminary run).
dataset_train = DigitDataset(x=x_train, y=y_train)
# Dataset over every labelled row (final run).
dataset_train_combined = DigitDataset(x=x_train_combined, y=y_train_combined)

# Check first element

In [None]:
# Pull one sample through the dataset (this applies the augmentation) and show its label.
first_sample = dataset_train[0]
first_x = first_sample[0]
first_y = first_sample[1]
print(first_y.item())

In [None]:
# Display the first sample as returned by the dataset, i.e. after the random transform
# and the reshape to 28x28. No cmap is passed, so matplotlib picks the colours.
plt.imshow(first_x)

# Testing Hyperparameters

In [None]:
# Mini-batch size for the data loaders, number of passes over the training data,
# and the initial learning rate handed to the Adam optimizer.
batch_size, num_epochs, learning_rate = 1024, 90, 0.003


# DataLoader

In [None]:
# Both loaders share the same settings: fixed batch size, reshuffled on every pass.
loader_options = dict(batch_size=batch_size, shuffle=True)

train_loader = DataLoader(dataset=dataset_train, **loader_options)
train_loader_combined = DataLoader(dataset=dataset_train_combined, **loader_options)

In [None]:
def get_accuracy():
    """Return the fraction of validation samples that the global `model` classifies correctly.

    Reads the notebook-level `model`, `x_validation` and `y_validation`. The forward pass
    runs with gradients disabled. The function does not change the model's train/eval
    mode; the caller decides that.
    """
    with torch.no_grad():
        class_scores = model(x_validation)
    predicted_labels = class_scores.argmax(dim=1).to('cpu').numpy()
    num_correct = (predicted_labels == y_validation).sum()
    return num_correct / len(y_validation)

# Model Optimizer and Loss

### Model Description
ImageShape=28*28 <br>
The image is modeled as a sequence of 28 steps with 28 features per step <br>
There are 2 bidirectional RNNs. One treats rows as sequence steps and columns as features; the other treats columns as sequence steps and rows as features <br>
Since the RNNs are bidirectional, each produces twice as many output features, so we use a linear layer to combine the outputs of both directions <br>
Then we treat the RNN outputs as 2 separate images, and do conv->maxpool->conv->maxpool on each. <br>
Then we flatten both and concatenate them. <br>
Then we pass them through some fully connected layers to get our final output <br>

In [None]:
class Model(nn.Module):
    """Two-branch GRU + CNN classifier for 28x28 images with 10 output classes.

    One branch reads the image as given (the H layers); the other reads it with the
    last two axes swapped (the V layers). Each branch is: 2-layer bidirectional GRU ->
    linear 56->28 -> conv/pool -> conv/pool. The two flattened branch outputs are
    concatenated and passed through three fully connected layers.
    """

    def __init__(self):
        super().__init__()

        # Shared, parameter-free layers.
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        # Recurrent layers: 28 input features, 28 hidden units per direction.
        self.rnnH = nn.GRU(28, 28, batch_first=True, bidirectional=True, num_layers=2, dropout=.5)
        self.rnnV = nn.GRU(28, 28, batch_first=True, bidirectional=True, num_layers=2, dropout=.5)

        # Merge the two GRU directions (2 * 28 features) back down to 28.
        self.fc1H = nn.Linear(56, 28)
        self.fc1V = nn.Linear(56, 28)

        # Convolutional stack for the H branch.
        self.cnn1H = nn.Conv2d(1, 16, 5)
        self.pool1H = nn.MaxPool2d(2, 2)
        self.cnn2H = nn.Conv2d(16, 32, 3)
        self.pool2H = nn.MaxPool2d(2, 2)

        # Convolutional stack for the V branch.
        self.cnn1V = nn.Conv2d(1, 16, 5)
        self.pool1V = nn.MaxPool2d(2, 2)
        self.cnn2V = nn.Conv2d(16, 32, 3)
        self.pool2V = nn.MaxPool2d(2, 2)

        # Classifier head: each branch contributes 32*5*5 features.
        self.combine = nn.Linear(32 * 5 * 5 * 2, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        """Map a batch of images with shape (batch, 28, 28) to class scores of shape (batch, 10)."""
        # Run both GRUs; the V branch sees the image with its last two axes swapped.
        horizontal, _ = self.rnnH(x)
        vertical, _ = self.rnnV(x.transpose(1, 2))

        # Collapse the two directions and view each result as a one-channel image.
        horizontal = self.relu(self.fc1H(horizontal)).reshape(-1, 1, 28, 28)
        vertical = self.relu(self.fc1V(vertical)).reshape(-1, 1, 28, 28)

        # conv -> relu -> pool, twice, per branch.
        horizontal = self.pool1H(self.relu(self.cnn1H(horizontal)))
        horizontal = self.pool2H(self.relu(self.cnn2H(horizontal)))
        vertical = self.pool1V(self.relu(self.cnn1V(vertical)))
        vertical = self.pool2V(self.relu(self.cnn2V(vertical)))

        # Flatten and join both branches, with dropout before and after the first dense layer.
        joined = torch.cat(
            (horizontal.reshape(-1, 32 * 5 * 5), vertical.reshape(-1, 32 * 5 * 5)),
            dim=1,
        )
        hidden = self.drop(self.relu(self.combine(self.drop(joined))))

        return self.fc3(self.relu(self.fc2(hidden)))
# Build the network and move its parameters to the selected device.
model = Model()
model = model.to(device)

In [None]:
# Cross-entropy loss on the model's class scores, Adam over every model parameter, and a
# step schedule that multiplies the learning rate by 0.6 after every 10 scheduler steps.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.6)

# Training Loop (Split data)

In [None]:
# Per-epoch history: mean training loss and validation accuracy.
all_loss = []
all_accuracy = []

for epoch in range(num_epochs):
    batch_losses = []

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        loss = criterion(model(x), y)
        batch_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses seen in this epoch.
    all_loss.append(sum(batch_losses) / len(batch_losses))

    # Score the validation set in eval mode, then switch back for the next epoch.
    model.eval()
    all_accuracy.append(get_accuracy())
    model.train()

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    print('Epoch:', epoch + 1, 'Train_Loss:', all_loss[-1], 'VAL_Accuracy', all_accuracy[-1])
    
        


In [None]:
# Plot the per-epoch training loss and validation accuracy on one axes.
# The curves are labelled and the axis is named so the figure can be read on its own;
# without a legend the two lines are indistinguishable.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(all_loss, label='Train loss')
ax.plot(all_accuracy, label='Validation accuracy')
ax.set_xlabel('Epoch (0-based)')
ax.set_title('Training loss and validation accuracy per epoch')
ax.legend();

# Now time to train using all data

In [None]:
# Start over with freshly initialised weights, and with a new optimizer and schedule
# bound to them, so nothing carries over from the preliminary run.
model = Model()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.6)

In [None]:
# Per-epoch mean training loss; there is no validation step because every labelled row is used for training.
all_loss = []

for epoch in range(num_epochs):
    batch_losses = []

    for x, y in train_loader_combined:
        x = x.to(device)
        y = y.to(device)

        loss = criterion(model(x), y)
        batch_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses seen in this epoch.
    all_loss.append(sum(batch_losses) / len(batch_losses))

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    print('Epoch:', epoch + 1, 'Train_Loss:', all_loss[-1])

# Predict Test Data and Save

In [None]:
# Put the model in inference mode (this disables dropout).
model.eval()

# Same preprocessing as the training images: float32, divided by 255, shaped (N, 28, 28).
# astype takes the dtype itself (np.float32), not an instance of it (np.float32()).
test_data = torch.from_numpy(
    (dataframe_test.to_numpy().astype(np.float32) / 255.0).reshape(-1, 28, 28)
).to(device)

# Predict without building an autograd graph, one mini-batch at a time, so memory use
# does not grow with the size of the test set.
with torch.no_grad():
    predicted = torch.cat([
        model(test_data[start:start + batch_size]).argmax(dim=1)
        for start in range(0, len(test_data), batch_size)
    ])
predicted = predicted.to('cpu').numpy().reshape(-1, 1)

# ImageId is 1-based and sized from the predictions rather than a hard-coded row count.
seq = (np.arange(len(predicted)) + 1).reshape(-1, 1)
out = np.append(seq, predicted, axis=1)
out = pd.DataFrame(out, columns=['ImageId', 'Label'])
out.to_csv('out.csv', index=False)
out.head()

In [None]:
# Write the submission table to out.csv without the index column.
# The prediction cell already performs this same write, so this cell only re-saves the file.
out.to_csv('out.csv',index=False)