In [None]:
import numpy as np 
import os
import pandas as pd
import matplotlib.pyplot as plt
import torch 
import torchvision
from torchvision import datasets,transforms
import torchvision.utils as vutils
from torch.utils.data import DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import random
# Single seed reused for every RNG seeded in the next cell and for the train/validation split.
seed=99


In [None]:
# --- Reproducibility -------------------------------------------------------
# Seed every random number generator the notebook touches with the same value.
random.seed(seed)
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it here
# does not change hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Seed all visible GPUs (silently ignored when CUDA is unavailable).
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
# Autotuned kernel selection can differ between runs; keep it off explicitly.
torch.backends.cudnn.benchmark = False

# --- Run configuration -----------------------------------------------------
# Fall back to the CPU so the notebook still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): the training cell passes its own literal step size to Adam, so
# this constant is currently not consumed there - confirm which value is intended.
learning_rate = 0.00025
num_epochs = 500

In [None]:
# Read the labelled training table and the unlabelled test table from the
# competition input directory.
train_csv = "../input/digit-recognizer/train.csv"
test_csv = "../input/digit-recognizer/test.csv"
df = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [None]:
# Per-column summary statistics of the training table (rendered as the cell output).
df.describe()

In [None]:
# torchvision MNIST training set, downloaded into the working directory and
# converted to tensors on access.
train_original = datasets.MNIST(
    root='./',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# 90/10 split of the CSV training table, reproducible through `seed`.
train_df, vald_df = train_test_split(df, train_size=0.9, random_state=seed)

# Drop the label column and view every row as a (1, 28, 28) image.
# NOTE(review): in the cells visible here the training loader is built from
# `train_original`, so `train_df_x` / `train_df_y` are prepared but not
# consumed - confirm that this is intended.
train_df_x = np.array(train_df.drop(columns='label')).reshape(-1, 1, 28, 28)
vald_df_x = np.array(vald_df.drop(columns='label')).reshape(-1, 1, 28, 28)
train_df_y = train_df['label']
vald_df_y = vald_df['label']

# The test table has no label column: reshape it directly.
test_df_x = np.array(df_test).reshape(-1, 1, 28, 28)

print("Train:{} , Test:{} and Validation:{}".format(train_df_x.shape, test_df_x.shape, vald_df_x.shape))

In [None]:
class DigitDataset(Dataset):
    """Labelled image dataset backed by in-memory arrays.

    Args:
        X: array of pixel values; it is divided by 255 once at construction.
        Y: object exposing ``.values`` (e.g. a pandas Series) holding one
            label per image; stored as a column of shape ``(n, 1)``.

    Indexing returns ``(image, label)``, both as float32 tensors.
    """

    def __init__(self, X, Y):
        self.image = X / 255
        self.label = Y.values.reshape((-1, 1))

    def __len__(self):
        return len(self.image)

    def __getitem__(self, idx):
        pixels, target = self.image[idx], self.label[idx]
        # torch.tensor copies and casts, matching the legacy FloatTensor constructor.
        return (
            torch.tensor(pixels, dtype=torch.float32),
            torch.tensor(target, dtype=torch.float32),
        )
    
class testDataset(Dataset):
    """Unlabelled image dataset backed by an in-memory array.

    Args:
        X: array of pixel values; it is divided by 255 once at construction.

    Indexing returns the selected image as a float32 tensor.
    """

    def __init__(self, X):
        self.image = X / 255

    def __len__(self):
        return len(self.image)

    def __getitem__(self, idx):
        # torch.tensor copies and casts, matching the legacy FloatTensor constructor.
        return torch.tensor(self.image[idx], dtype=torch.float32)
    

In [None]:

# Wrap the validation split and the test pixels in Dataset objects.
vald_d = DigitDataset(vald_df_x, vald_df_y)
test_d = testDataset(test_df_x)

# Training batches are drawn from the torchvision set `train_original`.
train_loader = DataLoader(dataset=train_original, batch_size=100, shuffle=True)
# Validation batches of 500, reshuffled on every pass.
vald_loader = DataLoader(dataset=vald_d, batch_size=500, shuffle=True)
# Test images are served one per batch, in their original order.
test_loader = DataLoader(dataset=test_d, batch_size=1)


In [None]:
# Preview one training batch as an image grid.
real_batch = next(iter(train_loader))

# Build the grid directly from the loader's batch: the previous round trip
# through `device` and back to the CPU did not change the picture and made this
# cell fail on machines without CUDA.
grid = vutils.make_grid(real_batch[0][:128], padding=2, normalize=True)

plt.figure(figsize=(40,20))
plt.axis("off")
plt.title("Training Images")
# make_grid returns channels-first data; imshow expects channels-last.
plt.imshow(grid.permute(1, 2, 0).numpy());

In [None]:
class conv(nn.Module):
    """Convolutional classifier for 1x28x28 images with 10 output classes.

    Two convolutional stages (32 then 64 channels), each made of two 3x3
    convolutions followed by a strided 5x5 convolution, feed a small
    fully-connected head.

    ``forward`` returns raw class scores (logits). Losses such as
    ``F.cross_entropy`` apply log-softmax themselves, so feeding them
    already-normalised probabilities applies softmax twice and flattens the
    gradients. ``argmax`` over the logits gives the same prediction as over the
    probabilities; call ``self.softmax`` on the output when probabilities are
    needed.
    """

    def __init__(self):
        super(conv, self).__init__()
        self.convlayer = nn.Sequential(
            # input 1*28*28
            nn.Conv2d(1, 32, 3, 1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 3, 1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            # kernel 5, stride 2, padding 3: halves the spatial resolution
            nn.Conv2d(32, 32, 5, 2, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout2d(0.1),

            nn.Conv2d(32, 64, 3, 1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, 3, 1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, 5, 2, 3),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Dropout2d(0.1),
        )
        self.linearlayers = nn.Sequential(
            # input 64*6*6 = 2304 features after flattening
            nn.Flatten(),
            nn.Linear(2304, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.05),
            nn.Linear(128, 10),
        )
        # Not applied in forward(); kept so callers can turn logits into probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10) for a batch of images."""
        features = self.convlayer(x)
        return self.linearlayers(features)
        

In [None]:
model = conv().to(device)

# NOTE(review): the step size is the literal below; the `learning_rate`
# constant from the configuration cell is not used here - confirm which one is
# intended.
optimizer = optim.Adam(model.parameters(), 0.0005)
total_step = len(train_loader)

for epoch in range(num_epochs):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        # reshape(-1) always yields a 1-D target, even for a batch of one,
        # where squeeze() would collapse it to a scalar.
        labels = labels.to(device).long().reshape(-1)

        optimizer.zero_grad()
        output = model(images)

        # cross_entropy applies log-softmax internally and expects raw scores.
        # NOTE(review): confirm that `conv.forward` returns unnormalised scores.
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()

        # Report accuracies every 300 optimisation steps.
        if (i + 1) % 300 == 0:
            # Accuracy on the training batch that was just used for the update.
            train_acc = (output.argmax(dim=1) == labels).float().mean().item()

            # Validate in eval mode and without autograd: otherwise dropout
            # stays active, the batch-norm running statistics absorb validation
            # data, and a graph is built for every batch.
            model.eval()
            correct, seen = 0, 0
            with torch.no_grad():
                for val_images, val_labels in vald_loader:
                    val_images = val_images.to(device)
                    val_labels = val_labels.to(device).long().reshape(-1)
                    val_pred = model(val_images).argmax(dim=1)
                    correct += (val_pred == val_labels).sum().item()
                    seen += val_labels.numel()
            model.train()

            # Sample-weighted accuracy: averaging per-batch accuracies would
            # over-weight a smaller final batch.
            valid_acc = correct / max(seen, 1)

            print ('Epoch [{}/{}], Step [{}/{}], AC: {:.4f} , Valid_AC:{}'.format(epoch+1, num_epochs, i+1, total_step, train_acc*100, 100*valid_acc))
            



In [None]:
# Predict a label for every test image and collect them in submission format.
model.eval()
ans = []
# Inference needs no gradients; no_grad avoids building a graph per batch.
with torch.no_grad():
    for batch in test_loader:
        # argmax over the class dimension gives one label per image, so the
        # result stays correct for any loader batch size.
        ans.extend(model(batch.to(device)).argmax(dim=1).cpu().tolist())

# Image ids are 1-based and follow the loader order.
serial = list(range(1, len(ans) + 1))
result = pd.DataFrame({'ImageId': serial, 'Label': ans})

In [None]:
# Write the predictions to result.csv in the working directory, without the DataFrame index column.
result.to_csv("result.csv",index=False)

In [None]:
# Display the prediction table as the cell output.
result