In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training and test datasets (CSV files) from the Kaggle input directory.
train=pd.read_csv("../input/digit-recognizer/train.csv")
test=pd.read_csv("../input/digit-recognizer/test.csv")

In [None]:
# Preview the first rows of the training set, then show summary statistics of its columns.
print(train.head())
train.describe()

In [None]:
# Preview the first rows of the test set.
test.head()

In [None]:
# Total number of missing values in the training set (summed over every column).
train.isnull().sum().sum()

In [None]:
# Total number of missing values in the test set (summed over every column).
test.isnull().sum().sum()

In [None]:
# Split the training frame into the pixel features and the target column.
# The test set does not contain the target variable, so it needs no such split.
pixels_train = train.drop(columns=["label"])
label = train["label"]



In [None]:
# Normalise the pixel values of the training and test data by dividing by 255
# (presumably the maximum intensity), so all features share one fixed range.
PIXEL_SCALE = 255
norm_pixels_train = pixels_train / PIXEL_SCALE
norm_test = test / PIXEL_SCALE

In [None]:
# Hold out 5% of the training data as a validation set; the fixed random_state
# makes the split repeatable.

from sklearn.model_selection import train_test_split
(
    feature_train,
    feature_valid,
    target_train,
    target_valid,
) = train_test_split(
    norm_pixels_train,
    label,
    test_size=0.05,
    random_state=0,
)

In [None]:
# Convert the DataFrames to NumPy arrays and then to tensors.
# Feature frames are reshaped to (N, 1, 28, 28); targets stay one-dimensional.
import numpy as np
import torch

IMAGE_SHAPE = (-1, 1, 28, 28)


def _frame_to_images(frame):
    """Return the frame's values as a tensor reshaped to IMAGE_SHAPE."""
    return torch.from_numpy(frame.values.reshape(IMAGE_SHAPE))


# NOTE: from here on `test` refers to the image tensor, not the DataFrame read from CSV.
test = _frame_to_images(norm_test)
features_Train = _frame_to_images(feature_train)
features_Valid = _frame_to_images(feature_valid)
targets_Train = torch.from_numpy(target_train.values)
targets_Valid = torch.from_numpy(target_valid.values)
len(targets_Train)


In [None]:
# Wrap the tensors in TensorDatasets ahead of data loading.
# Features are cast to float32; the test set carries features only (no targets).
train_set = torch.utils.data.TensorDataset(features_Train.to(torch.float32), targets_Train)
valid_set = torch.utils.data.TensorDataset(features_Valid.to(torch.float32), targets_Valid)
test_set = torch.utils.data.TensorDataset(test.to(torch.float32))


In [None]:
# Build the DataLoaders. Only the training loader shuffles; validation and test
# keep their original order so predictions line up with the input rows.
BATCH_SIZE = 25
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Helper that counts how many predictions in a batch match their labels.
def get_num_correct(pred,label):
    """Return the number of rows of `pred` whose arg-max along dim 1 equals `label`.

    Args:
        pred: 2-D tensor of per-class scores, one row per sample.
        label: tensor of class indices, one per row of `pred`.

    Returns:
        The count of matching positions as a Python number.
    """
    predicted_classes = pred.argmax(dim=1)
    matches = predicted_classes.eq(label)
    return matches.sum().item()


In [None]:
#here i have defined my own neural network with the help of pytorch framework to get desirable model
import torch.nn as nn
import torch.nn.functional as F
class Network(nn.Module):
    """Convolutional classifier mapping (N, 1, 28, 28) images to 10 class scores.

    Layout: two 5x5 convolutions (32 channels) -> 2x2 max-pool -> two 3x3
    convolutions (64 channels) -> 2x2 max-pool -> three fully connected layers
    (576 -> 256 -> 128 -> 10). Every convolution and hidden linear layer is
    followed by ReLU; the output layer returns raw scores (no softmax), which is
    what `F.cross_entropy` expects.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order so seeded initialisation is repeatable.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)

        # For 28x28 inputs the feature map is 64 channels of 3x3 after the second pool.
        self.fc1 = nn.Linear(in_features=64 * 3 * 3, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)

        self.out = nn.Linear(in_features=128, out_features=10)

    def forward(self, t):
        """Return the (N, 10) class scores for a batch of (N, 1, 28, 28) images.

        Raises:
            RuntimeError: if the spatial size of `t` does not yield 64*3*3
                features per sample (i.e. the images are not 28x28).
        """
        # First convolutional stage: 28 -> 24 -> 20, pooled to 10.
        t = F.relu(self.conv1(t))
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Second convolutional stage: 10 -> 8 -> 6, pooled to 3.
        t = F.relu(self.conv3(t))
        t = F.relu(self.conv4(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Flatten per sample. Unlike reshape(-1, 576), this keeps the batch
        # dimension fixed, so a wrong input size raises in fc1 instead of
        # silently producing a different number of rows.
        t = t.flatten(start_dim=1)

        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        return self.out(t)

In [None]:
# Train the model on the training DataLoader for a fixed number of epochs.
import torch.optim as optim

NUM_EPOCHS = 5
SEED = 0

# Seed PyTorch so weight initialisation and the shuffled batch order are repeatable.
torch.manual_seed(SEED)

# Use the GPU when one is available; otherwise fall back to the CPU so the cell still runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the instance of the model defined above.
network = Network().to(device)

optimizer = optim.RMSprop(network.parameters(), lr=0.001, alpha=0.9)
lr_reduction = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    threshold=0.0001,
    threshold_mode='rel',
    cooldown=0,
    min_lr=0.00001,
)

# Take the sample count from the dataset instead of hard-coding it.
num_train = len(train_loader.dataset)

network.train()
for epoch in range(1, NUM_EPOCHS + 1):
    total_loss = 0.0
    total_correct = 0
    print("----------------------------------------------------------------------")
    print("Epoch:", epoch)
    print("----------------------------------------------------------------------")
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        preds = network(images)
        loss = F.cross_entropy(preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_correct += get_num_correct(preds, labels)
        total_loss += loss.item()

    # ReduceLROnPlateau only acts when it is stepped with the monitored metric;
    # the mean training loss of the epoch is used here (mode='min').
    mean_loss = total_loss / max(len(train_loader), 1)
    lr_reduction.step(mean_loss)

    # One summary per epoch rather than one line per batch, which floods the output.
    print("Correct: ", total_correct, "loss: ", total_loss)
    print("-----------------------------------------")
    print("Accuracy: ", (total_correct / max(num_train, 1)))
    print("-----------------------------------------")


In [None]:
# Function that collects the model's outputs for every batch of a DataLoader
# (used for the validation loader, whose batches also carry labels).
def get_all_pred(model,loader):
    """Return the model's outputs for all batches of `loader`, concatenated along dim 0.

    Args:
        model: the module to run. Inputs are moved to the device of its parameters.
        loader: iterable of batches whose first element is the input tensor; any
            further elements (e.g. labels) are ignored.

    Returns:
        A tensor of stacked outputs on the model's device, detached from autograd.
        An empty 1-D tensor is returned when `loader` yields no batches.
    """
    # Follow the model's own device rather than assuming a GPU is present.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    was_training = model.training
    model.eval()
    outputs = []
    try:
        # Inference only: without no_grad every batch would keep its autograd graph alive.
        with torch.no_grad():
            for batch in loader:
                outputs.append(model(batch[0].to(device)))
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if not outputs:
        return torch.tensor([]).to(device)
    # Concatenate once at the end instead of re-copying the result on every batch.
    return torch.cat(outputs, dim=0)

In [None]:
# Collect the model's outputs for the validation DataLoader and move them to the CPU.
valid_pred = get_all_pred(network, valid_loader).cpu()
print(valid_pred.shape)
valid_pred

In [None]:
# Number of validation samples the model predicted correctly (a count, not a ratio).
# Author's note from an earlier run: 2079 of the 2100 validation samples were correct.
valid_ac=get_num_correct(valid_pred,targets_Valid)
valid_ac

In [None]:
#here i have defined a function to predict the output of my testset dataloader

def _get_all_pred(model,loader):
    all_preds=torch.tensor([]).to('cuda')
    for batch in loader:
        image=batch[0].to("cuda")
        pred=model(image)
        all_preds=torch.cat((all_preds,pred),dim=0)
    return all_preds

In [None]:
# Get the model's raw outputs for the test DataLoader (not moved to the CPU in this cell).

test_pred=_get_all_pred(network,test_loader)
print(test_pred.shape)
test_pred



In [None]:
# Turn each row of scores into its predicted class index, then move the result to the CPU.

test_result = test_pred.argmax(dim=1).cpu()
print(test_result.shape)
test_result

In [None]:
# Convert the predicted labels into a plain Python list.
Label = test_result.tolist()
# Show only a short preview: echoing the whole list floods the notebook output.
Label[:10]

In [None]:
# Generate a 1-based id for each test image as a list. The length follows the
# number of predictions rather than a hard-coded test-set size.
ImageId = list(range(1, len(test_result) + 1))
# Show only a short preview: echoing the whole list floods the notebook output.
ImageId[:10]

In [None]:
# Combine the two lists into a single two-column DataFrame via a dictionary.
submission_columns = {"ImageId": ImageId, "Label": Label}
df = pd.DataFrame(submission_columns)

In [None]:
# Display the predictions in the form of a DataFrame.
df

In [None]:
# Write the submission DataFrame to a CSV file with pandas, leaving out the index column.
df.to_csv('submission.csv', index=False)
