# MNIST DATASET

The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits that is commonly used for training various image processing systems. The database is also widely used for training and testing in the field of machine learning. It was created by "re-mixing" the samples from NIST's original datasets.

# 1. Importing necessary libraries. 

In [None]:
import torch
from tqdm import tqdm
import numpy as np
from torchvision import transforms,models
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, Dataset,ConcatDataset
from torch import nn

# 2. Using pandas library to read the datasets.

In [None]:
# Read the Kaggle "digit-recognizer" competition CSVs into DataFrames.
# `train` is later split into training/validation sets by create_dataloaders;
# `test` is reshaped into 28x28 images for the final predictions.
train=pd.read_csv("../input/digit-recognizer/train.csv")
test=pd.read_csv("../input/digit-recognizer/test.csv")


# 3. Creating a custom class to load data.

In [None]:
class MnistDataset(Dataset):
    """Dataset wrapping a DataFrame of flattened 28x28 digit images.

    A frame with exactly ``n_pixels`` (784) columns is treated as unlabeled:
    every column is a pixel and ``y`` is ``None``. Any other frame is treated
    as labeled: column 0 holds the label and the remaining columns the pixels.

    Attributes:
        n_pixels: Number of pixel columns expected for an unlabeled frame.
        X: ``uint8`` array of shape ``(N, 28, 28, 1)``.
        y: Tensor of labels, or ``None`` for unlabeled data.
        transform: Callable applied to each ``(28, 28, 1)`` image on access.
    """

    def __init__(self, dataframe,
                 transform = transforms.Compose([transforms.ToTensor()])):
        self.n_pixels = 784
        is_labeled = len(dataframe.columns) != self.n_pixels

        if is_labeled:
            # First column is the label, everything after it is pixel data.
            pixel_values = dataframe.iloc[:, 1:].values
            self.y = torch.from_numpy(dataframe.iloc[:, 0].values)
        else:
            # Pixel-only frame (no label column).
            pixel_values = dataframe.values
            self.y = None

        images = pixel_values.reshape((-1, 28, 28)).astype(np.uint8)
        # Trailing singleton axis gives each image an explicit channel (H, W, C).
        self.X = images[:, :, :, None]
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the transformed image, paired with its label when labels exist."""
        image = self.transform(self.X[idx])
        if self.y is None:
            return image
        return image, self.y[idx]

# 4. Defining the transforms.

In [None]:
 img_tform_1 = transforms.Compose([
    transforms.ToPILImage(),transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_2 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomRotation(10),transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_3 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomRotation(20),transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_4 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomAffine(degrees=15, translate=(0.1,0.1), scale=(0.85,0.85)),\
    transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_5 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomAffine(0,shear=30,scale=[1.15,1.15]),\
    transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_6 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomAffine(0,shear=20,scale=[0.8,0.8]),\
    transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

img_tform_7 = transforms.Compose([
    transforms.ToPILImage(),transforms.RandomAffine(degrees=30, scale=(1.2,1.2)),\
    transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])



# 5. Using the defined class and DataLoader to load data into pytorch datasets.

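We use a batch size of 64 and leave the DataLoader's number of workers at its default. The custom `MnistDataset` class converts the data from the CSV files into a format that the DataLoader can consume. The training split is wrapped once per transform (the default transform plus six augmentations) and the copies are concatenated; the validation split is wrapped with the same class, without augmentation.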

In [None]:
from sklearn.model_selection import train_test_split
seed=42
def create_dataloaders(seed, test_size=0.1, df=train, batch_size=64):
    """Split ``df`` and build the training and validation DataLoaders.

    The training split is wrapped seven times: once with MnistDataset's
    default transform and once with each of ``img_tform_2`` .. ``img_tform_7``.
    The seven datasets are concatenated, so one epoch visits every training
    row seven times. The validation split uses the default transform only.

    NOTE(review): the default transform is ToTensor without Normalize, while
    img_tform_2..7 normalize with mean/std 0.5, and img_tform_1 is never used.
    Confirm whether mixing the two input ranges is intended.

    Args:
        seed: ``random_state`` passed to ``train_test_split``.
        test_size: Fraction of ``df`` held out for validation.
        df: Labeled DataFrame to split.
        batch_size: Batch size for both loaders.

    Returns:
        ``(train_loader, valid_loader)``; only the training loader shuffles.
    """
    train_df, val_df = train_test_split(df, test_size=test_size, random_state=seed)

    # One un-augmented copy first, then one copy per augmentation pipeline.
    augmenting_tforms = (img_tform_2, img_tform_3, img_tform_4,
                         img_tform_5, img_tform_6, img_tform_7)
    train_parts = [MnistDataset(train_df)]
    train_parts.extend(MnistDataset(train_df, tform) for tform in augmenting_tforms)
    train_final = ConcatDataset(train_parts)

    val_data = MnistDataset(val_df)

    train_loader = DataLoader(train_final, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    return train_loader, valid_loader



In [None]:
# Select the first CUDA GPU when PyTorch can see one; otherwise fall back to CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

We now define the classes that the model will predict. There are 10 classes, one for each of the digits 0-9.

In [None]:
# Class labels are the digits 0 through 9; index i maps to digit i.
classes = list(range(10))

# 6. Defining the pretrained model.

ResNet, proposed in 2015 by researchers at Microsoft Research, introduced a new architecture called the Residual Network.

Residual Block:
To address the problem of vanishing/exploding gradients, this architecture introduced the residual block, which relies on a technique called skip connections. A skip connection bypasses a few layers and adds the block's input directly to its output.

This network starts from a 34-layer plain network architecture inspired by VGG-19, to which shortcut connections are then added. These shortcut connections turn the architecture into a residual network.

![](https://media.geeksforgeeks.org/wp-content/uploads/20200424011138/ResNet.PNG)

We replace the first convolutional layer of the ResNet so that it accepts the single-channel images in this dataset.
We also replace the final fully connected layer with one that produces 10 outputs, one per digit class.

In [None]:
# Start from a ResNet-34 with pretrained weights.
model = models.resnet34(pretrained=True)
# Replace the stem so it accepts 1-channel images. This has to be a 2-D
# convolution: the inputs are (N, 1, 28, 28) batches and the kernel is 3x3.
# (The previous nn.Conv1d with a 2-tuple kernel is rejected by current PyTorch,
# which only accepts 2-D/3-D input for conv1d.) The weight shape is unchanged,
# (64, 1, 3, 3). Stride 1 with padding 1 keeps the 28x28 spatial size.
# The new layer is freshly initialized, not pretrained.
model.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
# Swap the classification head for a 10-way linear layer (digits 0-9).
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

In [None]:
# Notebook display cell: evaluating the bare name shows the module's layer listing.
model

# 7. Defining the loss function and optimizer.

We are using a "CrossEntropy" loss function and an "Adam" optimizer while training the model.
We also use a scheduler which decays the learning rate of each parameter group by gamma every step_size epochs. Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler.

In [None]:
import torch.optim as optim

# Move the network to the GPU before the optimizer collects its parameters.
if torch.cuda.is_available():
    model.cuda()

# Cross-entropy over the 10 logits, optimized with the AMSGrad variant of Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), amsgrad=True)
# Multiply the learning rate by 0.1 every 3 scheduler steps (one step per epoch).
xp_lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1, verbose=True
)

# 8. Training the defined network.

We will train our network for 20 epochs.

In [None]:
# Training configuration and per-epoch history used by the plotting cells.
epochs = 20
# np.inf is the canonical spelling; the np.Inf alias was removed in NumPy 2.0.
valid_loss_min = np.inf
train_epoch = []       # batch losses of the epoch currently being trained
train_loss_vals = []   # mean training loss, one entry per epoch
train_acc_vals = []    # training accuracy in percent, one entry per epoch
valid_epoch = []       # batch losses of the epoch currently being validated
valid_loss_vals = []   # mean validation loss, one entry per epoch
valid_acc_vals = []    # validation accuracy in percent, one entry per epoch
test_loss_val = []     # not used by this loop; kept so the cell still defines it
test_epoch = []        # not used by this loop; kept so the cell still defines it
train_loader, valid_loader = create_dataloaders(seed=seed)
# The CUDA check is loop-invariant, so evaluate it once.
use_cuda = torch.cuda.is_available()

for i in range(epochs):
    # ---------------- training pass ----------------
    model.train()
    # Reset the batch-loss buffer so the reported loss is this epoch's mean,
    # not a running mean over every epoch so far.
    train_epoch = []
    train_acc = 0
    total = 0
    with tqdm(train_loader, unit="batch") as tepoch:
        for data, target in tepoch:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_epoch.append(loss.item())
            # Predicted class = index of the largest logit.
            predicted = output.detach().argmax(dim=1)
            train_acc += (predicted == target).sum().item()
            total += target.size(0)

    # One scheduler step per epoch.
    xp_lr_scheduler.step()
    train_loss_vals.append(sum(train_epoch) / len(train_epoch))
    train_acc_vals.append(100 * train_acc / total)

    # ---------------- validation pass ----------------
    model.eval()
    valid_epoch = []
    valid_acc = 0
    total = 0
    # No gradients are needed for evaluation; skipping autograd saves memory.
    with torch.no_grad(), tqdm(valid_loader, unit="batch") as tepoch:
        for data, target in tepoch:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_epoch.append(loss.item())
            predicted = output.argmax(dim=1)
            valid_acc += (predicted == target).sum().item()
            total += target.size(0)
    valid_loss_vals.append(sum(valid_epoch) / len(valid_epoch))
    valid_acc_vals.append(100 * valid_acc / total)

    print("epoch:{}\t  training_loss:{}\t  validation_loss:{}\t  train_accuracy:{}\t  validation_accuracy:{}"
          .format(i, train_loss_vals[i], valid_loss_vals[i], train_acc_vals[i], valid_acc_vals[i]))

    # Checkpoint whenever this epoch's validation loss is the best seen so far.
    if valid_loss_vals[i] <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss_vals[i]))
        torch.save(model.state_dict(), 'model_cifar.pt')
        valid_loss_min = valid_loss_vals[i]

# 9. Loading the best saved model.

In [None]:
# Restore the weights from the checkpoint that the training loop wrote each
# time the validation loss improved.
best_state_dict = torch.load('model_cifar.pt')
model.load_state_dict(best_state_dict)

# 10. Plotting the train and validation accuracy curves.

In [None]:
# x-axis: epoch numbers 1..epochs, shared by both curves.
epoch_axis = np.arange(1, epochs + 1)
for acc_series, acc_label in ((train_acc_vals, 'train_accuracy'),
                              (valid_acc_vals, 'valid_accuracy')):
    plt.plot(epoch_axis, acc_series, label=acc_label)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.title('Accuracy curve')

# 11. Plotting the train and validation loss curves.

In [None]:
# x-axis: epoch numbers 1..epochs, shared by both curves.
epoch_axis = np.arange(1, epochs + 1)
for loss_series, loss_label in ((train_loss_vals, 'train_loss'),
                                (valid_loss_vals, 'valid_loss')):
    plt.plot(epoch_axis, loss_series, label=loss_label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title('loss functions')

# 12. Loading the test data and preprocessing the images.

In [None]:

# Each row of `test` is one flattened image: reshape to (N, 1, 28, 28) and
# scale the pixel values by 1/255.
pixel_array = test.to_numpy().reshape(-1, 1, 28, 28)
test_images = pixel_array / 255.0
print(test_images.shape)

# float32 copy of the images for feeding to the network.
test_image_tensor = torch.as_tensor(test_images, dtype=torch.float32)

# 13. Making the predictions.

In [None]:
# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()
result = np.zeros(test_images.shape[0], dtype=np.int64)

# Feed the inputs to whichever device the model lives on rather than calling
# .cuda() unconditionally, so this cell also runs on CPU-only machines.
model_device = next(model.parameters()).device
# `classes` maps a predicted index to its digit label.
class_lookup = np.asarray(classes, dtype=np.int64)
# Predict in batches; one forward pass per image is needlessly slow.
pred_batch_size = 256

with torch.no_grad():
    for start in range(0, test_image_tensor.shape[0], pred_batch_size):
        batch = test_image_tensor[start:start + pred_batch_size].to(model_device)
        # Predicted class = index of the largest logit in each row.
        pred = model(batch).argmax(dim=1).cpu().numpy()
        result[start:start + len(pred)] = class_lookup[pred]

In [None]:
# Notebook display cell: preview the first ten predicted labels.
result[:10]

In [None]:
# Load the competition's submission template and fill its 'Label' column
# with the predicted digits.
sample_submission = pd.read_csv(
    '../input/digit-recognizer/sample_submission.csv'
).assign(Label=result)

In [None]:
# Notebook display cell: show the filled-in submission frame.
sample_submission

# 14. Converting predictions to CSV file and submitting.

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
sample_submission.to_csv('submission1.csv', index=False)

### With this, we come to the end of the notebook.
**Please upvote if you found it useful :)
It motivates me a lot to share more such stuff.**