**Importing the PyTorch Library**

In [1]:
import numpy as np 
import pandas as pd

In [2]:
import torch
import torch.nn.functional as F
from torchvision import datasets,transforms
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
from tqdm import tqdm

**Read the required Dataset**

In [3]:
trainData = pd.read_csv('trainLabels.csv')


**Analyzing the data with PyTorch**

In [4]:
print("Number of points:",trainData.shape[0])
print("Number of features:",trainData.shape[1])
print("Features:",trainData.columns.values)
print("Number of Unique Values")
for col in trainData:
    print(col,":",len(trainData[col].unique()))
plt.figure(figsize=(12,8))

Number of points: 50000
Number of features: 2
Features: ['id' 'label']
Number of Unique Values
id : 50000
label : 10


<Figure size 864x576 with 0 Axes>

<Figure size 864x576 with 0 Axes>

**Getting the validation set using PyTorch**

In [5]:
from torch.utils.data import random_split
val_size = 5000
train_size = len(trainData) - val_size

train_ds, val_ds = random_split(trainData, [train_size, val_size])
len(train_ds), len(val_ds)

(45000, 5000)

In [6]:
from torch.utils.data.dataloader import DataLoader

batch_size=64
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size, num_workers=4, pin_memory=True)

**Defining the required functions**

In [7]:
@torch.no_grad()
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        accu = accuracy(out,labels)
        return loss,accu
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'Loss': loss.detach(), 'Accuracy': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['Loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['Accuracy'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'Loss': epoch_loss.item(), 'Accuracy': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch :",epoch + 1)
        print(f'Train Accuracy:{result["train_accuracy"]*100:.2f}% Validation Accuracy:{result["Accuracy"]*100:.2f}%')
        print(f'Train Loss:{result["train_loss"]:.4f} Validation Loss:{result["Loss"]:.4f}')

In [8]:
 
global file_name1
file_name1='model.pth'
model_folder_path = './model'
if not os.path.exists(model_folder_path):
    os.makedirs(model_folder_path)
file_name = os.path.join(model_folder_path,file_name1)
#         torch.save(self.state_dict(), file_name)

**Implementation of convolutional neural network module**

In [9]:
class Cifar10CnnModel(ImageClassificationBase):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8
            nn.BatchNorm2d(128),

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 256 x 4 x 4
            nn.BatchNorm2d(256),

            nn.Flatten(), 
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10))
        
    def forward(self, xb):
        return self.network(xb)
#     Train the model
#     @torch.no_grad()
    def evaluate(model, data_loader):
        model.eval()
        outputs = [model.validation_step(batch) for batch in data_loader]
        return model.validation_epoch_end(outputs)

    def fit(model, train_loader, val_loader,epochs=10,learning_rate=0.001):
        best_valid = None
        history = []

        optimizer = torch.optim.Adam(model.parameters(), learning_rate,weight_decay=0.0005)
        for epoch in range(epochs):
            # Training Phase 
            model.train()
            train_losses = []
            train_accuracy = []
            for batch in tqdm(train_loader):
                loss,accu = model.training_step(batch)
                train_losses.append(loss)
                train_accuracy.append(accu)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            # Validation phase
            result = evaluate(model, val_loader)
            result['train_loss'] = torch.stack(train_losses).mean().item()
            result['train_accuracy'] = torch.stack(train_accuracy).mean().item()
            model.epoch_end(epoch, result)
            print("Model saved")

            if(best_valid == None or best_valid<result['Accuracy']):
                best_valid=result['Accuracy']
                torch.save(model.state_dict(),file_name1)
                print("Model saved")
    #             torch.save(model.state_dict(), 'm')
            history.append(result)
        return history

In [10]:
# from google.colab import drive
# drive.mount('/content/gdrive')

In [11]:
# global file_name  
# file_name='model.pth'
# model_folder_path = './model'
# if not os.path.exists(model_folder_path):
#     os.makedirs(model_folder_path)
# file_name = os.path.join(model_folder_path, file_name)
# #         torch.save(self.state_dict(), file_name)

**Train the model**

In [12]:
# @torch.no_grad()
# def evaluate(model, data_loader):
#     model.eval()
#     outputs = [model.validation_step(batch) for batch in data_loader]
#     return model.validation_epoch_end(outputs)

# def fit(model, train_loader, val_loader,epochs=10,learning_rate=0.001):
#     best_valid = None
#     history = []
  
#     optimizer = torch.optim.Adam(model.parameters(), learning_rate,weight_decay=0.0005)
#     for epoch in range(epochs):
#         # Training Phase 
#         model.train()
#         train_losses = []
#         train_accuracy = []
#         for batch in tqdm(train_loader):
#             loss,accu = model.training_step(batch)
#             train_losses.append(loss)
#             train_accuracy.append(accu)
#             loss.backward()
#             optimizer.step()
#             optimizer.zero_grad()
#         # Validation phase
#         result = evaluate(model, val_loader)
#         result['train_loss'] = torch.stack(train_losses).mean().item()
#         result['train_accuracy'] = torch.stack(train_accuracy).mean().item()
#         model.epoch_end(epoch, result)
#         print("Model saved")
#         global file_name
    

#         if(best_valid == None or best_valid<result['Accuracy']):
#             best_valid=result['Accuracy']
#             torch.save(model.state_dict(),file_name)
#             print("Model saved")
# #             torch.save(model.state_dict(), 'm')
#         history.append(result)
#     return history,filename

In [13]:
# model = torch.load('model_cnn.pth')
# model_save_name = 'model_cnn.pth'
model= torch.load(file_name1)
history = fit(model, train_dl, val_dl)

FileNotFoundError: [Errno 2] No such file or directory: 'model.pth'

**Plotting the results**

In [None]:
def plot_accuracies(history):
    Validation_accuracies = [x['Accuracy'] for x in history]
    Training_Accuracies = [x['train_accuracy'] for x in history]
    plt.plot(Training_Accuracies, '-rx')
    plt.plot(Validation_accuracies, '-bx')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(['Training', 'Validation'])
    plt.title('Accuracy vs. No. of epochs');
plot_accuracies(history)

In [None]:
def plot_losses(history):
    train_losses = [x.get('train_loss') for x in history]
    val_losses = [x['Loss'] for x in history]
    plt.plot(train_losses, '-bx')
    plt.plot(val_losses, '-rx')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', 'Validation'])
    plt.title('Loss vs. No. of epochs');
plot_losses(history)

**Getting the accuracy**

In [None]:
test_dataset = ImageFolder(data_dir+'/test', transform=ToTensor())
test_loader = DeviceDataLoader(DataLoader(test_dataset, batch_size), device)
result = evaluate(final_model, test_loader)
print(f'Test Accuracy:{result["Accuracy"]*100:.2f}%')