In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision import models,transforms
from torchvision.utils import make_grid
from torchvision.datasets import MNIST
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

# Allow up to 150 characters per line when tensors are printed.
torch.set_printoptions(linewidth=150)

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# MNIST
def mnist(batch_sz, valid_size=0.2, shuffle=True, random_seed=2000):
    """Build the MNIST train, validation and test ``DataLoader`` objects.

    The training and validation loaders both read the ``train=True`` MNIST
    split from ``./datasets`` (downloaded if missing) but draw from disjoint
    index subsets. Training images go through ``RandomCrop(28, padding=4)``
    followed by ``ToTensor``; validation and test images only go through
    ``ToTensor``.

    Args:
        batch_sz: Batch size used for all three loaders.
        valid_size: Fraction of the training split held out for validation.
            Must lie in ``[0, 1]``.
        shuffle: When truthy, the indices are permuted before the split is
            taken; otherwise the first ``split`` indices form the validation
            set.
        random_seed: Seed for the permutation. A private ``RandomState`` is
            used, so NumPy's global RNG is left untouched; the permutation is
            the one ``np.random.seed(random_seed)`` followed by
            ``np.random.shuffle`` would produce.

    Returns:
        Tuple ``(train_loader, valid_loader, test_loader)``.

    Raises:
        ValueError: If ``valid_size`` is outside ``[0, 1]``. Such values would
            otherwise slice the index list into a meaningless split.
    """
    # Validate first so a bad fraction fails before any dataset download.
    if not 0.0 <= valid_size <= 1.0:
        raise ValueError(f"valid_size must be in [0, 1], got {valid_size!r}")

    # Augmentation is applied to the training stream only.
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
        transforms.ToTensor(),
    ])
    # Validation and test images are converted to tensors without augmentation.
    transform_eval = transforms.Compose([
        transforms.ToTensor(),
    ])

    # Two views of the same training split, differing only in their transform.
    train_data = MNIST(root='./datasets', train=True, download=True, transform=transform_train)
    valid_data = MNIST(root='./datasets', train=True, download=True, transform=transform_eval)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    if shuffle:
        # Local generator: reproducible split without reseeding the global RNG.
        np.random.RandomState(random_seed).shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_sz, sampler=train_sampler, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data, batch_size=batch_sz, sampler=valid_sampler, pin_memory=True)

    # Test dataset, iterated in its stored order.
    test_data = MNIST(root='./datasets', train=False, download=True, transform=transform_eval)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_sz, shuffle=False, pin_memory=True)

    return train_loader, valid_loader, test_loader

In [None]:
batch_sz=64 # mini-batch size: the number of samples in each batch the loaders yield
# Build the train / validation / test loaders using mnist()'s default split settings.
train_loader, valid_loader, test_loader=mnist(batch_sz) 

In [None]:
class NNet(nn.Module):
    """Fully connected ReLU network ending in a linear classifier.

    The layers live in ``self.deep_nn`` (an ``nn.Sequential``) under the names
    ``ff{i}`` / ``activation{i}`` for each hidden layer and ``classifier`` for
    the output layer.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output logits.
        no_hidden_layers: Number of ``Linear`` + ``ReLU`` pairs placed before
            the classifier. With ``0`` the network is a single linear layer
            mapping ``input_size`` directly to ``output_size``.
        hidden_layer_size: Width of every hidden layer.
    """

    def __init__(self, input_size, output_size=10, no_hidden_layers=10, hidden_layer_size=512) -> None:
        super().__init__()
        self.deep_nn = nn.Sequential()

        # Width of the features feeding the next layer; starts at the raw input.
        in_features = input_size
        for i in range(no_hidden_layers):
            self.deep_nn.add_module(f'ff{i}', nn.Linear(in_features, hidden_layer_size))
            self.deep_nn.add_module(f'activation{i}', nn.ReLU())
            in_features = hidden_layer_size

        # Use the tracked width rather than hidden_layer_size, so the classifier
        # still matches the input when there are no hidden layers.
        self.deep_nn.add_module('classifier', nn.Linear(in_features, output_size))

    def forward(self, x):
        """Return the logits produced by passing ``x`` through ``self.deep_nn``."""
        return self.deep_nn(x)

In [None]:
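# Disabled shape check: flatten a dummy batch of 64 single-channel 28x28 images
# into a (64, 784) tensor, matching the 784 input features the model is built with.
# x = torch.randn(64,1,28,28)
# x = x.reshape(x.shape[0],-1)

# print(x.shape)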

In [None]:
# Build the network for 784 input features (a flattened 28x28 image) and move it to `device`.
model = NNet(784).to(device=device)

In [None]:
# test = model(x)

In [None]:
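# Hyperparameters (kept for reference only; the values actually used are the
# arguments and defaults of ModelTraining.__init__)

# input_size = 784
# lr = 1e-3
# mm = 0.5
# epoch = 100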



In [None]:
class ModelTraining:
    """Train a classifier with SGD and record per-epoch loss and accuracy.

    The optimizer is SGD with momentum, the loss is ``nn.CrossEntropyLoss`` and
    the learning rate is multiplied by 0.9 after every epoch through
    ``ExponentialLR``.

    Args:
        input_size: Number of input features. Stored on the instance only; the
            batches are flattened with ``reshape(batch, -1)`` regardless.
        epoch: Number of passes over the training loader.
        learning_rate: Initial SGD learning rate.
        momentum: SGD momentum.
        net: Network to train. Defaults to the notebook-level ``model``.
        loader: Iterable of ``(data, targets)`` batches. Defaults to the
            notebook-level ``train_loader``, looked up when training starts.
        run_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``, looked up when training starts.

    Attributes:
        loss: One entry per epoch: the sum of the per-batch loss values.
        accuracy: One entry per epoch: the fraction of training samples whose
            arg-max prediction matched the target (0.0 if no samples were seen).
    """

    def __init__(self, input_size, epoch=100, learning_rate=1e-3, momentum=0.5,
                 net=None, loader=None, run_device=None) -> None:
        self._in_size = input_size
        self._lr = learning_rate
        self._mm = momentum
        self._epoch = epoch

        # Bind the network once so the optimizer and the training loop are
        # guaranteed to work on the same parameters.
        self._model = net if net is not None else model
        # None means "use the notebook-level object when training starts".
        self._loader = loader
        self._device = run_device

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self._model.parameters(), lr=self._lr, momentum=self._mm)
        # `verbose` is deprecated on LR schedulers in recent PyTorch releases;
        # training() prints the learning rate itself instead.
        self.lr_scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)

        # outputs
        self.loss = list()
        self.accuracy = list()

    def training(self):
        """Run the configured number of epochs and return a completion message.

        After each epoch the summed batch loss is appended to ``self.loss``,
        the training accuracy to ``self.accuracy``, a one-line summary is
        printed, and the learning-rate scheduler is stepped.

        Returns:
            The string ``'Model Training completed!'``.
        """
        batches = self._loader if self._loader is not None else train_loader
        target_device = self._device if self._device is not None else device

        # Make sure the network is in training mode before optimising it.
        self._model.train()

        for epoch_idx in range(self._epoch):
            total_loss = 0.0
            total_correct = 0
            total_seen = 0
            for data, targets in batches:
                data = data.to(device=target_device)
                targets = targets.to(device=target_device)

                # Flatten everything after the batch dimension.
                data = data.reshape(data.shape[0], -1)

                # forward
                logits = self._model(data)
                loss = self.criterion(logits, targets)
                total_loss += loss.item()

                # gradient descent
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Count correct predictions as plain Python ints so the
                # running totals do not accumulate as tensors.
                predictions = torch.argmax(logits, dim=1)
                total_correct += (predictions == targets).sum().item()
                total_seen += targets.shape[0]

            epoch_accuracy = total_correct / total_seen if total_seen else 0.0
            self.loss.append(total_loss)
            self.accuracy.append(epoch_accuracy)

            current_lr = self.optimizer.param_groups[0]['lr']
            print(f"epoch {epoch_idx + 1}/{self._epoch}  loss={total_loss:.4f}  "
                  f"acc={epoch_accuracy:.4f}  lr={current_lr:.3e}")

            # Decay the learning rate once per epoch, after the optimizer steps.
            self.lr_scheduler.step()

        return 'Model Training completed!'

    def output(self):
        """Return the list of per-epoch summed losses recorded so far."""
        return self.loss


In [None]:
# Set up a training run for 784 input features, keeping ModelTraining's default epoch count, learning rate and momentum.
output = ModelTraining(784)

In [None]:
# Run the training loop; `test` receives the status string returned by training().
test = output.training()