In [73]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split

In [163]:
def _get_data(DATA_PATH, BATCH_SIZE):
    try:
        mnist_training_dataset = datasets.MNIST(root=DATAPATH+'train', train=True, download=True, transform=ToTensor())
        mnist_testing_dataset = datasets.MNIST(root=DATAPATH+'test', train=False, download=True, transform=ToTensor())
        
        training_dataset, validation_dataset = random_split(mnist_training_dataset, [int(0.8*len(mnist_training_dataset)), int(00.2*len(mnist_training_dataset))])
        
        train_loader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validation_loader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=False)
        test_loader = DataLoader(mnist_testing_dataset, batch_size=BATCH_SIZE, shuffle=False)
        
        return train_loader, validation_loader, test_loader
    
    except Exception as e:
        print('Unable to get data due to ', e)
        
def network_training(model, train_loader, validation_loader, criterion, optimizer, training_params):
    try:
        best_accuracy = 0
        loss_history = []
        training_acc_history = []
        validation_acc_history = []
        
        for epoch in range(0, training_params['NUM_EPOCHS']):
            model.train()
            loss_scores = []
            training_acc_scores = []
            correct_predictions = 0
            
            for batch_index, (images, targets) in enumerate(train_loader):
                images = images.to(training_params['DEVICE'])
                targets = targets.to(training_params['DEVICE'])
                
                outputs = model(images)
                loss = criterion(outputs, targets)
                loss_scores.append(loss.item())
                
                _, preds = torch.max(outputs, 1)
                correct_predictions = (preds==targets).sum().item()
                training_acc_scores.append(correct_predictions/targets.shape[0])
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                if (batch_index+1) % 100 == 0:
                    print(f"Epoch : [{epoch+1}/{training_params['NUM_EPOCHS']}] | Step : [{batch_index+1}/{len(train_loader)}] | Loss : {loss.item()} ")
            
            loss_history.append((sum(loss_scores)/len(loss_scores)))
            training_acc_history.append((sum(training_acc_scores)/len(training_acc_scores))*100)      
            print(f'Epoch : {epoch+1} | Loss : {loss_history[-1]} | Training Accuracy : {training_acc_history[-1]}%')
       
            model.eval()
            with torch.no_grad():
                correct_predictions = 0
                validation_acc_scores = []

                for images, targets in iter(validation_loader):
                    images = images.to(training_params['DEVICE'])
                    targets = targets.to(training_params['DEVICE'])
                                
                    outputs = model(images)

                    _, preds = torch.max(outputs, 1)
                    correct_predictions = (preds == targets).sum().item()
                    validation_acc_scores.append(correct_predictions/targets.shape[0])

                validation_acc_history.append((sum(validation_acc_scores)/len(validation_acc_scores))*100)
                print(f'Epoch {epoch+1} | Validation Accuracy {validation_acc_history[-1]}%')

                if validation_acc_history[-1]>best_accuracy:
                    best_accuracy = validation_acc_history[-1]
                    print('Saving the model...')
                    torch.save(model.state_dict(), f"Accuracy_{best_accuracy}_batchsize_{training_params['BATCH_SIZE']}_lr_{training_params['LEARNING_RATE']}.ckpt")
                
        return loss_history
    except Exception as e:
        print('Error occured in training method = ', e)

In [164]:
class CNN_Network(nn.Module):
    def __init__(self, model_params):
        try:
            super(CNN_Network, self).__init__()
            
            layers = []
            
            for input_channel, out_channel in zip([model_params['INPUT_SIZE']] + model_params['HIDDEN_LAYERS'][:-1], 
                                                     model_params['HIDDEN_LAYERS'][:len(model_params['HIDDEN_LAYERS'])]):
                layers.append(nn.Conv2d(input_channel, out_channel, model_params['KERNEL'], model_params['STRIDE'], model_params['PADDING'], bias=True))
                layers.append(nn.MaxPool2d(2, 2))
                layers.append(nn.ReLU())
            layers.append(nn.Flatten(1))      
            layers.append(nn.Linear(model_params['HIDDEN_LAYERS'][-1], model_params['OUTPUT_SIZE'], bias=True))

            self.layers = nn.Sequential(*layers)
        
        except Exception as e:
            print('initializing failed due to ', e)
    
    def forward(self, x):
        try:
            return self.layers(x)
        
        except Exception as e:
            print('forward pass failed due to ', e)

In [165]:
if __name__ == '__main__' :
    
    print(torch.__version__)
    
    DATA_PATH = 'D:\Repos\MLCS_Project_Assignments\\'
    
    training_params = {
        'DEVICE' : torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        'BATCH_SIZE' : 64,
        'LEARNING_RATE' : 0.001,
        'NUM_EPOCHS' : 3
    }
    
    model_params = {
        'INPUT_SIZE' : 1,
        'HIDDEN_LAYERS' : [160, 100, 64, 10],
        'OUTPUT_SIZE' : 10,
        'KERNEL' : 3,
        'STRIDE' : 1,
        'PADDING' : 1
    }
    
    train_loader, validation_loader, test_loader = _get_data(DATA_PATH, training_params['BATCH_SIZE'])
    
    network = CNN_Network(model_params).to(training_params['DEVICE'])
    print(f'Network structure is: {network.parameters}')
    print(f'Total number of parameters: {sum(p.numel() for p in network.parameters())}')
    
    criterion = nn.CrossEntropyLoss().to(training_params['DEVICE'])
    optimizer = optim.Adam(network.parameters(), lr=training_params['LEARNING_RATE'])

    loss = network_training(network, train_loader, validation_loader, criterion, optimizer, training_params)
        

1.10.0
Network structure is: <bound method Module.parameters of CNN_Network(
  (layers): Sequential(
    (0): Conv2d(1, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Conv2d(160, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): ReLU()
    (6): Conv2d(100, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): ReLU()
    (9): Conv2d(64, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): ReLU()
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=10, out_features=10, bias=True)
  )
)>
Total number of parameters: 209244
Epoch : [1/3] | Step : [100/750] | Loss : 0.5298

In [166]:
images, targets = next(iter(train_loader))

In [37]:
images.shape

torch.Size([64, 1, 28, 28])

In [42]:
len(targets.unique())

10

In [171]:
network.load_state_dict(torch.load("Accuracy_98.875_batchsize_64_lr_0.001.ckpt"))

with torch.no_grad():
    correct_predictions = 0
    test_images = 0
    testing_acc_history = []

    for images, targets in iter(test_loader):
        images = images.to(training_params['DEVICE'])
        targets = targets.to(training_params['DEVICE'])
        
        outputs = network(images)
        _, preds = torch.max(outputs, 1)
        correct_predictions = (preds==targets).sum().item()
        test_images += targets.shape[0]
        testing_acc_history.append(correct_predictions/targets.shape[0])
    
    print(f'Accuracy of the network on the {test_images} test images: {(sum(testing_acc_history)/len(testing_acc_history))*100}')

Accuracy of the network on the 10000 test images: 98.4375
