In [2]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import seaborn as sns
import time
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from IPython import display
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

# Render inline matplotlib figures as SVG
backend_inline.set_matplotlib_formats("svg")


#### Pytorch device specific configuration ###
# Use the CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Alternative for DirectML (AMD GPU): use these two lines instead of the
# CUDA/CPU selection above
# import torch_directml
# device = torch_directml.device()

# Make the selected device the default one for tensor allocation
torch.set_default_device(device)

# # Font update global for all plots
# plt.rcParams.update({"font.size": 18})

In [None]:
# Import dataset
# The path is passed directly so that np.loadtxt opens and closes the file
# itself; handing it an already-open file object would leave that handle open.
data = np.loadtxt("../Datasets/mnist_train_small.csv", delimiter=",")

# Extract data and labels: column 0 holds the labels, the remaining columns
# hold the features
labels = data[:, 0]
data = data[:, 1:]

# Normalize the data by its global maximum, so the largest value becomes 1
dataNorm = data / np.max(data)

In [None]:
# Step 1: NumPy arrays -> tensors (float32 features, int64 labels)
dataT = torch.tensor(dataNorm).to(torch.float32)
labelsT = torch.tensor(labels).to(torch.int64)

# Step 2: hold out 10% of the samples for testing (scikit-learn split)
train_data, test_data, train_labels, test_labels = train_test_split(
    dataT, labelsT, test_size=0.1
)

# Step 3: pair each sample with its label as PyTorch datasets
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

# Step 4: wrap the datasets in DataLoader objects
batchSize = 32

# Training: shuffled mini-batches; an incomplete final batch is dropped
train_loader = DataLoader(
    train_dataset,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device=device),
)

# Testing: one single batch that contains every test sample
test_loader = DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    generator=torch.Generator(device=device),
)

In [5]:
# Model class function
def createTheModel():
    """Build a fresh classifier together with its loss function and optimizer.

    Returns:
        tuple: ``(net, lossFun, optimizer)`` - a new ``mnistNet`` instance,
        an ``nn.CrossEntropyLoss`` and an Adam optimizer (lr=0.01) over the
        network's parameters.
    """

    class mnistNet(nn.Module):
        """Fully connected network: 784 -> 64 -> 32 -> 32 -> 10."""

        def __init__(self):
            super().__init__()

            # Layers are created in input-to-output order
            self.input = nn.Linear(784, 64)
            self.hidden1 = nn.Linear(64, 32)
            self.hidden2 = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)

        def forward(self, x):
            """Return the raw (un-normalized) output of the last layer."""
            # ReLU after every layer except the output layer
            for layer in (self.input, self.hidden1, self.hidden2):
                x = F.relu(layer(x))
            return self.output(x)

    # Instantiate the model, then the loss and optimizer that go with it
    model = mnistNet()
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.01)

    return model, criterion, adam

In [7]:
# Build a throwaway model to inspect its architecture and initial weights
tmpnet = createTheModel()[0]
print(tmpnet)

# The label names the layer whose weights are actually printed below
print("\n\nWeights for layer hidden1:")
print(tmpnet.hidden1.weight.data)

mnistNet(
  (input): Linear(in_features=784, out_features=64, bias=True)
  (hidden1): Linear(in_features=64, out_features=32, bias=True)
  (hidden2): Linear(in_features=32, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=10, bias=True)
)


Weights for layer fc1:
tensor([[-0.0161, -0.1243, -0.0525,  ..., -0.0495, -0.0487,  0.0282],
        [-0.0792, -0.0975, -0.1139,  ...,  0.0543, -0.0600, -0.0409],
        [-0.0773,  0.1006, -0.0329,  ...,  0.0799,  0.0707, -0.1013],
        ...,
        [ 0.0621, -0.0533,  0.1028,  ...,  0.0754, -0.1016, -0.1202],
        [-0.0324,  0.1150, -0.0979,  ...,  0.0594, -0.1109, -0.0211],
        [-0.0439,  0.1213,  0.0679,  ...,  0.0780, -0.0042,  0.0028]],
       device='cuda:0')


In [8]:
# Function to train the model
def trainTheModel(net, lossFun, optimizer, nEpochs):
    """Train ``net`` on mini-batches and evaluate it after every epoch.

    Training batches come from the notebook-level ``train_loader``; evaluation
    uses the first batch yielded by the notebook-level ``test_loader``.

    Args:
        net: model to train; its parameters are updated in place.
        lossFun: callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: optimizer used to update ``net``'s parameters.
        nEpochs: number of passes over ``train_loader``.

    Returns:
        tuple: ``(trainAcc, testAcc, losses, net)`` where
            trainAcc is a list with the mean batch accuracy (percent) per epoch,
            testAcc is a list with the test accuracy (percent, 0-d CPU tensor)
            per epoch,
            losses is a tensor of length ``nEpochs`` with the mean training
            loss per epoch,
            net is the trained model (the same object that was passed in).
    """
    # initialize loss and accuracies
    losses = torch.zeros(nEpochs)
    trainAcc = []
    testAcc = []

    # Loop over epochs
    for epoch in range(nEpochs):
        # Put model in training mode
        net.train()

        # Per-batch metrics for this epoch
        batchAcc = []
        batchLoss = []

        for X, y in train_loader:
            # Forward pass
            yHat = net(X)
            loss = lossFun(yHat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss for this batch. .item() already returns a plain Python
            # float on the host, so no .cpu() call is needed (a float has no
            # .cpu() method).
            batchLoss.append(loss.item())

            # Accuracy (percent) for this batch, moved to the CPU so that
            # NumPy can average it below
            batchAcc.append(
                (100 * torch.mean((torch.argmax(yHat, axis=1) == y).float())).cpu()
            )
        # Batch loop end

        # Average the batch metrics to get this epoch's train accuracy and loss
        trainAcc.append(np.mean(batchAcc))
        losses[epoch] = np.mean(batchLoss)

        # Test accuracy: evaluation mode, no gradient tracking
        net.eval()
        X, y = next(iter(test_loader))
        with torch.no_grad():
            yHat = net(X)

        testAcc.append(
            (100 * torch.mean((torch.argmax(yHat, axis=1) == y).float())).cpu()
        )

    # Function output
    return trainAcc, testAcc, losses, net