In [74]:
import torch
import numpy as np
import pandas as pd
import os

In [75]:
train_data = pd.read_csv(filepath_or_buffer=os.path.join('data','digit-recognizer','train.csv'))

In [76]:
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
train_y = train_data.iloc[:,0]
train_x = train_data.iloc[:,1:]

In [78]:
VAL_PERCENT = 10

val_len = int(VAL_PERCENT/100*len(train_data))

split_idx = len(train_y)-val_len

val_x = train_x.iloc[split_idx:]
val_y = train_y.iloc[split_idx:]

train_x = train_x[:split_idx]
train_y = train_y[:split_idx]

val_test_x = val_x[0:int(len(val_x)/2)]
val_test_y = val_y[0:int(len(val_x)/2)]

In [79]:
train_x = torch.tensor(train_x.to_numpy())
val_x = torch.tensor(val_x.to_numpy())
train_y = torch.tensor(train_y.to_numpy())
val_y = torch.tensor(val_y.to_numpy())

val_test_x = torch.tensor(val_test_x.to_numpy())
val_test_y = torch.tensor(val_test_y.to_numpy())

In [80]:
print(train_x.size())
print(val_x.size())
print(train_y.size())
print(val_y.size())
print(val_test_x.size())
print(val_test_y.size())

torch.Size([37800, 784])
torch.Size([4200, 784])
torch.Size([37800])
torch.Size([4200])
torch.Size([2100, 784])
torch.Size([2100])


In [81]:
from pathlib import Path

test_data = pd.read_csv(filepath_or_buffer=Path('').joinpath('data','digit-recognizer','test.csv'))

test_data.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [82]:
test_data = torch.tensor(test_data.to_numpy())

In [83]:
test_data.size()

torch.Size([28000, 784])

Creating Dataset Class

In [84]:
from torch.utils.data import Dataset
from torch.nn.functional import normalize
from typing import Tuple, Union


class CustomMNISTDataset(Dataset):
	def __init__(self, data_tensor:torch.Tensor, labels:torch.Tensor=None, do_normalise:bool=True) -> None:
		super().__init__()
		self.dataset = data_tensor
		self.labels = labels
		self.do_normalise = do_normalise
		
	def __len__(self) -> int:
		return self.dataset.size()[0]

	def __getitem__(self, index:int) -> Union[Tuple[torch.Tensor, torch.Tensor], torch.Tensor]:
		if(self.do_normalise):
			img = normalize(self.dataset[index].float(), dim=0)
		else:
			img = self.dataset[index].float()

		if(self.labels != None):
			return (img, self.labels[index])
		else:
			return img

In [85]:
train_dataset = CustomMNISTDataset(data_tensor=train_x, labels=train_y)
val_dataset = CustomMNISTDataset(data_tensor=val_x, labels=val_y)
val_test_dataset = CustomMNISTDataset(data_tensor=val_test_x, labels=val_test_y)
test_dataset = CustomMNISTDataset(data_tensor=test_data)

Creating DataLoader

In [87]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(dataset=train_dataset, batch_size=256, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=512, shuffle=False)
val_test_dataloader = DataLoader(dataset=val_test_dataset, batch_size=512, shuffle=False)

In [88]:
first_batch = next(iter(train_dataloader))

In [89]:
first_batch[1].shape

torch.Size([256])

Building Neural Network

In [118]:
import torch.nn as nn

class CustomNN(nn.Module):
	def __init__(self) -> None:
		super().__init__()

		self.input_layer = nn.modules.Linear(in_features=784, out_features=28, bias=False)
		self.h1 = nn.modules.Linear(in_features=28, out_features=14, bias=True)
		self.h2 = nn.modules.Linear(in_features=14, out_features=10, bias=True)
		self.output_layer = nn.modules.Linear(in_features=10, out_features=10, bias=True)
		self.h_act = nn.modules.ReLU()
		
	def forward(self, inp_data:torch.Tensor) -> torch.Tensor:

		out = self.input_layer(inp_data)
		out = self.h_act(out)
		out = self.h1(out)
		out = self.h_act(out)
		out = self.h2(out)
		out = self.h_act(out)
		out = self.output_layer(out)
		return out
	
net = CustomNN()

In [119]:
import time
import torch.nn.functional as F

with torch.no_grad():
	start_time = time.time_ns()
	test_output = F.softmax(input=net(first_batch[0]), dim=-1)
	end_time = time.time_ns()

print(f"Time taken : {(end_time-start_time)/(10**9)}")

Time taken : 0.00072798


Optimising Model Parameter

In [153]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

optimizer = SGD(params=net.parameters(), lr=1e-2)
loss_fn = CrossEntropyLoss()

epochs = 10

for i in range(epochs):
	print(f"Training Epoch {i+1}")

	epoch_loss = []

	for (data, labels) in train_dataloader:
		output = net(data) # output shape = batch_size * number of neurons in output layer , [batch_size,10]
		batch_loss = loss_fn(output, labels)
		batch_loss.backward()
		optimizer.step()

		epoch_loss.append(batch_loss.cpu().item())
	
	optimizer.zero_grad()
	epoch_loss = np.average(epoch_loss)
	print(f"Loss for epoch {i+1} = {epoch_loss}")


Training Epoch 1
Loss for epoch 1 = 0.10088257091371594
Training Epoch 2
Loss for epoch 2 = 0.10316209511428669
Training Epoch 3
Loss for epoch 3 = 0.09422728975580351
Training Epoch 4
Loss for epoch 4 = 0.08208535567580445
Training Epoch 5
Loss for epoch 5 = 0.08428954609946625
Training Epoch 6
Loss for epoch 6 = 0.0800251913307285
Training Epoch 7
Loss for epoch 7 = 0.07900892710313201
Training Epoch 8
Loss for epoch 8 = 0.0776975731604506
Training Epoch 9
Loss for epoch 9 = 0.0702596071578965
Training Epoch 10
Loss for epoch 10 = 0.08154640079954185


In [154]:
def calc_test_accuracy(network:nn.Module, test_dl:DataLoader) -> float:
	total_acc = []
	total_weights = []
	with torch.no_grad():
		for (data,labels) in test_dl:
			out = network(data) # shape = [batch_size,output_classes]
			_,out = torch.max(F.softmax(input=out, dim=-1), dim=-1) # shape = [batch_size]
			correct = torch.sum(out == labels).item()
			len_batch = data.size()[0]
			batch_acc = (correct/len_batch)*100
			total_acc.append(batch_acc)
			total_weights.append(len_batch)

	total_acc = np.average(total_acc, weights=total_weights)
	return total_acc


val_test_acc = calc_test_accuracy(network=net, test_dl=val_test_dataloader)
			
print(val_test_acc)


95.28571428571429


Doing Training With Validation

In [159]:
net = CustomNN()

optimizer = SGD(params=net.parameters(), lr=1e-2)
loss_fn = CrossEntropyLoss()

epochs = 25

last_val_acc = 0.0
plat_steps = 0

for i in range(epochs):
	print(f"Training Epoch {i+1}")

	epoch_loss = []

	for (data, labels) in train_dataloader:
		output = net(data) # output shape = batch_size * number of neurons in output layer , [batch_size,10]
		batch_loss = loss_fn(output, labels)
		batch_loss.backward()
		optimizer.step()

		epoch_loss.append(batch_loss.cpu().item())
	
	optimizer.zero_grad()
	epoch_loss = np.average(epoch_loss)
	print(f"Loss for epoch {i+1} = {epoch_loss}")

	val_acc = calc_test_accuracy(network=net, test_dl=val_dataloader)
	if(val_acc < last_val_acc):
		plat_steps += 1
	else:
		plat_steps = 0

	last_val_acc = val_acc

	if(plat_steps == 3):
		print("Training stopped early.")
		break
	
	

Training Epoch 1
Loss for epoch 1 = 2.308036006785728
Training Epoch 2
Loss for epoch 2 = 2.1854890576890997
Training Epoch 3
Loss for epoch 3 = 1.8348153225473456
Training Epoch 4
Loss for epoch 4 = 1.4656509716768522
Training Epoch 5
Loss for epoch 5 = 1.0830280446522944
Training Epoch 6
Loss for epoch 6 = 0.7842984851953145
Training Epoch 7
Loss for epoch 7 = 0.5744852474009668
Training Epoch 8
Loss for epoch 8 = 0.4452987192450343
Training Epoch 9
Loss for epoch 9 = 0.35441735457326917
Training Epoch 10
Loss for epoch 10 = 0.3015650441316334
Training Epoch 11
Loss for epoch 11 = 0.2685880505958119
Training Epoch 12
Loss for epoch 12 = 0.2558036116329399
Training Epoch 13
Loss for epoch 13 = 0.22428948704052615
Training Epoch 14
Loss for epoch 14 = 0.19986696052994277
Training Epoch 15
Loss for epoch 15 = 0.19503170850913268
Training Epoch 16
Loss for epoch 16 = 0.17261677018895344
Training Epoch 17
Loss for epoch 17 = 0.16222135480996724
Training Epoch 18
Loss for epoch 18 = 0.1525

Testing on val test dataset

In [160]:
test_acc = calc_test_accuracy(network=net, test_dl=val_test_dataloader)
print(test_acc)

94.42857142857143
