In [1]:
#Importing packages and checking GPU
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchsummary import summary
import numpy as np

#Choose the compute device: the first CUDA GPU when one is available, otherwise the CPU
if not torch.cuda.is_available():
	print("PyTorch is using the CPU")
	device = torch.device("cpu")
else:
	print("PyTorch is using the GPU")
	GPUCount = torch.cuda.device_count()
	print(f"Found {GPUCount} GPUs")

	#Report every visible GPU by index and name
	for GPUIndex in range(GPUCount):
		GPUName = torch.cuda.get_device_name(GPUIndex)
		print(f"GPU {GPUIndex} found: {GPUName}")

	#Only GPU 0 is used, even when several are listed
	device = torch.device("cuda:0")

print(f"Selected Device: {device}")

PyTorch is using the GPU
Found 1 GPUs
GPU 0 found: NVIDIA GeForce RTX 5070 Laptop GPU
Selected Device: cuda:0


In [2]:
#Load dataset

#Convert each image to a tensor, then normalise with mean 0.5 and std 0.5
Transform = transforms.Compose([
	transforms.ToTensor(),
	transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

#Training split (downloaded into ./data if missing), shuffled by the loader
TrainSet = torchvision.datasets.MNIST(
	root='./data',
	train=True,
	download=True,
	transform=Transform,
)
TrainLoader = DataLoader(dataset=TrainSet, batch_size=128, shuffle=True)

#Test split, read in a fixed order
TestSet = torchvision.datasets.MNIST(
	root='./data',
	train=False,
	download=True,
	transform=Transform,
)
TestLoader = DataLoader(dataset=TestSet, batch_size=128, shuffle=False)

In [3]:
#Creating Model

#Define architecture
class MNISTModel(nn.Module):
	"""Convolutional classifier: three 3x3 convolutions followed by two linear layers.

	forward() expects a batch shaped (N, 1, 28, 28) and returns (N, 10) raw class
	scores; no softmax is applied inside the model.
	"""

	def __init__(self):
		super().__init__()

		#Stage 1: 1 -> 32 channels, ReLU, 2x2 max-pool
		self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
		self.relu1 = nn.ReLU()
		self.pool1 = nn.MaxPool2d(2)

		#Stage 2: 32 -> 64 channels, ReLU, 2x2 max-pool
		self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
		self.relu2 = nn.ReLU()
		self.pool2 = nn.MaxPool2d(2)

		#Stage 3: 64 -> 64 channels
		#NOTE(review): no activation follows conv3, so conv3 and fc1 are applied back to back - confirm this is intended
		self.conv3 = nn.Conv2d(64, 64, kernel_size=3)

		#Stage 4: collapse the feature maps into one vector per sample
		self.flatten = nn.Flatten()

		#Stage 5: hidden fully connected layer (64 channels x 3 x 3 inputs)
		self.fc1 = nn.Linear(64*3*3, 64)
		self.relu3 = nn.ReLU()

		#Stage 6: one output score per class
		self.fc2 = nn.Linear(64, 10)

	def forward(self,x):
		"""Run the batch x through every stage in order and return the class scores."""

		#Stage 1
		features = self.conv1(x)
		features = self.relu1(features)
		features = self.pool1(features)

		#Stage 2
		features = self.conv2(features)
		features = self.relu2(features)
		features = self.pool2(features)

		#Stage 3
		features = self.conv3(features)

		#Stage 4
		flat = self.flatten(features)

		#Stage 5
		hidden = self.fc1(flat)
		hidden = self.relu3(hidden)

		#Stage 6
		scores = self.fc2(hidden)
		return scores

#Build the network, move it to the selected device and print its layer summary
print("Creating MNIST Model...")
model = MNISTModel()
model = model.to(device)
print("Model Created")

print("Model Summary: ")
summary(model, input_size=(1,28,28))

#Loss and optimiser used by the training cell
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Creating MNIST Model...
Model Created
Model Summary: 
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             320
              ReLU-2           [-1, 32, 26, 26]               0
         MaxPool2d-3           [-1, 32, 13, 13]               0
            Conv2d-4           [-1, 64, 11, 11]          18,496
              ReLU-5           [-1, 64, 11, 11]               0
         MaxPool2d-6             [-1, 64, 5, 5]               0
            Conv2d-7             [-1, 64, 3, 3]          36,928
           Flatten-8                  [-1, 576]               0
            Linear-9                   [-1, 64]          36,928
             ReLU-10                   [-1, 64]               0
           Linear-11                   [-1, 10]             650
Total params: 93,322
Trainable params: 93,322
Non-trainable params: 0
-------------------------------------------

In [4]:
#Training and saving the model
#Each epoch runs one pass over TrainLoader with weight updates, then one
#gradient-free pass over TestLoader to report loss and accuracy.

print("Training Model...")
epochs = 10

for epoch in range(epochs):
	#Train Loop

	#Set model to training mode
	model.train()
	#TrainLoss holds the sum of per-sample losses; TrainSamples counts the samples seen
	TrainLoss = 0.0
	TrainSamples = 0

	for inputs,labels in TrainLoader:
		inputs,labels = inputs.to(device),labels.to(device)
		SamplesInBatch = labels.size(0)

		#Zero the parameter gradients
		optimizer.zero_grad()

		#Forward pass
		outputs = model(inputs)

		#Calculate loss
		loss = loss_function(outputs,labels)

		#Backward pass
		loss.backward()

		#Update weights
		optimizer.step()

		#loss_function (nn.CrossEntropyLoss with default reduction) returns the batch mean.
		#Weight it by the batch size so the smaller final batch is not over-counted.
		TrainLoss += loss.item()*SamplesInBatch
		TrainSamples += SamplesInBatch

	#Validation Loop
	#NOTE(review): validation reads TestLoader (the test split), so these numbers
	#are not independent of a later evaluation on that same loader.
	model.eval()
	ValidationLoss = 0.0
	correct = 0
	total = 0

	with torch.no_grad():
		for images,labels in TestLoader:
			images,labels = images.to(device),labels.to(device)
			SamplesInBatch = labels.size(0)

			outputs = model(images)
			loss = loss_function(outputs,labels)
			#Same sample weighting as in the training loop
			ValidationLoss += loss.item()*SamplesInBatch

			#Predicted class is the index of the largest score
			predicted = outputs.argmax(dim=1)
			total += SamplesInBatch
			correct += (predicted == labels).sum().item()

	#Per-sample averages over the whole epoch
	acc = 100*correct/total
	print(f"Epoch: {epoch+1}/{epochs} | "
		f"Training Loss: {TrainLoss/TrainSamples:.3f} | "
		f"Validation Loss: {ValidationLoss/total:.3f} | "
		f"Accuracy: {acc:.3f}%")

print("Training Complete")

#Save the model
#Only the weights (state_dict) are written; loading them needs the MNISTModel class
torch.save(model.state_dict(),"MNISTModel.pth")
print("Model Saved")

Training Model...
Epoch: 1/10 | Training Loss: 0.223 | Validation Loss: 0.056 | Accuracy: 98.050%
Epoch: 2/10 | Training Loss: 0.057 | Validation Loss: 0.050 | Accuracy: 98.230%
Epoch: 3/10 | Training Loss: 0.043 | Validation Loss: 0.038 | Accuracy: 98.700%
Epoch: 4/10 | Training Loss: 0.034 | Validation Loss: 0.035 | Accuracy: 98.820%
Epoch: 5/10 | Training Loss: 0.028 | Validation Loss: 0.030 | Accuracy: 99.040%
Epoch: 6/10 | Training Loss: 0.024 | Validation Loss: 0.036 | Accuracy: 98.860%
Epoch: 7/10 | Training Loss: 0.020 | Validation Loss: 0.035 | Accuracy: 98.900%
Epoch: 8/10 | Training Loss: 0.018 | Validation Loss: 0.033 | Accuracy: 99.000%
Epoch: 9/10 | Training Loss: 0.016 | Validation Loss: 0.040 | Accuracy: 98.880%
Epoch: 10/10 | Training Loss: 0.015 | Validation Loss: 0.030 | Accuracy: 99.100%
Training Complete
Model Saved


In [5]:
#Testing Model
#One gradient-free pass over TestLoader, reporting per-sample loss and accuracy.
print("Testing model with inbuilt dataset...")

model.eval()

#TestLoss holds the sum of per-sample losses; total counts the samples seen
TestLoss = 0.0
correct = 0
total = 0

with torch.no_grad():
	for images, labels in TestLoader:

		#Move data to device
		images,labels = images.to(device),labels.to(device)
		SamplesInBatch = labels.size(0)

		#Forward pass
		outputs = model(images)

		#Calculate loss
		#loss_function (nn.CrossEntropyLoss with default reduction) returns the batch mean.
		#Weight it by the batch size so the smaller final batch is not over-counted.
		loss = loss_function(outputs,labels)
		TestLoss += loss.item()*SamplesInBatch

		#Get predicted class (index of the largest score)
		predicted = outputs.argmax(dim=1)

		#Update total and correct counts
		total += SamplesInBatch
		correct += (predicted == labels).sum().item()

#Test Results

#Average over samples rather than over batches
FinalLoss = TestLoss/total
FinalAcc = 100*correct/total

print(f"Test Accuracy: {FinalAcc:.3f}%")
print(f"Test Loss: {FinalLoss:.3f}")

Testing model with inbuilt dataset...
Test Accuracy: 99.100%
Test Loss: 0.030
