In [0]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torch.optim as optim

In [0]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))
])

In [0]:
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)

In [0]:
train_image_zero, train_target_zero = trainset[0]

In [0]:
print(train_target_zero)#, train_image_zero

5


In [0]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [0]:
class ANN(nn.Module):
  def __init__(self):
    super().__init__()

    #Inputs to hidden layer linear transformation
    self.hidden1 = nn.Linear(784, 256)
    #Output Layer 10 units - One for each digit.
    self.output = nn.Linear(256, 10)

  def forward(self, x):
    #pass the input tensor to each of the operations.
    x = self.hidden1(x)
    x = F.relu(x)
    x = self.output(x)
    return x
model1 = ANN()
print(model1)

ANN(
  (hidden1): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)


In [0]:
#Hyperparameters for the network

input_size = 784
hidden_size = [128, 64]
output_size = 10

#Building a Feed Forward Neural Network.
model2 = nn.Sequential(nn.Linear(input_size, hidden_size[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_size[0], hidden_size[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_size[1], output_size))
print(model2)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
)


In [0]:
from collections import OrderedDict
model3 = nn.Sequential(OrderedDict([
                                   ('fc1', nn.Linear(input_size, hidden_size[0])),
                                   ('relu1', nn.ReLU()),
                                   ('fc2', nn.Linear(hidden_size[0], hidden_size[1])),
                                   ('relu2', nn.ReLU()),
                                   ('output', nn.Linear(hidden_size[1], output_size))
]))
print(model3)

Sequential(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (output): Linear(in_features=64, out_features=10, bias=True)
)


In [0]:
#Loss Function
loss_criterion = nn.CrossEntropyLoss()
#Optimizer 
optimizer1 =  optim.SGD(model1.parameters(), lr=0.03, momentum=0.9)
optimizer2 =  optim.SGD(model2.parameters(), lr=0.03, momentum=0.9)
optimizer3 =  optim.SGD(model3.parameters(), lr=0.03, momentum=0.9)

In [110]:
epochs = 5
for e in range(epochs):
  running_loss = 0
  for images, labels in trainloader:
    #Flatten Images in 784 Long Vector
    images = images.view(images.shape[0], -1)
    #Zero the Parameter Gradients
    optimizer1.zero_grad()
    #Forward Pass
    output = model1(images)
    #Determining Loss
    loss = loss_criterion(output, labels)
    #Backward propagation
    loss.backward()
    #Updating the Weights value with Optimizer Gradient
    optimizer1.step()
    #Print Statistics
    running_loss += loss.item()
  else:
    print(f"Epoch {e} ,Training Loss: {running_loss/len(trainloader)}")

Epoch 0 ,Training Loss: 0.07093312929762698
Epoch 1 ,Training Loss: 0.07092298397214523
Epoch 2 ,Training Loss: 0.07092627833785016
Epoch 3 ,Training Loss: 0.07094359773594433
Epoch 4 ,Training Loss: 0.0709316171169567


In [0]:
epochs = 5
for e in range(epochs):
  running_loss = 0
  for images, labels in trainloader:
    #Flatten Images in 784 Long Vector
    images = images.view(images.shape[0], -1)
    #Zero the Parameter Gradients
    optimizer2.zero_grad()
    #Forward Pass
    output = model2(images)
    #Determining Loss
    loss = loss_criterion(output, labels)
    #Backward propagation
    loss.backward()
    #Updating the Weights value with Optimizer Gradient
    optimizer2.step()
    #Print Statistics
    running_loss += loss.item()
  else:
    print(f"Epoch {e} ,Training Loss: {running_loss/len(trainloader)}")

Epoch 0 ,Training Loss: 0.36229515008961977
Epoch 1 ,Training Loss: 0.1672593816262938
Epoch 2 ,Training Loss: 0.13206911872504457
Epoch 3 ,Training Loss: 0.11324576741985
Epoch 4 ,Training Loss: 0.09721026822814405


In [0]:
epochs = 5
for e in range(epochs):
  running_loss = 0
  for images, labels in trainloader:
    #Flatten Images in 784 Long Vector
    images = images.view(images.shape[0], -1)
    #Zero the Parameter Gradients
    optimizer3.zero_grad()
    #Forward Pass
    output = model3(images)
    #Determining Loss
    loss = loss_criterion(output, labels)
    #Backward propagation
    loss.backward()
    #Updating the Weights value with Optimizer Gradient
    optimizer3.step()
    #Print Statistics
    running_loss += loss.item()
  else:
    print(f"Epoch {e} ,Training Loss: {running_loss/len(trainloader)}")

Epoch 0 ,Training Loss: 0.35075934372468986
Epoch 1 ,Training Loss: 0.16914509085298918
Epoch 2 ,Training Loss: 0.13273395632784854
Epoch 3 ,Training Loss: 0.11268725900648674
Epoch 4 ,Training Loss: 0.0988305224530669


In [0]:
#Saving Class Models to the Directory
M1_PATH = '/content/drive/My Drive/Pytorch/model1.pkl'
torch.save(model1.state_dict(), M1_PATH)

In [0]:
#Saving Sequentail Models to the Directory
M2_PATH = '/content/drive/My Drive/Pytorch/model2.pkl'
torch.save(model2.state_dict(), M2_PATH)

In [0]:
#Saving Ordered Sequentail Models to the Directory
M3_PATH = '/content/drive/My Drive/Pytorch/model3.pkl'
torch.save(model3.state_dict(), M3_PATH)

In [88]:
model1 = ANN()
model1.load_state_dict(torch.load(M1_PATH))

<All keys matched successfully>

In [90]:
model2.load_state_dict(torch.load(M2_PATH))

<All keys matched successfully>

In [91]:
model3.load_state_dict(torch.load(M3_PATH))

<All keys matched successfully>

In [0]:
#Predcition
testset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)

In [95]:
len(testset)

10000

In [99]:
testset[0][0].shape

torch.Size([1, 28, 28])

In [0]:
testloader = torch.utils.data.DataLoader(testset, batch_size=64)

In [109]:
m1_correct = 0
m2_correct = 0
m3_correct = 0
total = 0
with torch.no_grad():
  for images, labels in testloader:
    #Flatten Images in 784 Long Vector
    images = images.view(images.shape[0], -1)
    # Store Output
    m1_output = model1(images)
    m2_output = model2(images)
    m3_output = model3(images)

    _, predicted1 = torch.max(m1_output, 1)
    _, predicted2 = torch.max(m2_output, 1)
    _, predicted3 = torch.max(m3_output, 1)

    total += labels.size(0)
    m1_correct += (predicted1 == labels).sum().item()
    m2_correct += (predicted2 == labels).sum().item()
    m3_correct += (predicted3 == labels).sum().item()

print(f'Accuracy of the Model1 on the 10000 test images: {(100*m1_correct)/total}')
print(f'Accuracy of the Model2 on the 10000 test images: {(100*m2_correct)/total}')
print(f'Accuracy of the Model3 on the 10000 test images: {(100*m3_correct)/total}')

Accuracy of the Model1 on the 10000 test images: 97.08
Accuracy of the Model2 on the 10000 test images: 96.77
Accuracy of the Model3 on the 10000 test images: 96.5
