In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader, Dataset


In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Both MNIST splits are read from the same local directory. download=False
# means the dataset files must already be present there.
trainset, testset = (
    torchvision.datasets.MNIST(
        root='/home/student/Desktop/220962404/lab4/data',
        train=is_train,
        download=False,
        transform=transform,
    )
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training split is shuffled.
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

In [4]:
class CNNClassifier(nn.Module):
    """Small CNN: three conv -> ReLU -> max-pool stages followed by a two-layer head.

    The first linear layer of the head takes 64 input features, so the
    flattened output of ``self.net`` must contain exactly 64 values per
    sample. A 1x28x28 input satisfies this: the unpadded 3x3 convolutions and
    2x2 poolings shrink it 28 -> 26 -> 13 -> 11 -> 5 -> 3 -> 1, leaving
    64 channels of size 1x1.

    The head returns 10 raw scores per sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor. The position of each layer inside the Sequential
        # determines the state_dict keys (net.0, net.3, net.6 for the convs).
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(64, 128, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(128, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
        ]
        self.net = nn.Sequential(*feature_layers)

        # Classifier head: 64 features -> 20 hidden units -> 10 class scores.
        head_layers = [
            nn.Linear(64, 20, bias=True),
            nn.ReLU(),
            nn.Linear(20, 10, bias=True),
        ]
        self.classification_head = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run ``x`` through the feature extractor and head; returns (batch, 10) scores."""
        feature_maps = self.net(x)
        batch_size = feature_maps.size(0)
        # Collapse everything except the batch dimension before the linear head.
        flattened = feature_maps.view(batch_size, -1)
        return self.classification_head(flattened)


In [5]:
# Fresh model, loss function and optimizer for training on `trainloader`.
model = CNNClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: summed batch losses, correct predictions, samples seen.
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in trainloader:
        # Clear gradients from the previous step before computing new ones.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.max(dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    # Loss is averaged over the number of batches; accuracy is a percentage
    # over the number of samples seen this epoch.
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")


Epoch [1/5], Loss: 0.2842, Accuracy: 90.96%
Epoch [2/5], Loss: 0.0795, Accuracy: 97.55%
Epoch [3/5], Loss: 0.0580, Accuracy: 98.20%
Epoch [4/5], Loss: 0.0462, Accuracy: 98.59%
Epoch [5/5], Loss: 0.0381, Accuracy: 98.79%


In [7]:
# List every entry of the model's state_dict together with its tensor size.
print("Model's state_dict")
for param_name, param_value in model.state_dict().items():
    print(param_name, "\t", param_value.size())
print()

# Dump every top-level entry of the optimizer's state_dict.
print("Optimizer's state_dict")
for var_name, var_value in optimizer.state_dict().items():
    print(var_name, "\t", var_value)

Model's state_dict
net.0.weight 	 torch.Size([64, 1, 3, 3])
net.0.bias 	 torch.Size([64])
net.3.weight 	 torch.Size([128, 64, 3, 3])
net.3.bias 	 torch.Size([128])
net.6.weight 	 torch.Size([64, 128, 3, 3])
net.6.bias 	 torch.Size([64])
classification_head.0.weight 	 torch.Size([20, 64])
classification_head.0.bias 	 torch.Size([20])
classification_head.2.weight 	 torch.Size([10, 20])
classification_head.2.bias 	 torch.Size([10])

Optimizer's state_dict
state 	 {0: {'step': tensor(4690.), 'exp_avg': tensor([[[[-7.3764e-04, -4.6144e-04,  2.4608e-04],
          [-8.6669e-05,  1.4600e-04,  2.2094e-04],
          [-6.3323e-05, -2.8136e-04, -6.3068e-04]]],


        [[[ 8.0095e-04,  3.4327e-04, -2.2040e-04],
          [ 5.3925e-04, -6.6193e-05, -2.7972e-04],
          [-2.6709e-04, -6.5768e-04, -5.5187e-04]]],


        [[[-6.4597e-04, -3.9098e-04,  1.0003e-04],
          [-7.0474e-04,  4.4735e-04,  4.1375e-04],
          [ 3.9398e-05,  7.3960e-04,  5.0164e-04]]],


        [[[ 1.0938e-03,  

In [8]:
# Persist the whole trained model object (not just its state_dict), which is
# what the later `torch.load(...)` cell expects to get back.
# Create the target directory first so the save does not fail when
# ./MNISTParams does not exist yet (e.g. on a fresh checkout).
os.makedirs("./MNISTParams", exist_ok=True)
torch.save(model, "./MNISTParams/model.pt")

In [9]:
# NOTE(review): despite the `mnist_` prefix, this is the FashionMNIST test
# split, so the evaluation further down runs the MNIST-trained model on a
# different dataset. Presumably an intentional cross-dataset check - confirm.
mnist_testset = torchvision.datasets.FashionMNIST(
    root="./data",
    train=False,
    download=True,  # fetches the files into ./data if they are missing
    transform=transform,
)

# Fixed order (no shuffling), 64 samples per batch.
test_loader = DataLoader(dataset=mnist_testset, batch_size=64, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [10]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [11]:
# Restore the model object written by the earlier `torch.save(model, ...)` cell.
# The checkpoint holds the whole module, so the CNNClassifier class must be
# defined in this session, but no fresh instance has to be created first.
#
# map_location=device lets a checkpoint saved on a GPU machine load on a
# CPU-only one. weights_only=False is stated explicitly because a fully
# pickled module cannot be read in weights-only mode, which newer PyTorch
# releases use by default.
# NOTE(security): weights_only=False unpickles arbitrary objects; only load
# checkpoint files that this notebook produced itself.
model = torch.load("./MNISTParams/model.pt", map_location=device, weights_only=False)
model.to(device)

  model = torch.load("./MNISTParams/model.pt")


CNNClassifier(
  (net): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classification_head): Sequential(
    (0): Linear(in_features=64, out_features=20, bias=True)
    (1): ReLU()
    (2): Linear(in_features=20, out_features=10, bias=True)
  )
)

In [12]:
# List every entry of the loaded model's state_dict with its tensor size.
print("Model's state_dict:")
for param_name, param_value in model.state_dict().items():
    print(param_name, "\t", param_value.size())
print()

Model's state_dict:
net.0.weight 	 torch.Size([64, 1, 3, 3])
net.0.bias 	 torch.Size([64])
net.3.weight 	 torch.Size([128, 64, 3, 3])
net.3.bias 	 torch.Size([128])
net.6.weight 	 torch.Size([64, 128, 3, 3])
net.6.bias 	 torch.Size([64])
classification_head.0.weight 	 torch.Size([20, 64])
classification_head.0.bias 	 torch.Size([20])
classification_head.2.weight 	 torch.Size([10, 20])
classification_head.2.bias 	 torch.Size([10])



In [14]:
# Evaluate the loaded model on `test_loader` and report the percentage of
# samples whose highest-scoring class matches the label.
model.eval()
correct = 0
total = 0

# Inference only: disable autograd so no computation graph is built per batch.
with torch.no_grad():
    for tinputs, tlabels in test_loader:
        tinputs = tinputs.to(device)
        tlabels = tlabels.to(device)
        toutputs = model(tinputs)

        # Predicted class is the index of the largest score in each row.
        _, predicted = torch.max(toutputs, 1)

        total += tlabels.size(0)
        # .item() keeps `correct` a plain Python int (as in the training loop),
        # so the accuracy below is an ordinary float instead of a float32
        # tensor that prints as e.g. 6.299999713897705.
        correct += (predicted == tlabels).sum().item()

accuracy = 100.0 * correct / total
print("The overall accuracy is {}".format(accuracy))

The overall accuracy is 6.299999713897705
