In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.models import vgg13
import matplotlib.pyplot as plt

In [2]:
transform = transforms.Compose([
    transforms.Resize((24,24)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


trainset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
testset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 10853806.01it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 173289.72it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3204646.26it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 24125449.15it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw



In [3]:
class MiniVGG(nn.Module):
    def __init__(self, num_classes= 10):
        super(MiniVGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels= 1, out_channels= 64, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 64, out_channels= 64, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            nn.Conv2d(in_channels= 64, out_channels= 128, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 128, out_channels= 128, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            nn.Conv2d(in_channels= 128, out_channels= 256, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 256, out_channels= 256, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)))

        self.classifier = nn.Linear(256 * 3 * 3, 10)
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [4]:
from torchvision.models import vgg13
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Specify the path to the pre-trained model on Google Drive
model_weights_path = '/content/drive/MyDrive/hw4/cifar10_mini_vgg.pth'

# Load the VGG13 model
model_cifar = MiniVGG() #vgg13(pretrained=False)

# Load pre-trained weights
model_cifar.load_state_dict(torch.load(model_weights_path), strict=False)

<All keys matched successfully>

In [5]:

for param in model_cifar.parameters():
    param.requires_grad = False
for param in model_cifar.classifier.parameters():
    param.requires_grad = True

In [6]:

optimizer = torch.optim.SGD(model_cifar.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_cifar.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_cifar(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')

    model_cifar.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_cifar(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)




Epoch [1/5], Batch [100/938], Loss: 0.6617
Epoch [1/5], Batch [200/938], Loss: 0.5090
Epoch [1/5], Batch [300/938], Loss: 0.4992
Epoch [1/5], Batch [400/938], Loss: 0.3960
Epoch [1/5], Batch [500/938], Loss: 0.4270
Epoch [1/5], Batch [600/938], Loss: 0.4036
Epoch [1/5], Batch [700/938], Loss: 0.5070
Epoch [1/5], Batch [800/938], Loss: 0.2433
Epoch [1/5], Batch [900/938], Loss: 0.5614
Epoch [1/5], Loss: 0.2918, Accuracy: 0.8527
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.3633
Epoch [2/5], Batch [200/938], Loss: 0.7189
Epoch [2/5], Batch [300/938], Loss: 0.4534
Epoch [2/5], Batch [400/938], Loss: 0.4883
Epoch [2/5], Batch [500/938], Loss: 0.3202
Epoch [2/5], Batch [600/938], Loss: 0.4315
Epoch [2/5], Batch [700/938], Loss: 0.3377
Epoch [2/5], Batch [800/938], Loss: 0.4236
Epoch [2/5], Batch [900/938], Loss: 0.3087
Epoch [2/5], Loss: 0.1268, Accuracy: 0.8758
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.2096
Epoch [3/5], 

In [7]:
model_weights_path = '/content/drive/MyDrive/hw4/mnist_mini_vgg.pth'

# Load the VGG13 model
model_mnist = MiniVGG() # vgg13(pretrained=False)

# Load pre-trained weights
model_mnist.load_state_dict(torch.load(model_weights_path), strict=False)

# Freeze the Pre-Trained Model Layers and unfreeze the last layer
for param in model_mnist.parameters():
    param.requires_grad = False
for param in model_mnist.classifier.parameters():
    param.requires_grad = True

In [8]:
model_mnist.to(device)

optimizer = torch.optim.SGD(model_mnist.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_mnist.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_mnist(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    # Validation
    model_mnist.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_mnist(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 1.1311
Epoch [1/5], Batch [200/938], Loss: 0.7735
Epoch [1/5], Batch [300/938], Loss: 0.7114
Epoch [1/5], Batch [400/938], Loss: 0.7976
Epoch [1/5], Batch [500/938], Loss: 0.6617
Epoch [1/5], Batch [600/938], Loss: 0.5872
Epoch [1/5], Batch [700/938], Loss: 0.7233
Epoch [1/5], Batch [800/938], Loss: 0.5184
Epoch [1/5], Batch [900/938], Loss: 0.3273
Epoch [1/5], Loss: 0.2933, Accuracy: 0.7967
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.5218
Epoch [2/5], Batch [200/938], Loss: 0.7188
Epoch [2/5], Batch [300/938], Loss: 0.3916
Epoch [2/5], Batch [400/938], Loss: 0.7922
Epoch [2/5], Batch [500/938], Loss: 0.5021
Epoch [2/5], Batch [600/938], Loss: 0.4386
Epoch [2/5], Batch [700/938], Loss: 0.6724
Epoch [2/5], Batch [800/938], Loss: 0.5040
Epoch [2/5], Batch [900/938], Loss: 0.5036
Epoch [2/5], Loss: 0.5524, Accuracy: 0.8229
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.6235
Epoch [3/5], 

In [9]:
model_HW3 = MiniVGG()

model_HW3.to(device)
optimizer = torch.optim.SGD(model_HW3.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_HW3.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_HW3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    # Validation
    model_HW3.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_HW3(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 2.3014
Epoch [1/5], Batch [200/938], Loss: 2.3011
Epoch [1/5], Batch [300/938], Loss: 2.2991
Epoch [1/5], Batch [400/938], Loss: 2.2994
Epoch [1/5], Batch [500/938], Loss: 2.2980
Epoch [1/5], Batch [600/938], Loss: 2.2929
Epoch [1/5], Batch [700/938], Loss: 2.2791
Epoch [1/5], Batch [800/938], Loss: 2.2451
Epoch [1/5], Batch [900/938], Loss: 1.4558
Epoch [1/5], Loss: 1.5263, Accuracy: 0.5547
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.9502
Epoch [2/5], Batch [200/938], Loss: 0.8683
Epoch [2/5], Batch [300/938], Loss: 0.7887
Epoch [2/5], Batch [400/938], Loss: 0.8595
Epoch [2/5], Batch [500/938], Loss: 0.7387
Epoch [2/5], Batch [600/938], Loss: 0.7112
Epoch [2/5], Batch [700/938], Loss: 0.7692
Epoch [2/5], Batch [800/938], Loss: 0.6742
Epoch [2/5], Batch [900/938], Loss: 0.5151
Epoch [2/5], Loss: 0.6823, Accuracy: 0.7694
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.6091
Epoch [3/5], 

Mô hình đã được huấn luyện trên tập dữ liệu CIFAR-10 và đạt độ chính xác cao nhất sau 5 epochs. Việc này có thể được giải thích bởi sự tương đồng giữa các đặc trưng trong tập dữ liệu CIFAR và tập dữ liệu FashionMNIST, so với sự khác biệt rõ rệt với tập dữ liệu MNIST.

Tuy nhiên, nếu quyết định tiếp tục huấn luyện mô hình trong thời gian dài hơn, mô hình MiniVGG được đào tạo từ đầu sẽ có hiệu suất vượt trội so với hai mô hình đã được pretrained. Lý do nằm ở việc các lớp mạng sẽ điều chỉnh tốt hơn để phù hợp với tập dữ liệu FashionMNIST. Trong khi hai mô hình pretrained đã freeze các feature, điều này có nghĩa rằng nếu tiếp tục huấn luyện thêm epochs, các lớp của mô hình sẽ không còn khả năng học thêm thông tin mới và hữu ích.

In [10]:

from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

train_nodes, eval_nodes = get_graph_node_names(model_HW3)

In [11]:
train_nodes


['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'flatten',
 'classifier']

In [12]:
create_feature_extractor(model_HW3, train_return_nodes= train_nodes, eval_return_nodes= eval_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [13]:
model_HW3.features[0].weight

Parameter containing:
tensor([[[[-2.9211e-02,  2.6881e-01, -2.0693e-01],
          [ 1.3031e-01,  6.9950e-02,  1.2632e-01],
          [-2.2441e-01,  2.1340e-01, -4.9494e-02]]],


        [[[-1.6189e-01, -2.7329e-01,  3.1640e-01],
          [-1.1625e-01, -3.9001e-01, -2.0639e-01],
          [-5.3494e-02,  6.3707e-02, -1.5606e-01]]],


        [[[ 2.9967e-02,  2.9572e-01, -3.4180e-01],
          [ 2.1384e-01,  2.5384e-01, -1.1366e-01],
          [ 2.4663e-01,  1.2689e-01, -1.1666e-01]]],


        [[[ 1.0593e-01,  2.0821e-01,  3.3167e-01],
          [ 1.8188e-01,  7.3705e-03, -8.2513e-02],
          [ 1.8692e-01, -7.1669e-02,  2.6817e-01]]],


        [[[ 3.1870e-01,  2.3149e-01,  2.5392e-01],
          [-2.9224e-01, -9.3522e-02,  1.5402e-01],
          [ 2.2624e-01,  2.2393e-01,  2.9532e-01]]],


        [[[-3.2456e-01,  2.3363e-01,  2.7301e-02],
          [ 1.7609e-01,  1.6340e-01, -1.3397e-01],
          [ 1.5203e-01, -8.8413e-02,  3.8393e-02]]],


        [[[ 9.2415e-03, -2.1052e-01,