In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# **Data augmentation**

Normalize ảnh từ scale [0, 255] về [-1, 1] và sử dụng ít nhất 2 trong số các transformations dưới đây cho dataset:
- Random resized
- Center cropping
- Random vertical flipping
- Random horizontal flipping
- Các loại transformation khác có thể tham khảo ở đây https://pytorch.org/vision/master/transforms.html


In [None]:
# Preprocessing pipeline for FashionMNIST, assembled from two stages.
# Stage 1 - random augmentation: crop a random region and resize it to 28x28,
# then mirror the image horizontally at random.
_augmentation_steps = [
    transforms.RandomResizedCrop(28),
    transforms.RandomHorizontalFlip(),
]
# Stage 2 - tensor conversion: ToTensor scales pixels to [0, 1]; Normalize with
# mean 0.5 / std 0.5 then maps that range to [-1, 1].
_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform = transforms.Compose(_augmentation_steps + _tensor_steps)

In [None]:
# Download (if not already cached under ./data) and load FashionMNIST.

# Training split: uses the augmenting `transform` and is reshuffled every epoch.
trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Test split: deterministic preprocessing only. Random crops/flips at evaluation
# time would make the reported accuracy change from run to run and would score
# the models on distorted images, so only the tensor conversion and the same
# [-1, 1] normalization as the training pipeline are applied here.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])
testset = torchvision.datasets.FashionMNIST(root="./data", train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# **Finetuning**
Cho mô hình MiniVGG như dưới, hãy load 2 pretrained models vào model MiniVGG ở dưới. Freeze self.features và train lớp cuối (self.classifier) của 2 pretrained models trên dataset FashionMNIST. Sau đó, train model MiniVGG from scratch trên tập FashionMNIST.

In [None]:
class MiniVGG(nn.Module):
    """Small VGG-style CNN: three (conv, conv, max-pool) stages and a linear head.

    Args:
        in_channels: Number of channels in the input images. Defaults to 1,
            the value the original hard-coded.
        num_classes: Number of output logits. Defaults to 10, the value the
            original hard-coded.

    The linear head expects ``256 * 3 * 3`` flattened features. Each 3x3
    convolution with padding 1 keeps the spatial size and each 2x2 max-pool
    halves it (rounding down), so e.g. 28x28 inputs reduce 28 -> 14 -> 7 -> 3.

    Layer order and attribute names (``features``, ``classifier``) are kept
    as-is so existing state_dict checkpoints continue to load.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.features = nn.Sequential(
            # Stage 1: in_channels -> 64
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 64 -> 128
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 3: 128 -> 256
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Linear(256 * 3 * 3, num_classes)
        # Start the head with small Gaussian weights (std 0.01) and a zero bias.
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.features(x)
        # Flatten every dimension except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [None]:
# Checkpoint locations of the two pretrained MiniVGG models (CIFAR-10 first, MNIST second).
cifar10_model_path, mnist_model_path = (
    '/content/drive/MyDrive/cifar10_mini_vgg.pth',
    '/content/drive/MyDrive/mnist_mini_vgg.pth',
)

In [None]:
# Build a MiniVGG on the selected device and restore the CIFAR-10 pretrained weights.
cifar10_pretrained_model = MiniVGG().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on
# a GPU still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
cifar10_pretrained_model.load_state_dict(torch.load(cifar10_model_path, map_location=device))
cifar10_pretrained_model.features.requires_grad_(False)  # Freeze the feature layers
cifar10_pretrained_model.eval()  # last expression of the cell: also displays the module

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
# Build a MiniVGG on the selected device and restore the MNIST pretrained weights.
mnist_pretrained_model = MiniVGG().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on
# a GPU still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
mnist_pretrained_model.load_state_dict(torch.load(mnist_model_path, map_location=device))
mnist_pretrained_model.features.requires_grad_(False)  # Freeze the feature layers
mnist_pretrained_model.eval()  # last expression of the cell: also displays the module

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
# Baseline: a freshly initialised MiniVGG (no checkpoint loaded), moved to the selected device.
from_scratch_model = MiniVGG()
from_scratch_model = from_scratch_model.to(device)
# eval() returns the module, so as the last expression it also displays the architecture.
from_scratch_model.eval()

MiniVGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
# Define the shared loss function and one SGD optimizer per model.
criterion = nn.CrossEntropyLoss()

# Fine-tuned models: only the classifier head is trainable (self.features is
# frozen), so hand the optimizer just those parameters instead of every parameter.
optimizer_cifar10 = optim.SGD(cifar10_pretrained_model.classifier.parameters(), lr=1e-4, momentum=0.9)
optimizer_mnist = optim.SGD(mnist_pretrained_model.classifier.parameters(), lr=1e-4, momentum=0.9)

# From-scratch model: every layer is trained. With lr=1e-4 the recorded loss
# stayed at ~2.30 (= ln 10, chance level for 10 classes) for the whole run,
# i.e. the network never started learning, so a larger step size is used here.
optimizer_scratch = optim.SGD(from_scratch_model.parameters(), lr=1e-2, momentum=0.9)

In [None]:
# Fine-tune the CIFAR-10 pretrained model's classifier on FashionMNIST
num_epochs = 5  # single source of truth for the loop bound and the progress message

# The model was put in eval mode when it was loaded; switch to training mode
# before optimizing. MiniVGG has no dropout/batch-norm layers, so this does not
# change its outputs today, but it keeps the loop correct if such layers are added.
cifar10_pretrained_model.train()

for epoch in range(num_epochs):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # One optimization step: clear old gradients, forward, loss, backward, update
        optimizer_cifar10.zero_grad()
        outputs_cifar10 = cifar10_pretrained_model(inputs)
        loss_cifar10 = criterion(outputs_cifar10, labels)
        loss_cifar10.backward()
        optimizer_cifar10.step()

        # Report the current mini-batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print("Epoch: {}/{}, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, num_epochs, i + 1, len(trainloader), loss_cifar10.item()
            ))

Epoch: 1/5, Step: 100/938, Loss: 0.7044
Epoch: 1/5, Step: 200/938, Loss: 0.9209
Epoch: 1/5, Step: 300/938, Loss: 0.7679
Epoch: 1/5, Step: 400/938, Loss: 0.7104
Epoch: 1/5, Step: 500/938, Loss: 0.8199
Epoch: 1/5, Step: 600/938, Loss: 0.5380
Epoch: 1/5, Step: 700/938, Loss: 0.6599
Epoch: 1/5, Step: 800/938, Loss: 1.1132
Epoch: 1/5, Step: 900/938, Loss: 0.7721
Epoch: 2/5, Step: 100/938, Loss: 0.7535
Epoch: 2/5, Step: 200/938, Loss: 0.5676
Epoch: 2/5, Step: 300/938, Loss: 0.6879
Epoch: 2/5, Step: 400/938, Loss: 0.7296
Epoch: 2/5, Step: 500/938, Loss: 0.8615
Epoch: 2/5, Step: 600/938, Loss: 0.7339
Epoch: 2/5, Step: 700/938, Loss: 0.6266
Epoch: 2/5, Step: 800/938, Loss: 0.5322
Epoch: 2/5, Step: 900/938, Loss: 0.7568
Epoch: 3/5, Step: 100/938, Loss: 0.8622
Epoch: 3/5, Step: 200/938, Loss: 0.5696
Epoch: 3/5, Step: 300/938, Loss: 0.7974
Epoch: 3/5, Step: 400/938, Loss: 0.7980
Epoch: 3/5, Step: 500/938, Loss: 0.8674
Epoch: 3/5, Step: 600/938, Loss: 0.4507
Epoch: 3/5, Step: 700/938, Loss: 0.4875


In [None]:
# Fine-tune the MNIST pretrained model's classifier on FashionMNIST
num_epochs = 5  # single source of truth for the loop bound and the progress message

# The model was put in eval mode when it was loaded; switch to training mode
# before optimizing. MiniVGG has no dropout/batch-norm layers, so this does not
# change its outputs today, but it keeps the loop correct if such layers are added.
mnist_pretrained_model.train()

for epoch in range(num_epochs):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # One optimization step: clear old gradients, forward, loss, backward, update
        optimizer_mnist.zero_grad()
        outputs_mnist = mnist_pretrained_model(inputs)
        loss_mnist = criterion(outputs_mnist, labels)
        loss_mnist.backward()
        optimizer_mnist.step()

        # Report the current mini-batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print("Epoch: {}/{}, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, num_epochs, i + 1, len(trainloader), loss_mnist.item()
            ))


Epoch: 1/5, Step: 100/938, Loss: 3.6291
Epoch: 1/5, Step: 200/938, Loss: 2.6584
Epoch: 1/5, Step: 300/938, Loss: 2.1027
Epoch: 1/5, Step: 400/938, Loss: 1.7754
Epoch: 1/5, Step: 500/938, Loss: 1.6430
Epoch: 1/5, Step: 600/938, Loss: 1.8744
Epoch: 1/5, Step: 700/938, Loss: 2.1913
Epoch: 1/5, Step: 800/938, Loss: 1.9600
Epoch: 1/5, Step: 900/938, Loss: 1.5755
Epoch: 2/5, Step: 100/938, Loss: 1.7939
Epoch: 2/5, Step: 200/938, Loss: 1.4917
Epoch: 2/5, Step: 300/938, Loss: 1.5072
Epoch: 2/5, Step: 400/938, Loss: 1.4339
Epoch: 2/5, Step: 500/938, Loss: 1.7829
Epoch: 2/5, Step: 600/938, Loss: 1.6370
Epoch: 2/5, Step: 700/938, Loss: 1.2512
Epoch: 2/5, Step: 800/938, Loss: 1.3551
Epoch: 2/5, Step: 900/938, Loss: 1.6325
Epoch: 3/5, Step: 100/938, Loss: 1.2950
Epoch: 3/5, Step: 200/938, Loss: 1.5749
Epoch: 3/5, Step: 300/938, Loss: 1.5196
Epoch: 3/5, Step: 400/938, Loss: 1.4739
Epoch: 3/5, Step: 500/938, Loss: 1.3388
Epoch: 3/5, Step: 600/938, Loss: 1.8739
Epoch: 3/5, Step: 700/938, Loss: 1.5552


In [None]:
# Train MiniVGG from scratch on FashionMNIST (all layers are updated)
num_epochs = 5  # single source of truth for the loop bound and the progress message

# The model was put in eval mode when it was created; switch to training mode
# before optimizing. MiniVGG has no dropout/batch-norm layers, so this does not
# change its outputs today, but it keeps the loop correct if such layers are added.
from_scratch_model.train()

for epoch in range(num_epochs):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # One optimization step: clear old gradients, forward, loss, backward, update
        optimizer_scratch.zero_grad()
        outputs_scratch = from_scratch_model(inputs)
        loss = criterion(outputs_scratch, labels)
        loss.backward()
        optimizer_scratch.step()

        # Report the current mini-batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print("Epoch: {}/{}, Step: {}/{}, Loss: {:.4f}".format(
                epoch + 1, num_epochs, i + 1, len(trainloader), loss.item()
            ))


Epoch: 1/5, Step: 100/938, Loss: 2.3011
Epoch: 1/5, Step: 200/938, Loss: 2.3023
Epoch: 1/5, Step: 300/938, Loss: 2.3031
Epoch: 1/5, Step: 400/938, Loss: 2.3028
Epoch: 1/5, Step: 500/938, Loss: 2.3025
Epoch: 1/5, Step: 600/938, Loss: 2.3004
Epoch: 1/5, Step: 700/938, Loss: 2.3023
Epoch: 1/5, Step: 800/938, Loss: 2.3024
Epoch: 1/5, Step: 900/938, Loss: 2.3018
Epoch: 2/5, Step: 100/938, Loss: 2.3009
Epoch: 2/5, Step: 200/938, Loss: 2.3013
Epoch: 2/5, Step: 300/938, Loss: 2.3023
Epoch: 2/5, Step: 400/938, Loss: 2.3039
Epoch: 2/5, Step: 500/938, Loss: 2.3020
Epoch: 2/5, Step: 600/938, Loss: 2.3017
Epoch: 2/5, Step: 700/938, Loss: 2.3017
Epoch: 2/5, Step: 800/938, Loss: 2.3020
Epoch: 2/5, Step: 900/938, Loss: 2.3010
Epoch: 3/5, Step: 100/938, Loss: 2.3029
Epoch: 3/5, Step: 200/938, Loss: 2.3008
Epoch: 3/5, Step: 300/938, Loss: 2.3009
Epoch: 3/5, Step: 400/938, Loss: 2.3018
Epoch: 3/5, Step: 500/938, Loss: 2.3016
Epoch: 3/5, Step: 600/938, Loss: 2.3023
Epoch: 3/5, Step: 700/938, Loss: 2.3013


In [None]:
def calculate_accuracy(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``, in percent.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores of
            shape (batch, num_classes). It is switched to eval mode and left
            in that mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Accuracy as a float in [0, 100]; ``0.0`` when ``dataloader`` yields
        no samples.
    """
    # Evaluate on the device the model's parameters live on rather than relying
    # on a notebook-level global; a parameter-free model leaves batches in place.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_correct = 0
    total_samples = 0
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Iterate the loader that was passed in, not a global one.
        for images, labels in dataloader:
            if target_device is not None:
                images = images.to(target_device)
                labels = labels.to(target_device)
            outputs = model(images)
            # Predicted class = index of the largest score along the class dimension.
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    if total_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * total_correct / total_samples

In [None]:
# Score the three models on the test set, in order: CIFAR-10 fine-tuned,
# MNIST fine-tuned, trained from scratch.
cifar10_accuracy, mnist_accuracy, scratch_model_accuracy = (
    calculate_accuracy(candidate, testloader)
    for candidate in (
        cifar10_pretrained_model,
        mnist_pretrained_model,
        from_scratch_model,
    )
)

# Report each accuracy as a percentage.
print(f'Accuracy of the CIFAR pretrained model: {cifar10_accuracy}%')
print(f'Accuracy of the MNIST pretrained model: {mnist_accuracy}%')
print(f'Accuracy of the model trained from scratch: {scratch_model_accuracy}%')

Accuracy of the CIFAR-10 pretrained model: 73.53%
Accuracy of the MNIST pretrained model: 57.87%
Accuracy of the model trained from scratch: 14.58%


## **Nhận xét**
- Model pretrained trên CIFAR đã có kết quả tốt nhất, tiếp theo là model pretrained trên MNIST. Mô hình được train lại từ đầu có độ chính xác thấp nhất.

- Giải thích:
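    - Pretrained tốt hơn train from scratch: Các mô hình pretrained trên các bộ dữ liệu lớn thường đã học được các đặc trưng hữu ích, việc sử dụng lại các đặc trưng này giúp giảm thiểu việc cần đào tạo một mô hình từ đầu và tiết kiệm thời gian đào tạo. Lưu ý: trong log ở trên, loss của mô hình train from scratch gần như đứng yên ở ~2.30 (≈ ln 10, tức mức đoán ngẫu nhiên cho 10 lớp), nghĩa là với lr=1e-4 và vài epoch mô hình này hầu như chưa học được gì; độ chính xác 14.58% vì vậy phản ánh việc mô hình chưa hội tụ chứ không phải giới hạn của kiến trúc.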
    - Pretrained trên CIFAR tốt hơn MNIST: Dataset CIFAR có nhiều feature gần giống với dataset FashionMNIST hơn so với dataset MNIST, dẫn đến việc chuyển đổi kiến thức tốt hơn.

- Nếu train với nhiều epochs hơn thì accuracy của 3 models có giống nhau không?
  - Không, nếu huấn luyện với nhiều epochs hơn, hiệu suất của 3 mô hình không nhất thiết phải giống nhau.
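  - Cả 3 mô hình dùng chung kiến trúc MiniVGG và cùng bộ dữ liệu FashionMNIST, nên hiệu suất của mỗi mô hình khác nhau chủ yếu do cách đào tạo: trọng số khởi tạo (pretrained trên CIFAR-10, pretrained trên MNIST, hay khởi tạo ngẫu nhiên) và việc self.features bị freeze ở 2 mô hình pretrained (chỉ classifier được train) trong khi mô hình from scratch train toàn bộ các lớp.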

# **Feature extractor**

Với model train from scratch, dùng get_graph_node_names() và create_feature_extractor từ thư viện torchvision.models.feature_extraction để in ra tên layer và weight của layer tương ứng


In [None]:
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names

In [None]:
# Trace from_scratch_model and collect the names of its graph nodes; the call
# returns two lists, one for the train-mode graph and one for the eval-mode graph.
train_nodes, eval_nodes = get_graph_node_names(from_scratch_model)

In [None]:
# Display the node (layer) names found in the train-mode graph.
train_nodes

['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'flatten',
 'classifier']

In [None]:
# Build a feature extractor over from_scratch_model that exposes every traced
# node as an output (train-mode and eval-mode node lists passed separately).
# The result is not assigned to a name, so this cell only displays the module.
create_feature_extractor(from_scratch_model, train_return_nodes= train_nodes, eval_return_nodes= eval_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
# Display the weight tensor of the first layer in `features` (the first Conv2d,
# node 'features.0'). NOTE(review): this prints the full tensor for one layer only.
from_scratch_model.features[0].weight

Parameter containing:
tensor([[[[-0.0885,  0.2641,  0.2803],
          [ 0.2875, -0.2080, -0.2344],
          [ 0.0181,  0.0344, -0.2689]]],


        [[[ 0.0111, -0.0864,  0.2969],
          [-0.3170,  0.2687, -0.2292],
          [-0.1670,  0.2423, -0.2965]]],


        [[[-0.1298, -0.1011,  0.3079],
          [-0.2006,  0.1165,  0.2662],
          [ 0.0377,  0.1113,  0.0813]]],


        [[[-0.0072,  0.1617,  0.2808],
          [ 0.1617,  0.2832,  0.1461],
          [ 0.0417, -0.2987, -0.0046]]],


        [[[ 0.2739, -0.2942,  0.0323],
          [-0.1317,  0.1386,  0.3188],
          [-0.0466,  0.0808,  0.2073]]],


        [[[ 0.2501, -0.2185, -0.3150],
          [-0.1460,  0.1609,  0.0239],
          [ 0.1789, -0.1644,  0.1819]]],


        [[[ 0.0758,  0.1495, -0.1737],
          [ 0.1527, -0.1274, -0.3110],
          [-0.0079,  0.3079, -0.2461]]],


        [[[ 0.2400,  0.2640,  0.2991],
          [ 0.0952, -0.1350, -0.2974],
          [ 0.1662,  0.1215,  0.1854]]],


        [[