In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10, MNIST, FashionMNIST
from torch.utils.data import DataLoader

In [None]:
# Optimisation hyperparameters used by the data loaders and training cells.
num_epochs = 5          # passes over the training set per training run
batch_size = 64         # samples per mini-batch
momentum = 0.9          # momentum coefficient handed to SGD
learning_rate = 0.001   # step size handed to SGD

## 1. Data Augmentation

In [None]:
# Training-time pipeline. The random flips are data augmentation, so they are
# applied to the training split only.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # Normalize to [-1, 1]
])

# Evaluation-time pipeline: same resizing and normalisation, but no random
# operations, so the reported test accuracy is deterministic and is measured
# on un-augmented images.
test_transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # Normalize to [-1, 1]
])

# Load FashionMNIST (downloaded to ./data on first use) with the matching pipeline per split.
train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = FashionMNIST(root='./data', train=False, download=True, transform=test_transform)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## 2. Finetuning

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class MiniVGG(nn.Module):
    """Small VGG-style classifier for single-channel images with 10 output classes.

    ``features`` is three stages of (conv3x3 -> ReLU -> conv3x3 -> ReLU ->
    2x2 max-pool) with 64, 128 and 256 channels. ``classifier`` is a single
    linear layer expecting ``256 * 3 * 3`` flattened features, which is what
    the three poolings leave from a 28x28 input.
    """

    def __init__(self,):
        super().__init__()

        # Build the stages in order so layer indices (features.0, features.2, ...)
        # stay the same as in a hand-written nn.Sequential.
        stage_layers = []
        channels_in = 1
        for channels_out in (64, 128, 256):
            stage_layers.extend([
                nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(channels_out, channels_out, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
            channels_in = channels_out
        self.features = nn.Sequential(*stage_layers)

        # Linear head: small Gaussian weights, zero bias.
        self.classifier = nn.Linear(256 * 3 * 3, 10)
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        """Return the class scores for a batch ``x``: features -> flatten -> classifier."""
        feature_maps = self.features(x)
        flat = torch.flatten(feature_maps, 1)  # keep the batch dimension
        return self.classifier(flat)

In [None]:
def train_model(model, optimizer, train_loader):
    """Train ``model`` on ``train_loader`` and print the mean loss after each epoch.

    The number of epochs, the loss function and the target device are not
    parameters: they are read from the notebook-level names ``num_epochs``,
    ``criterion`` and ``device`` at call time.

    Args:
        model: network to optimise; it is switched to training mode.
        optimizer: optimizer stepped once per mini-batch.
        train_loader: iterable of ``(inputs, labels)`` batches that supports ``len()``.
    """
    model.train()
    for epoch_idx in range(num_epochs):
        batch_losses = []
        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()  # clear gradients left over from the previous step
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            batch_losses.append(batch_loss.item())

        # Average of the per-batch losses over the number of batches.
        mean_loss = sum(batch_losses, 0.0) / len(train_loader)
        print(f"Epoch {epoch_idx+1} - Loss: {mean_loss}")


# Evaluation
def evaluate_model(model, test_loader):
    """Compute, print and return the classification accuracy of ``model``.

    Args:
        model: network returning one score per class for each input row; it is
            switched to evaluation mode.
        test_loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        float: accuracy in percent (0-100). The same value is printed.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()

    # Evaluate on the device the model already lives on, so the function does
    # not depend on a notebook-level ``device`` variable.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in test_loader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)
            predicted = model(inputs).argmax(dim=1)  # index of the highest score per row
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = 100 * correct / total
    print("Accuracy on the test set:", accuracy)
    # Callers store the result (``accuracy_x = evaluate_model(...)``), so return it.
    return accuracy

### CIFAR-10

In [None]:
# Obtain the checkpoint file "cifar10_mini_vgg.pth" that the next cell loads.
# The interactive Colab upload is skipped when the file is already on disk, so
# the notebook can be re-run top to bottom without manual interaction.
# NOTE(review): an existing file with this name is reused as-is; delete it to
# force a fresh upload.
import os

if os.path.exists("cifar10_mini_vgg.pth"):
    uploaded = {}  # nothing uploaded in this run
else:
    from google.colab import files

    uploaded = files.upload()

Saving cifar10_mini_vgg.pth to cifar10_mini_vgg.pth


In [None]:
path = "cifar10_mini_vgg.pth"
model_cifar = MiniVGG()

# Load the checkpoint's weights into the freshly built network (presumably
# MiniVGG weights pretrained on CIFAR-10, going by the file name).
# Strict loading (the default) raises if any key is missing or unexpected,
# instead of silently leaving layers at their random initialisation.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model_cifar.load_state_dict(torch.load(path, map_location=device))

<All keys matched successfully>

In [None]:
# Freeze all layers except the last layer
for param in model_cifar.parameters():
    param.requires_grad = False

# Set the requires_grad attribute of the parameters in the last layer to True
for param in model_cifar.classifier.parameters():
    param.requires_grad = True

In [None]:
#Define loss function and optimizer
model_cifar.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_cifar = optim.SGD(model_cifar.parameters(), lr=learning_rate, momentum=momentum)

train_model(model_cifar, optimizer_cifar, train_loader)

accuracy_cifar = evaluate_model(model_cifar, test_loader)

Epoch 1 - Loss: 0.9834894227193617
Epoch 2 - Loss: 0.49720111854676247
Epoch 3 - Loss: 0.4188035392859724
Epoch 4 - Loss: 0.38705016694851774
Epoch 5 - Loss: 0.36172874768310265
Accuracy on the test set: 86.65


### MNIST

In [None]:
# Obtain the checkpoint file "mnist_mini_vgg.pth" that the next cell loads.
# The interactive Colab upload is skipped when the file is already on disk, so
# the notebook can be re-run top to bottom without manual interaction.
# NOTE(review): an existing file with this name is reused as-is; delete it to
# force a fresh upload.
import os

if os.path.exists("mnist_mini_vgg.pth"):
    uploaded = {}  # nothing uploaded in this run
else:
    from google.colab import files

    uploaded = files.upload()

Saving mnist_mini_vgg.pth to mnist_mini_vgg.pth


In [None]:
path = "mnist_mini_vgg.pth"
model_mnist = MiniVGG()

# Load the checkpoint's weights into the freshly built network (presumably
# MiniVGG weights pretrained on MNIST, going by the file name).
# Strict loading (the default) raises if any key is missing or unexpected,
# instead of silently leaving layers at their random initialisation.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model_mnist.load_state_dict(torch.load(path, map_location=device))

<All keys matched successfully>

In [None]:
# Freeze all layers except the last layer
for param in model_mnist.parameters():
    param.requires_grad = False

# Set the requires_grad attribute of the parameters in the last layer to True
for param in model_mnist.classifier.parameters():
    param.requires_grad = True

In [None]:
#Define loss function and optimizer
model_mnist.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_mnist = optim.SGD(model_mnist.parameters(), lr=learning_rate,momentum=momentum)

train_model(model_mnist, optimizer_mnist, train_loader)

accuracy_mnist = evaluate_model(model_mnist, test_loader)

Epoch 1 - Loss: 0.9384697860619152
Epoch 2 - Loss: 0.5664113389212948
Epoch 3 - Loss: 0.515088930360671
Epoch 4 - Loss: 0.4898987281233517
Epoch 5 - Loss: 0.46939070289259527
Accuracy on the test set: 82.8


### Train MiniVGG on FashionMnist from scratch

In [None]:
# Baseline: the same architecture with freshly initialised weights (no
# checkpoint loaded, no layers frozen), moved to the target device.
model_fashionmnist = MiniVGG().to(device)  # Module.to returns the same module

criterion = nn.CrossEntropyLoss()
optimizer_fashionmnist = optim.SGD(
    params=model_fashionmnist.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

# Train on the training loader, then measure accuracy on the test loader.
train_model(model=model_fashionmnist, optimizer=optimizer_fashionmnist, train_loader=train_loader)
accuracy_fashionmnist = evaluate_model(model=model_fashionmnist, test_loader=test_loader)

Epoch 1 - Loss: 2.2940507166421233
Epoch 2 - Loss: 1.0220509837431186
Epoch 3 - Loss: 0.5833006607316958
Epoch 4 - Loss: 0.4847709890494723
Epoch 5 - Loss: 0.4377113067106143
Accuracy on the test set: 84.28


#### More Epochs

In [None]:
# Hyperparameters for the longer runs: only the epoch count changes (5 -> 10).
num_epochs = 10         # passes over the training set per training run
batch_size = 64         # samples per mini-batch (unchanged)
momentum = 0.9          # momentum coefficient handed to SGD (unchanged)
learning_rate = 0.001   # step size handed to SGD (unchanged)

In [None]:
# Restart from the pretrained checkpoint so this run measures exactly
# `num_epochs` epochs of fine-tuning. Reusing the `model_cifar` that was
# already fine-tuned earlier would stack these epochs on top of the previous
# ones and make the comparison with the other models unfair.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model_cifar = MiniVGG()
model_cifar.load_state_dict(torch.load("cifar10_mini_vgg.pth", map_location=device))

# Train only the final linear layer; every other parameter stays frozen.
for name, param in model_cifar.named_parameters():
    param.requires_grad = name.startswith("classifier.")

model_cifar.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_cifar = optim.SGD(model_cifar.parameters(), lr=learning_rate, momentum=momentum)

train_model(model_cifar, optimizer_cifar, train_loader)

accuracy_cifar = evaluate_model(model_cifar, test_loader)

Epoch 1 - Loss: 0.299446892438095
Epoch 2 - Loss: 0.2980200051189041
Epoch 3 - Loss: 0.29640491365560334
Epoch 4 - Loss: 0.29407043786032366
Epoch 5 - Loss: 0.2943989848896766
Epoch 6 - Loss: 0.29686618575663454
Epoch 7 - Loss: 0.2933509435528504
Epoch 8 - Loss: 0.2940853932884329
Epoch 9 - Loss: 0.29178385206980745
Epoch 10 - Loss: 0.28960249278305183
Accuracy on the test set: 88.48


In [None]:
# Restart from the pretrained checkpoint so this run measures exactly
# `num_epochs` epochs of fine-tuning and does not depend on whatever state
# `model_mnist` was left in by earlier cells.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model_mnist = MiniVGG()
model_mnist.load_state_dict(torch.load("mnist_mini_vgg.pth", map_location=device))

# Train only the final linear layer; every other parameter stays frozen.
for name, param in model_mnist.named_parameters():
    param.requires_grad = name.startswith("classifier.")

model_mnist.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_mnist = optim.SGD(model_mnist.parameters(), lr=learning_rate, momentum=momentum)

train_model(model_mnist, optimizer_mnist, train_loader)

accuracy_mnist = evaluate_model(model_mnist, test_loader)

Epoch 1 - Loss: 0.9400641322771369
Epoch 2 - Loss: 0.5654646414778889
Epoch 3 - Loss: 0.513222890637958
Epoch 4 - Loss: 0.49015440626629886
Epoch 5 - Loss: 0.47014945198986324
Epoch 6 - Loss: 0.45908363031616595
Epoch 7 - Loss: 0.4499325364160894
Epoch 8 - Loss: 0.443547547673747
Epoch 9 - Loss: 0.43644356455947797
Epoch 10 - Loss: 0.42945727478784285
Accuracy on the test set: 83.89


In [None]:
# Longer from-scratch baseline: build a new, freshly initialised network so
# the run does not continue from the earlier one.
model_fashionmnist = MiniVGG().to(device)  # Module.to returns the same module

criterion = nn.CrossEntropyLoss()
optimizer_fashionmnist = optim.SGD(
    params=model_fashionmnist.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

# Train on the training loader, then measure accuracy on the test loader.
train_model(model=model_fashionmnist, optimizer=optimizer_fashionmnist, train_loader=train_loader)
accuracy_fashionmnist = evaluate_model(model=model_fashionmnist, test_loader=test_loader)

Epoch 1 - Loss: 2.199146497732541
Epoch 2 - Loss: 0.9029427191723131
Epoch 3 - Loss: 0.6168647531126099
Epoch 4 - Loss: 0.5179052035659869
Epoch 5 - Loss: 0.4676335608241146
Epoch 6 - Loss: 0.4350304819309889
Epoch 7 - Loss: 0.40968141995513363
Epoch 8 - Loss: 0.3904583975831583
Epoch 9 - Loss: 0.37391796700163943
Epoch 10 - Loss: 0.360564165579866
Accuracy on the test set: 86.33


#### Nhận xét
- Khả năng tổng quát hóa của một mô hình pretrained phụ thuộc vào mức độ tương đồng giữa tập dữ liệu gốc và tập dữ liệu mới. Trong trường hợp này, CIFAR-10 gồm ảnh các vật thể tự nhiên (máy bay, ô tô, chim, mèo, ...) với hình dạng và kết cấu đa dạng, nên các đặc trưng học được từ CIFAR-10 gần với ảnh sản phẩm thời trang của FashionMNIST hơn so với các đặc trưng học được từ chữ số viết tay của MNIST. Do đó, mô hình pretrained từ CIFAR-10 có khả năng tổng quát hóa trên tập dữ liệu FashionMNIST tốt hơn mô hình pretrained từ MNIST. Accuracy của pretrained model CIFAR-10 là 86.65, tiếp đó là model FashionMNIST from scratch 84.28 và cuối cùng là MNIST pretrained model 82.8.

- Sau khi tăng số lượng epochs từ 5 lên 10, accuracy của cả 3 model đều tăng nhưng mức tăng không giống nhau: CIFAR-10 pretrained 86.65 → 88.48, FashionMNIST from scratch 84.28 → 86.33, MNIST pretrained 82.8 → 83.89.

## 3. Feature Extraction

In [None]:
from torchvision.models.feature_extraction import (
    create_feature_extractor,
    get_graph_node_names,
)

# List the graph node names of the from-scratch model; the call returns one
# list for training mode and one for evaluation mode.
train_nodes, eval_nodes = get_graph_node_names(model_fashionmnist)

In [None]:
# Display the node names returned for the training-mode graph.
train_nodes

['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'flatten',
 'classifier']

In [None]:
# Build a feature extractor that returns every listed node (separate node
# lists for train and eval mode). The result is only displayed here, not stored.
create_feature_extractor(
    model_fashionmnist,
    train_return_nodes=train_nodes,
    eval_return_nodes=eval_nodes,
)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
# Display the weight tensor of the first layer in `features` (the first convolution).
# NOTE(review): this prints the whole tensor; consider showing `.shape` or a slice instead.
model_fashionmnist.features[0].weight

Parameter containing:
tensor([[[[-0.3964,  0.2778,  0.3302],
          [-0.2101,  0.2822, -0.2866],
          [-0.4060,  0.1549,  0.2292]]],


        [[[-0.2469, -0.1236, -0.1002],
          [-0.0329, -0.1331,  0.1899],
          [ 0.2027,  0.1597, -0.0892]]],


        [[[ 0.1681,  0.1100,  0.1068],
          [ 0.2355, -0.2880, -0.1570],
          [-0.0099,  0.2958,  0.0628]]],


        [[[-0.0589,  0.2624,  0.2137],
          [-0.0548,  0.3148, -0.2195],
          [ 0.0631, -0.0386,  0.0806]]],


        [[[ 0.0320,  0.1809,  0.2814],
          [-0.1598, -0.0119,  0.0256],
          [-0.0069, -0.1713, -0.2688]]],


        [[[ 0.2926, -0.0017, -0.2618],
          [ 0.1928, -0.0295,  0.3107],
          [-0.1416,  0.2628,  0.0931]]],


        [[[-0.0254,  0.0505, -0.1189],
          [ 0.0190,  0.1902,  0.1115],
          [-0.2718, -0.0822,  0.2864]]],


        [[[-0.0006,  0.2006, -0.0590],
          [-0.1642,  0.2700, -0.1411],
          [ 0.1956,  0.3263, -0.3109]]],


        [[