In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10, MNIST, FashionMNIST
from torch.utils.data import DataLoader

In [None]:
learning_rate = 0.001
batch_size = 64
num_epochs = 5
momentum = 0.9

## Data Augmentation

In [None]:
# Define the transformations
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    #transforms.CenterCrop(28),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)), ])  # Normalize to [-1, 1]

# Load data and Apply the transformations to the dataset
train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Finetuning

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class MiniVGG(nn.Module):
    def __init__(self,):
        super(MiniVGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size= 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size= 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size= 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size= 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Linear(256*3*3, 10)
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [None]:
def train_model(model, optimizer, train_loader):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1} - Loss: {running_loss / len(train_loader)}")


# Evaluation
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print("Accuracy on the test set:", accuracy)

### CIFAR-10

In [None]:
from google.colab import files

uploaded = files.upload()

Saving cifar10_mini_vgg.pth to cifar10_mini_vgg.pth


In [None]:
path = "cifar10_mini_vgg.pth"
model_cifar = MiniVGG()

#Load model's weight
#model_cifar = torch.load(path)#, map_location=torch.device(device))
model_cifar.load_state_dict(torch.load(path,map_location=torch.device(device)), strict= False)

<All keys matched successfully>

In [None]:
# Freeze all layers except the last layer
for param in model_cifar.parameters():
    param.requires_grad = False

# Set the requires_grad attribute of the parameters in the last layer to True
for param in model_cifar.classifier.parameters():
    param.requires_grad = True

In [None]:
#Define loss function and optimizer
model_cifar.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_cifar = optim.SGD(model_cifar.parameters(), lr=learning_rate, momentum=momentum)

train_model(model_cifar, optimizer_cifar, train_loader)

accuracy_cifar = evaluate_model(model_cifar, test_loader)

Epoch 1 - Loss: 0.9976865335631726
Epoch 2 - Loss: 0.4918440717941662
Epoch 3 - Loss: 0.42290419626083453
Epoch 4 - Loss: 0.38590323954407596
Epoch 5 - Loss: 0.37005098025078204
Accuracy on the test set: 87.18


### MNIST

In [None]:
from google.colab import files

uploaded = files.upload()

Saving mnist_mini_vgg.pth to mnist_mini_vgg.pth


In [None]:
path = "mnist_mini_vgg.pth"
model_mnist = MiniVGG()

model_mnist.load_state_dict(torch.load(path,map_location=torch.device(device)), strict= False)

<All keys matched successfully>

In [None]:
# Freeze all layers except the last layer
for param in model_mnist.parameters():
    param.requires_grad = False

# Set the requires_grad attribute of the parameters in the last layer to True
for param in model_mnist.classifier.parameters():
    param.requires_grad = True

In [None]:
#Define loss function and optimizer
model_mnist.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_mnist = optim.SGD(model_mnist.parameters(), lr=learning_rate,momentum=momentum)

train_model(model_mnist, optimizer_mnist, train_loader)

accuracy_mnist = evaluate_model(model_mnist, test_loader)

Epoch 1 - Loss: 0.9410194106765394
Epoch 2 - Loss: 0.5633330147371871
Epoch 3 - Loss: 0.514084163457473
Epoch 4 - Loss: 0.4874532163492652
Epoch 5 - Loss: 0.4722584866003187
Accuracy on the test set: 83.1


### Train MiniVGG on FashionMnist from scratch

In [None]:
model_fashionmnist = MiniVGG()
model_fashionmnist.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_fashionmnist = optim.SGD(model_fashionmnist.parameters(), lr=learning_rate, momentum=momentum)

train_model(model_fashionmnist, optimizer_fashionmnist, train_loader)

accuracy_fashionmnist = evaluate_model(model_fashionmnist, test_loader)

Epoch 1 - Loss: 2.292074187478023
Epoch 2 - Loss: 1.0408521668870312
Epoch 3 - Loss: 0.6004810636358728
Epoch 4 - Loss: 0.5053610619165496
Epoch 5 - Loss: 0.45080493185629467
Accuracy on the test set: 83.46


Model pretrain CIFAR-10 cho kết quả cao nhất vì tính chất của các ảnh thuộc CIFAR-10 và FASHIONMNIST (đồ vật, con vật, thiên về các đường nét đa dạng, phức tạp) có độ tương đồng cao hơn so với MNIST chỉ là các con số.

# Feature Extraction

In [None]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

train_nodes, eval_nodes = get_graph_node_names(model_fashionmnist)

In [None]:
train_nodes

['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'flatten',
 'classifier']

In [None]:
create_feature_extractor(model_fashionmnist, train_return_nodes= train_nodes, eval_return_nodes= eval_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
model_fashionmnist.features[0].weight

Parameter containing:
tensor([[[[ 1.7197e-01,  8.4022e-02, -2.2031e-01],
          [-2.0753e-01, -6.4311e-02,  2.8501e-01],
          [ 2.3773e-01,  2.6604e-01, -2.9772e-01]]],


        [[[-2.9456e-01,  2.5813e-01, -3.0895e-01],
          [ 2.5113e-01, -2.5110e-01, -1.9612e-01],
          [ 1.0672e-01, -2.8410e-01,  9.8710e-02]]],


        [[[-1.9519e-01,  2.8709e-01,  2.1346e-01],
          [-2.3781e-01, -1.3502e-01, -2.9352e-01],
          [ 1.5684e-01,  2.1149e-01,  2.2501e-01]]],


        [[[-2.8035e-01,  2.2753e-01, -1.2211e-02],
          [-8.1250e-02,  2.0683e-01, -6.6967e-02],
          [-2.5064e-01,  2.5617e-01,  2.4382e-01]]],


        [[[ 1.0010e-01, -1.4914e-01,  3.1136e-01],
          [ 3.0568e-01,  2.6047e-01, -2.7086e-01],
          [ 2.2416e-01,  1.8952e-01, -2.4795e-01]]],


        [[[ 1.8780e-01, -1.7215e-01, -1.3949e-01],
          [ 2.5965e-01, -7.9825e-02, -2.5835e-01],
          [ 1.8798e-01, -2.1342e-01, -2.2483e-01]]],


        [[[ 3.1954e-01,  6.1459e-02,