# Transfer Learning

## - ex) CIFAR10 pre-trained model to FONT-50 (final project dataset)

#### (0) Find a suitable pre-trained model for our custom dataset

<img src="../../shared/TL_final.png" alt="Drawing" style="width: 1000px;" align="left"/>

#### (1) Load Pre-trained Model

In [17]:
from cnn import ConvNet
import torch
import torch.nn as nn

# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
backend = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(backend)

In [18]:
# Build the network and place it on the selected device.
model = ConvNet()
model = model.to(device)

# Read the pre-trained checkpoint (tensors are remapped onto `device`) and
# copy its weights into the network.
# NOTE(review): depending on the torch version, torch.load may unpickle
# arbitrary objects -- only load checkpoints from a trusted source.
checkpoint = torch.load('./pths/cifar10_pre_model.pth', map_location=device)
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [9]:
# Show the layer structure of the network that was just loaded.
banner = "===== Loaded Pre-trained Model ====="
print(banner, "\n", model)

===== Loaded Pre-trained Model ===== 
 ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2048, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)


#### (2) Edit Model (Freeze + Edit)

* Freeze Loaded Model's Parameters

In [11]:
# Disable gradient tracking on every existing parameter so that the
# pre-trained weights are not updated by later training.
for tensor in model.parameters():
    tensor.requires_grad_(False)

* Edit Loaded Model

In [20]:
# Replace the final classifier layer with a new 50-output layer.
# The input width is read from the layer being replaced instead of being
# hard-coded, so it stays correct if the preceding layer ever changes.
# The new layer's parameters are created with requires_grad=True, so they
# remain trainable even though the loaded parameters were frozen above.
model.fc2 = nn.Linear(model.fc2.in_features, 50)

# A newly constructed layer is placed on the CPU by default, so this move is
# required (not optional) when `device` is CUDA; on CPU it is a no-op.
model = model.to(device)

In [26]:
# Show the layer structure again, now that fc2 has been replaced.
banner = "===== Loaded Pre-trained Model ====="
print(banner, "\n", model)

===== Loaded Pre-trained Model ===== 
 ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2048, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=50, bias=True)
)


In [21]:
# cf) List the shape of every parameter tensor held by the model
for tensor in model.parameters():
    print(tensor.shape)

torch.Size([16, 3, 5, 5])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([32, 16, 5, 5])
torch.Size([32])
torch.Size([32])
torch.Size([32])
torch.Size([120, 2048])
torch.Size([120])
torch.Size([50, 120])
torch.Size([50])


#### (3) Train

In [25]:
from cnn import ConvNet
from font_dataset import FontDataset
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import os


# Hyperparameters
lr = 0.001
num_epochs = 1
batch_size = 100

### Config
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Load Data
# expanduser resolves the leading '~' to the current user's home directory.
train_dir = os.path.expanduser('~/datasets/font/npy_train')
train_data = FontDataset(train_dir)

test_dir = os.path.expanduser('~/datasets/font/npy_test')
test_data = FontDataset(test_dir)

### Define Dataloader
# NOTE(review): the training loader does not shuffle; if FontDataset yields
# samples grouped by class, consider shuffle=True -- confirm against FontDataset.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

### Define Model and Load Params
# Build the network on the target device, print it, then restore the
# pre-trained weights from the checkpoint file.
model = ConvNet().to(device)
print("========================== Original Model =============================", "\n", model)
model.load_state_dict(torch.load('./pths/cifar10_pre_model.pth', map_location=device))

### Use pre-trained model and only change the last layer
# Freeze every loaded parameter so that only the new head is trained.
for param in model.parameters():
    param.requires_grad = False

# New 50-output head; its input width is taken from the layer it replaces
# rather than hard-coded. Its parameters are trainable by default.
model.fc2 = nn.Linear(model.fc2.in_features, 50)
# The new layer is created on the CPU by default, so move the model again.
model = model.to(device)

print("========================== Modified Model =============================", "\n", model)

### Define Loss and Optim
criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that still require gradients
# (the new head); the frozen ones never receive updates anyway.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=lr)

### Train
if __name__ == '__main__':
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # The model and its input are already on `device`, so the output
            # needs no extra transfer.
            outputs = model(images)

            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print Loss for Tracking Training
            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

                # Quick check on the first test batch. It runs in eval mode
                # without autograd so that test data does not update the
                # BatchNorm running statistics and no graph is built.
                was_training = model.training
                model.eval()
                with torch.no_grad():
                    test_image, test_label = next(iter(test_loader))
                    _, test_predicted = torch.max(model(test_image.to(device)), 1)
                    batch_correct = (test_predicted == test_label.to(device)).sum().item()
                model.train(was_training)  # restore the previous mode for training
                print('  Test batch accuracy: {:.2f} %'.format(100 * batch_correct / test_label.size(0)))

    # Test after Training is done
    model.eval() # Set model to Evaluation Mode (Batchnorm uses moving mean/var instead of mini-batch mean/var)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest output per sample is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Report the number of images actually evaluated (the last batch may
        # be smaller than batch_size) and avoid dividing by zero on an empty set.
        accuracy = 100 * correct / total if total else 0.0
        print('Accuracy of the network on the {} test images: {} %'.format(total, accuracy))

 ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2048, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)
 ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, c