<b>Fine Tuning</b>

In [1]:
import numpy as np
import torch

seed = 100
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [2]:
# Dataset : MNIST

from torchvision import datasets, transforms
import torch.utils as utils

transform = transforms.Compose([
    transforms.ToTensor()])

dataset_train_mnist = datasets.MNIST(
    '~/mnist', 
    train=True, 
    download=True, 
    transform=transform)
dataset_test_mnist  = datasets.MNIST(
    '~/mnist', 
    train=False, 
    download=True, 
    transform=transform)

batch_size = 1000

dataloader_train_mnist = utils.data.DataLoader(dataset_train_mnist,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)
dataloader_test_mnist  = utils.data.DataLoader(dataset_test_mnist,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

In [3]:
# Network

import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv = nn.Sequential(OrderedDict([
        ("conv1", nn.Conv2d(1, 4, kernel_size=5)),
        ("relu1", nn.ReLU()),
        ("pool1", nn.MaxPool2d(2)),
        ("conv2", nn.Conv2d(4, 8, kernel_size=5)),
        ("relu2", nn.ReLU()),
        ("pool2", nn.MaxPool2d(2)),
    ]))
    self.fc = nn.Sequential(OrderedDict([
        ("fc1"  , nn.Linear(8 * 4 * 4, 100)),
        ("relu1", nn.ReLU()),
        ("fc2"  , nn.Linear(100, 10)),
    ]))

  def forward(self, x1):
    x2 = self.conv(x1)
    x3 = x2.view(x2.size()[0], -1)
    y  = self.fc(x3)
    return y

In [4]:
# Training

from torch import optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for i in range(20):
  print(f"EPOCH: {i+1}")

  ### Train ###
  model.train()
  for x, t in dataloader_train_mnist:
    x = x.to(device)
    t = t.to(device)
    model.zero_grad()
    y = model(x)
    loss = criterion(y, t)
    loss.backward()
    optimizer.step()

  model.eval()
  sum_loss = 0.0
  sum_correct = 0
  sum_iter = 0
  for x, t in dataloader_train_mnist:
    x = x.to(device)
    t = t.to(device)
    y = model(x)
    loss = criterion(y, t)
    _, predicted = y.max(1)
    sum_loss += loss.cpu().detach().numpy()
    sum_correct += (predicted == t).sum().item()
    sum_iter += 1
  print(f"  train loss: {sum_loss/sum_iter}")
  print(f"  train acc : {sum_correct/(sum_iter*batch_size)}")

  ### Test ###
  model.eval()
  sum_loss = 0.0
  sum_correct = 0
  sum_iter = 0
  for x, t in dataloader_test_mnist:
    x = x.to(device)
    t = t.to(device)
    y = model(x)
    loss = criterion(y, t)
    _, predicted = y.max(1)
    sum_loss += loss.cpu().detach().numpy()
    sum_correct += (predicted == t).sum().item()
    sum_iter += 1
  print(f"  test  loss: {sum_loss/sum_iter}")
  print(f"  test  acc : {sum_correct/(sum_iter*batch_size)}")

EPOCH: 1
  train loss: 0.5909978826840718
  train acc : 0.8283
  test  loss: 0.5575805306434631
  test  acc : 0.8412
EPOCH: 2
  train loss: 0.32243935714165367
  train acc : 0.90445
  test  loss: 0.30150979608297346
  test  acc : 0.9095
EPOCH: 3
  train loss: 0.25395206088821093
  train acc : 0.92425
  test  loss: 0.2361677572131157
  test  acc : 0.9289
EPOCH: 4
  train loss: 0.20649680644273757
  train acc : 0.9388333333333333
  test  loss: 0.18966234028339385
  test  acc : 0.9461
EPOCH: 5
  train loss: 0.17659781277179717
  train acc : 0.9457
  test  loss: 0.16153196468949318
  test  acc : 0.9522
EPOCH: 6
  train loss: 0.15401257785658043
  train acc : 0.9537333333333333
  test  loss: 0.1409040503203869
  test  acc : 0.9587
EPOCH: 7
  train loss: 0.1318309967716535
  train acc : 0.9606
  test  loss: 0.12094156518578529
  test  acc : 0.9649
EPOCH: 8
  train loss: 0.11875455478827159
  train acc : 0.9642166666666667
  test  loss: 0.10511731430888176
  test  acc : 0.9673
EPOCH: 9
  trai

In [5]:
# Check Parameter before Fine-Tuning

print(model.conv.conv1.weight[0])
print(model.fc.fc2.weight[0])

tensor([[[-0.0985,  0.0879, -0.1902, -0.1860, -0.1028],
         [ 0.1718, -0.0648, -0.1451, -0.0651, -0.0996],
         [-0.0330,  0.2190,  0.2385,  0.2755,  0.0984],
         [ 0.3187,  0.3777,  0.2911,  0.0618,  0.0600],
         [-0.0226,  0.2461,  0.1406,  0.3183,  0.1211]]], device='cuda:0',
       grad_fn=<SelectBackward>)
tensor([ 4.4739e-03,  1.2526e-01, -1.9917e-02, -4.6347e-02,  2.2909e-02,
        -6.2701e-02, -7.0424e-02, -5.7456e-02,  1.2736e-01, -1.4160e-01,
        -6.9129e-02,  1.4174e-01, -1.9258e-01,  1.0845e-01,  5.2486e-03,
        -7.8624e-02,  1.3389e-02, -1.5213e-01,  1.4314e-01, -6.8443e-02,
         9.4356e-02,  8.4462e-02, -6.6918e-02,  4.8688e-02,  1.2247e-02,
         1.0085e-01, -6.6323e-02, -1.5319e-03, -7.8377e-02, -6.4501e-03,
         7.7620e-02,  3.2653e-02,  3.1954e-02, -9.0260e-02,  1.0515e-01,
        -1.7098e-01,  5.8217e-02, -8.9542e-03,  4.9337e-02, -1.6820e-01,
         3.1405e-02, -3.3455e-02, -7.8176e-02,  7.6675e-02, -1.3678e-01,
         1.

In [6]:
# Freeze Conv Parameter

for param in model.conv.parameters():
  param.requires_grad = False

for name, param in model.named_parameters():
  print(name, param.requires_grad)

conv.conv1.weight False
conv.conv1.bias False
conv.conv2.weight False
conv.conv2.bias False
fc.fc1.weight True
fc.fc1.bias True
fc.fc2.weight True
fc.fc2.bias True


In [7]:
# Dataset : Fashion-MNIST

from torchvision import datasets, transforms
import torch.utils as utils

transform = transforms.Compose([
    transforms.ToTensor()])

dataset_train_fmnist = datasets.FashionMNIST(
    '~/fashion-mnist', 
    train=True, 
    download=True, 
    transform=transform)
dataset_test_fmnist  = datasets.FashionMNIST(
    '~/fashion-mnist', 
    train=False, 
    download=True, 
    transform=transform)

batch_size = 1000

dataloader_train_fmnist = utils.data.DataLoader(dataset_train_fmnist,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)
dataloader_test_fmnist  = utils.data.DataLoader(dataset_test_fmnist,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

In [8]:
# Fine-Tuning

for i in range(20):
  print(f"EPOCH: {i+1}")

  ### Train ###
  model.train()
  for x, t in dataloader_train_fmnist:
    x = x.to(device)
    t = t.to(device)
    model.zero_grad()
    y = model(x)
    loss = criterion(y, t)
    loss.backward()
    optimizer.step()

  model.eval()
  sum_loss = 0.0
  sum_correct = 0
  sum_iter = 0
  for x, t in dataloader_train_fmnist:
    x = x.to(device)
    t = t.to(device)
    y = model(x)
    loss = criterion(y, t)
    _, predicted = y.max(1)
    sum_loss += loss.cpu().detach().numpy()
    sum_correct += (predicted == t).sum().item()
    sum_iter += 1
  print(f"  train loss: {sum_loss/sum_iter}")
  print(f"  train acc : {sum_correct/(sum_iter*batch_size)}")

  ### Test ###
  model.eval()
  sum_loss = 0.0
  sum_correct = 0
  sum_iter = 0
  for x, t in dataloader_test_fmnist:
    x = x.to(device)
    t = t.to(device)
    y = model(x)
    loss = criterion(y, t)
    _, predicted = y.max(1)
    sum_loss += loss.cpu().detach().numpy()
    sum_correct += (predicted == t).sum().item()
    sum_iter += 1
  print(f"  test  loss: {sum_loss/sum_iter}")
  print(f"  test  acc : {sum_correct/(sum_iter*batch_size)}")

EPOCH: 1
  train loss: 0.700119603673617
  train acc : 0.7395666666666667
  test  loss: 0.722387158870697
  test  acc : 0.7275
EPOCH: 2
  train loss: 0.6315408299366633
  train acc : 0.76425
  test  loss: 0.6574683725833893
  test  acc : 0.7507
EPOCH: 3
  train loss: 0.5941612869501114
  train acc : 0.7801833333333333
  test  loss: 0.6200386345386505
  test  acc : 0.7694
EPOCH: 4
  train loss: 0.5723256339629491
  train acc : 0.7873833333333333
  test  loss: 0.5976135730743408
  test  acc : 0.7775
EPOCH: 5
  train loss: 0.5561575805147488
  train acc : 0.79415
  test  loss: 0.583068311214447
  test  acc : 0.7863
EPOCH: 6
  train loss: 0.5437386398514111
  train acc : 0.8001666666666667
  test  loss: 0.5709605693817139
  test  acc : 0.7893
EPOCH: 7
  train loss: 0.5304510210951169
  train acc : 0.8072833333333334
  test  loss: 0.5576499283313752
  test  acc : 0.7997
EPOCH: 8
  train loss: 0.5291270206371943
  train acc : 0.8028166666666666
  test  loss: 0.5565049827098847
  test  acc : 

In [11]:
# Check Parameter after Fine-Tuning

print(model.conv.conv1.weight[0])
print(model.fc.fc2.weight[0])

tensor([[[-0.0967,  0.0885, -0.1903, -0.1866, -0.1035],
         [ 0.1728, -0.0644, -0.1450, -0.0653, -0.0999],
         [-0.0323,  0.2196,  0.2390,  0.2761,  0.0989],
         [ 0.3187,  0.3782,  0.2917,  0.0624,  0.0605],
         [-0.0231,  0.2460,  0.1407,  0.3189,  0.1217]]], device='cuda:0')
tensor([ 0.0575,  0.0650, -0.1363, -0.0098,  0.0251, -0.0627, -0.0704, -0.1061,
         0.0677, -0.0783, -0.3935,  0.2101, -0.3144,  0.2200, -0.0327, -0.0546,
         0.0540, -0.2238,  0.0951, -0.0959,  0.0960,  0.0623, -0.0968,  0.0465,
         0.0337,  0.1604, -0.0193,  0.0191, -0.0784,  0.0136,  0.3060, -0.0195,
         0.0739, -0.1016,  0.1331, -0.2097,  0.0854, -0.0090,  0.0493, -0.0699,
         0.0063,  0.1402, -0.0448,  0.0737, -0.1148,  0.1185,  0.0521, -0.1865,
        -0.0516,  0.0155, -0.0172, -0.0341, -0.0449, -0.0956, -0.0053,  0.0402,
         0.0112,  0.1212, -0.0658, -0.1897, -0.1160, -0.1381,  0.0961, -0.0798,
         0.1022, -0.0864,  0.0012,  0.0804, -0.1570,  0.1236,

参考<br>
https://pytorch.org/docs/stable/notes/autograd.html