In [1]:
# load packages

%matplotlib inline

import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import random

import torch
import torchvision
from torchvision import transforms, utils
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

# Q1.

In [2]:
# train

labels = ['blazer', 'cardigan', 'coat', 'jacket']
train_num = {}
total = 0

for i in range(4):
    basepath = os.path.join("photos/train", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    n = 0
    for afn in cand_fn:    
        n += 1
        total += 1
    train_num[labels[i]] = n
train_num['total'] = total

In [3]:
# test

test_num = {}
total = 0

for i in range(4):
    basepath = os.path.join("photos/test", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    n = 0
    for afn in cand_fn:    
        n += 1
        total += 1
    test_num[labels[i]] = n
test_num['total'] = total

In [4]:
# valid

valid_num = {}
total = 0

for i in range(4):
    basepath = os.path.join("photos/valid", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    n = 0
    for afn in cand_fn:    
        n += 1
        total += 1
    valid_num[labels[i]] = n
valid_num['total'] = total

In [5]:
# 列出train, valid, test的總照片數，以及各類別的照片數與比率

print('Train:')
print('total photo number:', train_num['total'])
for label in labels:
    print(label, ':') 
    print('number =',train_num[label])
    print('ratio = ',train_num[label]/train_num['total'])
    
print('\n')

print('Test:')
print('total photo number:', test_num['total'])
for label in labels:
    print(label, ':') 
    print('number =',test_num[label])
    print('ratio = ',test_num[label]/test_num['total'])
    
print('\n')

print('Valid:')
print('total photo number:', valid_num['total'])
for label in labels:
    print(label, ':') 
    print('number =',valid_num[label])
    print('ratio = ',valid_num[label]/valid_num['total'])

Train:
total photo number: 1041
blazer :
number = 97
ratio =  0.09317963496637849
cardigan :
number = 237
ratio =  0.2276657060518732
coat :
number = 296
ratio =  0.28434197886647455
jacket :
number = 411
ratio =  0.39481268011527376


Test:
total photo number: 146
blazer :
number = 9
ratio =  0.06164383561643835
cardigan :
number = 42
ratio =  0.2876712328767123
coat :
number = 43
ratio =  0.2945205479452055
jacket :
number = 52
ratio =  0.3561643835616438


Valid:
total photo number: 105
blazer :
number = 7
ratio =  0.06666666666666667
cardigan :
number = 36
ratio =  0.34285714285714286
coat :
number = 27
ratio =  0.2571428571428571
jacket :
number = 35
ratio =  0.3333333333333333


根據train和valid的資料數量比較，推測準確率的大小應該為：jacket > coat > cardigan > blazer

# Load data (using colab)

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
train = torchvision.datasets.ImageFolder('/content/drive/MyDrive/photos/train',transform = transforms.Compose([transforms.Resize(256),
                                                                                                               transforms.CenterCrop(224),
                                                                                                               transforms.RandomHorizontalFlip(),
                                                                                                               transforms.RandomRotation(20),
                                                                                                               transforms.ToTensor(),
                                                                                                               transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]))

In [4]:
test = torchvision.datasets.ImageFolder('/content/drive/MyDrive/photos/test', transform = transforms.Compose([transforms.Resize(256),
                                                                                                              transforms.CenterCrop(224),
                                                                                                              transforms.ToTensor(),
                                                                                                              transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]))

In [5]:
valid = torchvision.datasets.ImageFolder('/content/drive/MyDrive/photos/valid', transform = transforms.Compose([transforms.Resize(256),
                                                                                                                transforms.CenterCrop(224),
                                                                                                                transforms.ToTensor(),
                                                                                                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]))

In [6]:
trainloader = torch.utils.data.DataLoader(train, batch_size = 32, shuffle=True, num_workers=0)
validloader = torch.utils.data.DataLoader(valid, batch_size = 32, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(test, batch_size = 1, shuffle=True, num_workers=0)

# Early Stopping

In [7]:
def valid(model, loss_fn = torch.nn.CrossEntropyLoss()):
    
    model.eval()
    
    sum_loss = 0
    
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(validloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            sum_loss += loss
    
    return sum_loss

In [8]:
# 除了準確率還要補上Confusion Matrix, 與Per-class Accuracy
def test(model, loss_fn = torch.nn.CrossEntropyLoss()):

    model.eval()
    
    correct = 0
    total = 0
    y_true = np.zeros(146)
    y_pred = np.zeros(146)

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            
            # 取得分最高的那一類
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum()
            y_true[batch_idx] = targets.item()
            y_pred[batch_idx] = predicted.item()

    cm = confusion_matrix(y_true, y_pred)
    print('Accuracy of the network on the 146 test images: %d %%' % (100 * correct / total))
    print('Confusion Matrix :')
    print(cm)
    print('Per-class Accuracy :', cm.diagonal()/cm.sum(axis=1))

In [9]:
def train(model, optimizer, loss_fn = torch.nn.CrossEntropyLoss()):
    
    nepoch = 200
    patient = 20
    
    best_valid_loss = float("inf")
    best_state = dict()
    step_count = 0
    best_step_count = 0
    stop_training = False
    
    for epoch_id in range(0, nepoch):
      
        if stop_training:
            break
            
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            model.train()
            
            step_count += 1
            
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            
        # calculate valid loss
        valid_loss = valid(model)
            
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_step_count = step_count
            best_state = {'model': model.state_dict(),
                          'loss': valid_loss,
                          'step_count': step_count,}
            
        if step_count > (best_step_count + patient):
            stop_training = True
    
    #model.load_state_dict(best_state['model'])
    print('lowest valid loss =', best_state['loss'])

In [10]:
def train_test(model, optimizer, loss_fn = torch.nn.CrossEntropyLoss()):
    
    nepoch = 200
    patient = 20
    
    best_valid_loss = float("inf")
    best_state = dict()
    step_count = 0
    best_step_count = 0
    stop_training = False
    
    for epoch_id in range(0, nepoch):
      
        #print(epoch_id)
        if stop_training:
            break
            
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            model.train()
            
            step_count += 1
            
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            
        # calculate valid loss
        valid_loss = valid(model)
        
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_step_count = step_count
            best_state = {'model': model.state_dict(),
                          'loss': valid_loss,
                          'step_count': step_count,}
        
        if step_count > (best_step_count + patient):
                stop_training = True
                break
    
    model.load_state_dict(best_state['model'])
    test(model)

# Q2. pretrained=True

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:
# tuning hyperparameter for SGD

learning_rate = [0.00001, 0.00005, 0.00009, 0.0001, 0.0005, 0.0009, 0.001, 0.005, 0.009]
for lr in learning_rate:
    model_1 = torchvision.models.resnet50(pretrained=True)
    fc_features = model_1.fc.in_features
    model_1.fc = torch.nn.Linear(fc_features, 4)
    model_1 = model_1.to(device)
    optimizer = torch.optim.SGD(model_1.parameters(), lr=lr, momentum=0, weight_decay = 0)
    print('learning rate =', lr)
    train(model_1, optimizer)

learning rate = 1e-05
lowest valid loss = tensor(5.5808, device='cuda:0')
learning rate = 5e-05
lowest valid loss = tensor(5.3007, device='cuda:0')
learning rate = 9e-05
lowest valid loss = tensor(5.4500, device='cuda:0')
learning rate = 0.0001
lowest valid loss = tensor(5.2862, device='cuda:0')
learning rate = 0.0005
lowest valid loss = tensor(5.1034, device='cuda:0')
learning rate = 0.0009
lowest valid loss = tensor(4.8244, device='cuda:0')
learning rate = 0.001
lowest valid loss = tensor(4.9529, device='cuda:0')
learning rate = 0.005
lowest valid loss = tensor(3.4703, device='cuda:0')
learning rate = 0.009
lowest valid loss = tensor(2.7666, device='cuda:0')


In [None]:
# tuning hyperparameter for Adam

learning_rate = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009]
for lr in learning_rate:
    model_1 = torchvision.models.resnet50(pretrained=True)
    fc_features = model_1.fc.in_features
    model_1.fc = torch.nn.Linear(fc_features, 4)
    model_1 = model_1.to(device)
    optimizer = torch.optim.Adam(model_1.parameters(), lr=lr, weight_decay = 0)
    print('learning rate =', lr)
    train(model_1, optimizer)

learning rate = 1e-05
lowest valid loss = tensor(2.4833, device='cuda:0')
learning rate = 2e-05
lowest valid loss = tensor(2.6959, device='cuda:0')
learning rate = 3e-05
lowest valid loss = tensor(2.4150, device='cuda:0')
learning rate = 4e-05
lowest valid loss = tensor(2.5379, device='cuda:0')
learning rate = 5e-05
lowest valid loss = tensor(2.2889, device='cuda:0')
learning rate = 6e-05
lowest valid loss = tensor(2.8579, device='cuda:0')
learning rate = 7e-05
lowest valid loss = tensor(2.3606, device='cuda:0')
learning rate = 8e-05
lowest valid loss = tensor(3.5498, device='cuda:0')
learning rate = 9e-05
lowest valid loss = tensor(2.8143, device='cuda:0')


In [None]:
# 選擇使用Adam, learning rate = 0.00005
model_1 = torchvision.models.resnet50(pretrained=True)
fc_features = model_1.fc.in_features
model_1.fc = torch.nn.Linear(fc_features, 4)
model_1 = model_1.to(device)
optimizer = torch.optim.Adam(model_1.parameters(), lr=0.00005, weight_decay = 0)
train_test(model_1, optimizer)

Accuracy of the network on the 146 test images: 80 %
Confusion Matrix :
[[ 6  0  1  2]
 [ 0 27  4 11]
 [ 0  4 37  2]
 [ 0  3  2 47]]
Per-class Accuracy : [0.66666667 0.64285714 0.86046512 0.90384615]


Per-class Accuracy: jacket > coat > blazer > cardigan

原先預期: jacket > coat > cardigan > blazer

原先預期cardigan的準確率應該會比blazer高，會有這樣的結果可能是因為cardigan的valid和test中資料的相似程度較高；而blazer的變化較多，test裡在train和valid沒看過的資料數量較多。

# Q3. pretrained=True + 只調整最後一層Fully Connected Layer

In [None]:
# tuning hyperparameter for SGD

learning_rate = [0.001, 0.005, 0.009, 0.01, 0.05, 0.09, 0.1, 0.5, 0.9]
for lr in learning_rate:
    model_2 = torchvision.models.resnet50(pretrained=True)
    
    # 固定除了最後一層以外的其他權重
    for para in model_2.parameters():
        para.requires_grad=False
        
    fc_features = model_2.fc.in_features
    model_2.fc = torch.nn.Linear(fc_features, 4)
    model_2 = model_2.to(device) 

    optimizer = torch.optim.SGD(model_2.fc.parameters(), lr=lr)
    print('learning rate =', lr)
    train(model_2, optimizer)

learning rate = 0.001
lowest valid loss = tensor(5.0875, device='cuda:0')
learning rate = 0.005
lowest valid loss = tensor(4.8014, device='cuda:0')
learning rate = 0.009
lowest valid loss = tensor(4.6584, device='cuda:0')
learning rate = 0.01
lowest valid loss = tensor(4.2978, device='cuda:0')
learning rate = 0.05
lowest valid loss = tensor(25.6281, device='cuda:0')
learning rate = 0.09
lowest valid loss = tensor(40.6288, device='cuda:0')
learning rate = 0.1
lowest valid loss = tensor(32.9138, device='cuda:0')
learning rate = 0.5
lowest valid loss = tensor(151.5327, device='cuda:0')
learning rate = 0.9
lowest valid loss = tensor(223.1741, device='cuda:0')


In [None]:
# tuning hyperparameter for Adam

learning_rate = [0.00002, 0.00009, 0.0001, 0.0005, 0.0009, 0.001, 0.005, 0.009, 0.1]
for lr in learning_rate:
    model_2 = torchvision.models.resnet50(pretrained=True)
    # 固定除了最後一層以外的其他權重
    for para in model_2.parameters():
        para.requires_grad=False

    fc_features = model_2.fc.in_features
    model_2.fc = torch.nn.Linear(fc_features, 4)
    model_2 = model_2.to(device) 

    optimizer = torch.optim.Adam(model_2.fc.parameters(), lr=lr)
    print('learning rate =', lr)
    train(model_2, optimizer)

learning rate = 2e-05
lowest valid loss = tensor(5.2111, device='cuda:0')
learning rate = 9e-05
lowest valid loss = tensor(4.8201, device='cuda:0')
learning rate = 0.0001
lowest valid loss = tensor(4.8929, device='cuda:0')
learning rate = 0.0005
lowest valid loss = tensor(4.0600, device='cuda:0')
learning rate = 0.0009
lowest valid loss = tensor(4.4463, device='cuda:0')
learning rate = 0.001
lowest valid loss = tensor(4.7305, device='cuda:0')
learning rate = 0.005
lowest valid loss = tensor(4.4730, device='cuda:0')
learning rate = 0.009
lowest valid loss = tensor(4.6439, device='cuda:0')
learning rate = 0.1
lowest valid loss = tensor(25.7237, device='cuda:0')


In [None]:
# 選擇使用Adam, learning rate = 0.0005
model_2 = torchvision.models.resnet50(pretrained=True)

# 固定除了最後一層以外的其他權重
for para in model_2.parameters():
    para.requires_grad=False

fc_features = model_2.fc.in_features
model_2.fc = torch.nn.Linear(fc_features, 4)
model_2 = model_2.to(device) 

optimizer = torch.optim.Adam(model_2.parameters(), lr=0.0005, weight_decay = 0)
train_test(model_2, optimizer)

Accuracy of the network on the 146 test images: 56 %
Confusion Matrix :
[[ 0  0  1  8]
 [ 0 16  3 23]
 [ 0  4 19 20]
 [ 1  1  2 48]]
Per-class Accuracy : [0.         0.38095238 0.44186047 0.92307692]


# Q4.

In [14]:
# tuning hyperparameter for SGD

learning_rate = [0.00005, 0.00009, 0.0001, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009, 0.001]
for lr in learning_rate:
    model_3 = torchvision.models.resnet50(pretrained=False)
    fc_features = model_3.fc.in_features
    model_3.fc = torch.nn.Linear(fc_features, 4)
    model_3 = model_3.to(device)
    optimizer = torch.optim.SGD(model_3.parameters(), lr=lr)
    print('learning rate =', lr)
    train(model_3, optimizer)

learning rate = 5e-05
lowest valid loss = tensor(5.0631, device='cuda:0')
learning rate = 9e-05
lowest valid loss = tensor(5.2436, device='cuda:0')
learning rate = 0.0001
lowest valid loss = tensor(5.1613, device='cuda:0')
learning rate = 0.0005
lowest valid loss = tensor(5.0577, device='cuda:0')
learning rate = 0.0006
lowest valid loss = tensor(5.2493, device='cuda:0')
learning rate = 0.0007
lowest valid loss = tensor(5.0992, device='cuda:0')
learning rate = 0.0008
lowest valid loss = tensor(5.2202, device='cuda:0')
learning rate = 0.0009
lowest valid loss = tensor(5.1644, device='cuda:0')
learning rate = 0.001
lowest valid loss = tensor(5.2797, device='cuda:0')


In [22]:
# tuning hyperparameter for Adam

learning_rate = [0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009]
for lr in learning_rate:
    model_3 = torchvision.models.resnet50(pretrained=False)
    fc_features = model_3.fc.in_features
    model_3.fc = torch.nn.Linear(fc_features, 4)
    model_3 = model_3.to(device)
    optimizer = torch.optim.Adam(model_3.parameters(), lr=lr)
    print('learning rate =', lr)
    train(model_3, optimizer)

learning rate = 1e-05
lowest valid loss = tensor(5.2601, device='cuda:0')
learning rate = 2e-05
lowest valid loss = tensor(5.1548, device='cuda:0')
learning rate = 3e-05
lowest valid loss = tensor(5.1500, device='cuda:0')
learning rate = 4e-05
lowest valid loss = tensor(5.2977, device='cuda:0')
learning rate = 5e-05
lowest valid loss = tensor(5.0238, device='cuda:0')
learning rate = 6e-05
lowest valid loss = tensor(5.4344, device='cuda:0')
learning rate = 7e-05
lowest valid loss = tensor(5.5510, device='cuda:0')
learning rate = 8e-05
lowest valid loss = tensor(5.1449, device='cuda:0')
learning rate = 9e-05
lowest valid loss = tensor(4.8705, device='cuda:0')


In [30]:
# 選擇使用Adam, learning rate = 0.0009
model_3 = torchvision.models.resnet50(pretrained=False)
fc_features = model_3.fc.in_features
model_3.fc = torch.nn.Linear(fc_features, 4)
model_3 = model_3.to(device)
optimizer = torch.optim.Adam(model_3.parameters(), lr=0.0009)
train_test(model_3, optimizer)

Accuracy of the network on the 146 test images: 41 %
Confusion Matrix :
[[ 0  2  1  6]
 [ 0 19  1 22]
 [ 0 17  1 25]
 [ 0 12  0 40]]
Per-class Accuracy : [0.         0.45238095 0.02325581 0.76923077]


# Q5.

預測能力: Q2 > Q3 > Q4

- 比較Q2和Q3的結果: Q2和Q3的差別在於是否有調整除了最後一層的Fully Connected Layer。
  可以發現如果訓練的過程指訓練Fully Connected Layer，對於預測能力的提升非常有限，即使有使用Pretrained仍然無法有好的預測能力。
  
- 比較Q2和Q4的結果: Q2和Q4的差別在於是否使用Pretrained。
  從結果來看，是否有使用Pretrained對模型的準確率影響非常大，沒有使用跟有使用之間的準確率差了將近40％。
 
- 綜合三題的結果來看，如果想要模型的表現較佳，不管是Pretrained或者是訓練所有層數的參數都是必要的。
  而且是否有Pretrained對模型的影響比只訓練部分層的參數對模型的影響更大。