### 統計學習與深度學習
### Homework 5

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import torchvision.models as models
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
import math

In [15]:
labels = ['blazer', 'cardigan', 'coat', 'jacket']
cnt, blazer_cnt, cardigan_cnt, coat_cnt, jacket_cnt = 0, 0, 0, 0, 0
for i in range(4):
    basepath = os.path.join("photos/train", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    cnt += len(cand_fn)
    if i == 0:
        blazer_cnt += len(cand_fn)
    elif i == 1:
        cardigan_cnt += len(cand_fn)
    elif i == 2:
        coat_cnt += len(cand_fn)
    elif i == 3:
        jacket_cnt += len(cand_fn)
print("train總照片數 = ", cnt)

train總照片數 =  1041


In [16]:
cnt = 0
for i in range(4):
    basepath = os.path.join("photos/valid", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    cnt += len(cand_fn)
    if i == 0:
        blazer_cnt += len(cand_fn)
    elif i == 1:
        cardigan_cnt += len(cand_fn)
    elif i == 2:
        coat_cnt += len(cand_fn)
    elif i == 3:
        jacket_cnt += len(cand_fn)
print("valid總照片數 = ", cnt)

valid總照片數 =  105


In [17]:
cnt = 0
for i in range(4):
    basepath = os.path.join("photos/test", labels[i], "*.jpg")
    cand_fn = glob.glob(basepath)
    cnt += len(cand_fn)
    if i == 0:
        blazer_cnt += len(cand_fn)
    elif i == 1:
        cardigan_cnt += len(cand_fn)
    elif i == 2:
        coat_cnt += len(cand_fn)
    elif i == 3:
        jacket_cnt += len(cand_fn)
print("test總照片數 = ", cnt)

test總照片數 =  146


In [18]:
cnt = blazer_cnt + cardigan_cnt + coat_cnt + jacket_cnt
print("blazer照片數 = "+str(blazer_cnt)+", 比率 = "+str(blazer_cnt/cnt))
print("cardigan照片數 = "+str(cardigan_cnt)+", 比率 = "+str(cardigan_cnt/cnt))
print("coat照片數 = "+str(coat_cnt)+", 比率 = "+str(coat_cnt/cnt))
print("jacket照片數 = "+str(jacket_cnt)+", 比率 = "+str(jacket_cnt/cnt))

blazer照片數 = 113, 比率 = 0.08746130030959752
cardigan照片數 = 315, 比率 = 0.24380804953560373
coat照片數 = 366, 比率 = 0.28328173374613
jacket照片數 = 498, 比率 = 0.38544891640866874


In [2]:
def readfile(path):
    labels = ['blazer', 'cardigan', 'coat', 'jacket']
    for i in range(4):
        basepath = os.path.join(path, labels[i], "*.jpg")
        cand_fn = glob.glob(basepath)

        tmp_x = np.zeros((len(cand_fn), 342, 256, 3), dtype=np.uint8)
        tmp_y = np.zeros((len(cand_fn)), dtype=np.uint8)
        for j in range(len(cand_fn)):
            img = cv2.imread(cand_fn[j])
            tmp_x[j, :, :] = cv2.resize(img,(256, 342))
            tmp_y[j] = i
        if i == 0:
            x = tmp_x
            y = tmp_y
        else:
            x = np.concatenate((x, tmp_x))
            y = np.concatenate((y, tmp_y))
    return x, y


In [3]:
x_train, y_train = readfile("photos/train")
x_valid, y_valid = readfile("photos/valid")
x_test, y_test = readfile("photos/test")

In [4]:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [15]:
def training(optim, lr, m, device):
    model = models.resnet50(pretrained=True).to(device)
    model.fc.out_features = 4
    loss = nn.CrossEntropyLoss()
    if optim == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=m)
    elif optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    num_epoch = 200

    best_cnt = 0
    best_loss = math.inf

    for epoch in range(num_epoch):
        epoch_start_time = time.time()
        train_loss = 0.0
        valid_loss = 0.0
        
        model.train()
        for i, data in enumerate(train_loader):
            optimizer.zero_grad()
            train_pred = model(data[0].to(device))
            batch_loss = loss(train_pred, data[1].to(device))
            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss.item()
        
        
        with torch.no_grad():
            for i, data in enumerate(valid_loader):
                valid_pred = model(data[0].to(device))
                batch_loss = loss(valid_pred, data[1].to(device))
                valid_loss += batch_loss.item()
            
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_cnt = 0
            torch.save(model.state_dict(), './model/'+optim+'_lr'+str(lr)+'_m'+str(m)+'.pkl')
            
        if best_cnt >= 20:
            break
            
        best_cnt += 1
        
    return best_loss
       

In [16]:
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = "cuda"
else:
    device = "cpu"

In [17]:
batch_size = 32

train_set = ImgDataset(x_train, y_train, train_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_set = ImgDataset(x_valid, y_valid, test_transform)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True)

In [18]:
# SGD fine-tuning
lr = [0.0001, 0.0005, 0.001, 0.005, 0.01]
m = [0.0, 0.5, 0.9]
for i in lr:
    for j in m:
        loss = training('SGD', i, j, device)
        print('SGD learning_rate = '+str(i)+', momentum = '+str(j)+', valid loss = '+str(loss))

SGD learning_rate = 0.0001, momentum = 0.0, valid loss = 3.794820010662079
SGD learning_rate = 0.0001, momentum = 0.5, valid loss = 3.121865600347519
SGD learning_rate = 0.0001, momentum = 0.9, valid loss = 2.771882563829422
SGD learning_rate = 0.0005, momentum = 0.0, valid loss = 2.6920808255672455
SGD learning_rate = 0.0005, momentum = 0.5, valid loss = 2.854362651705742
SGD learning_rate = 0.0005, momentum = 0.9, valid loss = 2.8479690849781036
SGD learning_rate = 0.001, momentum = 0.0, valid loss = 3.1306369602680206
SGD learning_rate = 0.001, momentum = 0.5, valid loss = 2.638573467731476
SGD learning_rate = 0.001, momentum = 0.9, valid loss = 2.847838431596756
SGD learning_rate = 0.005, momentum = 0.0, valid loss = 2.854484885931015
SGD learning_rate = 0.005, momentum = 0.5, valid loss = 3.1163567900657654
SGD learning_rate = 0.005, momentum = 0.9, valid loss = 3.708523839712143
SGD learning_rate = 0.01, momentum = 0.0, valid loss = 2.658783495426178
SGD learning_rate = 0.01, mom

In [None]:
# Adam fine-tuning
lr = [0.0001, 0.00025, 0.0005, 0.00075, 0.001, 0.0025, 0.005, 0.0075, 0.01]
for i in lr:
    loss = training('Adam', i, 0.0, device)
    print('Adam learning_rate = '+str(i)+', valid loss = '+str(loss))

In [29]:
test_set = ImgDataset(x_test, y_test, test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
model = models.resnet50(pretrained=True).to(device)
model.fc.out_features = 4
model.load_state_dict(torch.load('./model/SGD_lr0.001_m0.5.pkl'))
model.eval()
test_acc = 0.0
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model(data[0].to(device))
        test_acc += np.sum(np.argmax(test_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        
print(test_acc/test_set.__len__())

0.821917808219178
