## Statistical Learning and Deep Learning HW5

### Q1

In [1]:
import numpy as np
import pandas as pd
import glob
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models

In [2]:
# Names of the dataset splits and of the image classes.
datasets = ['train', 'valid', 'test']
labels = ['blazer', 'cardigan', 'coat', 'jacket']

# Working directory, and the photo directory located directly beneath it.
base_path = '/tmp2/b06705028/sldl'
data_path = base_path + '/photos'

In [3]:
# Count the .jpg files found for every (split, class) pair.
# Rows are dataset splits, columns are class labels; cells start at 0 so the frame is integer-typed.
df = pd.DataFrame(0, index=datasets, columns=labels)
for ds in datasets:
    for lb in labels:
        jpg_pattern = os.path.join(data_path, ds, lb, '*.jpg')
        # Write through .loc: the chained form df[lb][ds] = ... may assign to a
        # temporary copy and leave df unchanged.
        df.loc[ds, lb] = len(glob.glob(jpg_pattern))
# Per-split total across all classes.
df['total'] = df.sum(axis=1).astype('int')
print(df)

      blazer cardigan coat jacket  total
train     97      237  296    411   1041
valid      7       36   27     35    105
test       9       42   43     52    146


In [4]:
# Class proportions per split: each row of counts divided by that row's sum.
print('Ratio:')
# errors='ignore' keeps the cell re-runnable after 'total' has already been dropped.
df = df.drop(columns=['total'], errors='ignore')
print (df.div(df.sum(axis=1), axis=0))

Ratio:
         blazer  cardigan      coat    jacket
train  0.093180  0.227666  0.284342  0.394813
valid  0.066667  0.342857  0.257143  0.333333
test   0.061644  0.287671  0.294521  0.356164


Given the number of instances of each image type, I expect the classification accuracy to rank as jacket > coat > cardigan > blazer. This follows the hypothesis that a larger number of instances in the training set leads to higher classification accuracy.

### Q2

In [5]:
# Image transformations
# Every split ends with the same ToTensor + Normalize step so that the model sees
# validation/test inputs on the same scale as the training inputs. Only the training
# pipeline adds random augmentation (random crop, horizontal flip, rotation).
image_transforms = {
    'train':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
    'valid':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Must match the normalization applied to the training images.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
    'test':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Must match the normalization applied to the training images.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [6]:
# One ImageFolder dataset per split, each read from its own sub-directory of
# data_path and paired with the transform pipeline registered under the same key.
data = {
    split: ImageFolder(root=f'{data_path}/{split}/', transform=image_transforms[split])
    for split in ('train', 'valid', 'test')
}

In [7]:
# Dataloader
batch_size = 32
# Only the training loader shuffles. The evaluation loaders keep a fixed order:
# validation loss is accumulated per batch, so reshuffling would change which samples
# fall into a smaller last batch and make the loss vary for identical weights.
dataloaders = {
    'train': DataLoader(data['train'], batch_size=batch_size, shuffle=True, num_workers=4),
    'valid': DataLoader(data['valid'], batch_size=batch_size, shuffle=False, num_workers=4),
    'test': DataLoader(data['test'], batch_size=batch_size, shuffle=False, num_workers=4)
}

In [8]:
# device
# Restrict the process to GPU 0, then fall back to the CPU when CUDA is unavailable.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    print(f'using {device} {torch.cuda.current_device()}')
else:
    # torch.cuda.current_device() raises without CUDA, so it is only queried on the GPU path.
    print(f'using {device}')

using cuda 0


In [9]:
# Classification loss shared by training and evaluation ('mean' is the default reduction).
loss_fn = nn.CrossEntropyLoss(reduction='mean')

# Grid of hyper-parameters to search: learning rates and weight-decay strengths.
all_lr = [
    0.001,
    0.005,
    0.01,
    0.05,
]
all_wd = [
    0,
    0.0001,
    0.001,
]

In [10]:
def cross_entropy_loss(model, data_loader):
    """Accumulate the loss of `model` over every batch of `data_loader`.

    Each batch is moved to the module-level `device`, passed through `model`,
    and scored with the module-level `loss_fn`; the per-batch values are summed.
    Gradients are disabled for the whole pass. The model's train/eval mode is
    not changed here, so the caller must set it beforehand.

    Args:
        model: callable that maps a batch of inputs to outputs accepted by `loss_fn`.
        data_loader: iterable yielding `(inputs, targets)` batches.

    Returns:
        A zero-dimensional tensor with the summed loss (zero for an empty loader).
    """
    # Start from a tensor instead of the int 0 so the result is always a tensor,
    # even when the loader yields no batches (callers invoke .cpu() on it).
    total = torch.zeros((), device=device)
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            total += loss_fn(model(inputs), targets)
    return total

In [11]:
def train(optim, model, weight_path, fix_weight=True, early_stop_patient=20, max_epoch=200):
    """Train `model` with early stopping on the validation loss.

    Each epoch runs one pass over `dataloaders['train']`, then evaluates
    `cross_entropy_loss` on `dataloaders['valid']`. Whenever the validation loss
    improves, the whole model object is saved to `weight_path` with `torch.save`.

    Args:
        optim: optimizer already bound to the parameters of `model`.
        model: network to train; expected to already live on `device`.
        weight_path: file path that receives the best model seen so far.
        fix_weight: accepted for backward compatibility; not used by this function.
        early_stop_patient: stop once this many epochs pass without a new best
            validation loss.
        max_epoch: upper bound on the number of epochs.

    Returns:
        The best (lowest) validation loss observed, as a Python float.
    """
    best_valid_loss = np.inf
    best_valid_epoch = 0
    for epoch in range(max_epoch):
        # train
        # Switch to training mode once per epoch (validation below flips it to eval).
        model.train()
        train_loss = 0.0
        for inputs, targets in dataloaders['train']:
            inputs, targets = inputs.to(device), targets.to(device)
            optim.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optim.step()
            train_loss += loss.item()

        # validation
        model.eval()
        # Convert to a plain float so comparisons, formatting and result tables
        # hold ordinary scalars rather than 0-d arrays.
        valid_loss = float(cross_entropy_loss(model, dataloaders['valid']))

        # update weight if lower validation loss
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_valid_epoch = epoch
            torch.save(model, weight_path)

        # early stopping
        elif (epoch - best_valid_epoch >= early_stop_patient):
            print(f'early stopping at epoch {epoch}, best_valid = {best_valid_loss:.3f}')
            return best_valid_loss

        # Progress report every 20 epochs.
        if (epoch % 20 == 0):
            print(f'epoch {epoch}: train_loss = {train_loss:.3f}, valid_loss = {valid_loss:.3f}, best_valid = {best_valid_loss:.3f}')

    return best_valid_loss

#### SGD

In [2]:
# Grid search for SGD: best validation loss for every (learning rate, weight decay) pair.
# Columns are learning rates, rows are weight-decay values.
sgd_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train SGD lr = {lr}, weight_decay = {wd}')
        weight_path = f'{base_path}/Q2_weight_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        # (no parameters are frozen, so the whole network is fine-tuned)
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
        valid_loss = train(optim, model, weight_path)
        # Write through .loc: the chained form sgd_valid[lr][wd] = ... may assign
        # to a temporary copy and leave the table unchanged.
        sgd_valid.loc[wd, lr] = valid_loss

NameError: name 'pd' is not defined

In [1]:
print(sgd_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

# Find the (learning rate, weight decay) pair with the lowest validation loss.
# Iterate over the original all_lr / all_wd values rather than the frame's labels:
# the index is stored as floats, so weight decay 0 would come back as 0.0, and a
# checkpoint path built from it ('..._0.0') would not match the file saved during
# training ('..._0').
sgd_min_wd = -1
sgd_min_lr = -1
sgd_minimum = np.inf
for lr in all_lr:
    for wd in all_wd:
        candidate = sgd_valid.loc[wd, lr]
        if candidate < sgd_minimum:
            sgd_min_wd = wd
            sgd_min_lr = lr
            sgd_minimum = candidate
print(f'minimum validation loss {sgd_minimum:.3f} at lr={sgd_min_lr}, wd={sgd_min_wd}')

NameError: name 'sgd_valid' is not defined

In [None]:
# for lr in all_lr:
#     weight_path = f'{base_path}/Q2_weight_{lr}'
#     saved_model = torch.load(weight_path)
#     test_size = len(data['test'])
#     n_correct = 0
#     with torch.no_grad():
#         for batch, (inputs, targets) in enumerate(dataloaders['test']):
#             inputs, targets = inputs.to(device), targets.to(device)
#             outputs = saved_model(inputs)
#             _, preds = torch.max(outputs, 1)
#             correct = (targets==preds).cpu().numpy()
#             n_correct += np.sum(correct)
#     print(f'SGD, lr = {lr}: test accuracy: ', n_correct / len(data['test']))

In [None]:
# Evaluate the SGD model with the lowest validation loss on the test split.
# The best hyper-parameters are stored as sgd_min_lr / sgd_min_wd.
weight_path = f'{base_path}/Q2_weight_{sgd_min_lr}_{sgd_min_wd}'
print(f'load {weight_path}')
# NOTE: torch.load unpickles a whole model object; only load files produced by this notebook.
saved_model = torch.load(weight_path)
saved_model.eval()
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for inputs, targets in dataloaders['test']:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        # Predicted class = index of the largest output along dimension 1.
        _, preds = torch.max(outputs, 1)
        n_correct += (targets == preds).sum().item()
# test_size is used here instead of nesting data['test'] inside the f-string, which
# reuses the same quote character and is a SyntaxError before Python 3.12.
print(f'(SGD) test accuracy: {n_correct / test_size}')

#### Adam

In [None]:
# Grid search for Adam: best validation loss for every (learning rate, weight decay) pair.
# Columns are learning rates, rows are weight-decay values.
adam_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train Adam lr = {lr}, weight_decay = {wd}')
        weight_path = f'{base_path}/Q2_weight_adam_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        # Use Adam (this section previously built an SGD optimizer) and pass the
        # weight decay being searched, which was otherwise ignored.
        optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
        # Write through .loc: the chained form adam_valid[lr][wd] = ... may assign
        # to a temporary copy and leave the table unchanged.
        adam_valid.loc[wd, lr] = train(optim, model, weight_path)

In [None]:
print(adam_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

# Find the (learning rate, weight decay) pair with the lowest validation loss.
# Iterate over the original all_lr / all_wd values rather than the frame's labels:
# the index is stored as floats, so weight decay 0 would come back as 0.0, and a
# checkpoint path built from it ('..._0.0') would not match the file saved during
# training ('..._0').
adam_min_wd = -1
adam_min_lr = -1
adam_minimum = np.inf
for lr in all_lr:
    for wd in all_wd:
        candidate = adam_valid.loc[wd, lr]
        if candidate < adam_minimum:
            adam_min_wd = wd
            adam_min_lr = lr
            adam_minimum = candidate
print(f'minimum validation loss {adam_minimum:.3f} at lr={adam_min_lr}, wd={adam_min_wd}')

In [None]:
# for lr in all_lr:
#     weight_path = f'{base_path}/Q2_weight_adam_{lr}'
#     print(f'loading {weight_path}')
#     saved_model = torch.load(weight_path)
#     test_size = len(data['test'])
#     n_correct = 0
#     with torch.no_grad():
#         for batch, (inputs, targets) in enumerate(dataloaders['test']):
#             inputs, targets = inputs.to(device), targets.to(device)
#             outputs = saved_model(inputs)
#             _, preds = torch.max(outputs, 1)
#             correct = (targets==preds).cpu().numpy()
#             n_correct += np.sum(correct)
#     print(f'Adam, lr = {lr}: test accuracy: ', n_correct / len(data['test']))

In [None]:
# Evaluate the Adam model with the lowest validation loss on the test split.
# The best hyper-parameters are stored as adam_min_lr / adam_min_wd.
weight_path = f'{base_path}/Q2_weight_adam_{adam_min_lr}_{adam_min_wd}'
print(f'load {weight_path}')
# NOTE: torch.load unpickles a whole model object; only load files produced by this notebook.
saved_model = torch.load(weight_path)
saved_model.eval()
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for inputs, targets in dataloaders['test']:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        # Predicted class = index of the largest output along dimension 1.
        _, preds = torch.max(outputs, 1)
        n_correct += (targets == preds).sum().item()
# test_size is used here instead of nesting data['test'] inside the f-string, which
# reuses the same quote character and is a SyntaxError before Python 3.12.
print(f'(Adam) test accuracy: {n_correct / test_size}')