## Statistical Learning and Deep Learning HW5

### Q1

In [3]:
import numpy as np
import pandas as pd
import glob
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
from sklearn.metrics import confusion_matrix

In [4]:
datasets = ['train', 'valid' ,'test']
labels = ['blazer', 'cardigan', 'coat', 'jacket']
base_path = '/tmp2/b06705028/sldl'
data_path = f'{base_path}/photos'

In [5]:
df = pd.DataFrame(columns=labels, index=datasets)
for ds in datasets:
    for lb in labels:
        basepath = os.path.join(f'{data_path}/{ds}/{lb}/', '*.jpg')
        cand_fn = glob.glob(basepath)
        df[lb][ds] = len(cand_fn)
df['total'] = df.sum(axis=1).astype('int')
print(df)

      blazer cardigan coat jacket  total
train     97      237  296    411   1041
valid      7       36   27     35    105
test       9       42   43     52    146


In [6]:
print('Ratio:')
df = df.drop(['total'], axis=1)
print (df.div(df.sum(axis=1), axis=0))

Ratio:
         blazer  cardigan      coat    jacket
train  0.093180  0.227666  0.284342  0.394813
valid  0.066667  0.342857  0.257143  0.333333
test   0.061644  0.287671  0.294521  0.356164


Given the number of instances of each image type, I suggest that the accuracy of the classification task will be jacket > coat > cardigan > blazer. This follows the hypothesis that larger number of instances in training set causes higher classification accuracy.

### Q2

In [7]:
# Image transformations
image_transforms = {
    'train':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
    'valid':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
    'test':
        transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [8]:
# Datasets from folders
data = {
    'train': ImageFolder(root=f'{data_path}/train/', transform=image_transforms['train']),
    'valid': ImageFolder(root=f'{data_path}/valid/', transform=image_transforms['valid']),
    'test': ImageFolder(root=f'{data_path}/test/', transform=image_transforms['test'])
}

In [9]:
# Dataloader
batch_size = 32
dataloaders = {
    'train': DataLoader(data['train'], batch_size=batch_size, shuffle=True, num_workers=4),
    'valid': DataLoader(data['valid'], batch_size=batch_size, shuffle=True, num_workers=4),
    'test': DataLoader(data['test'], batch_size=batch_size, shuffle=True, num_workers=4)
}

In [10]:
# device
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'using {device} {torch.cuda.current_device()}')

using cuda 0


In [11]:
# loss
loss_fn = nn.CrossEntropyLoss()

In [12]:
def cross_entropy_loss(model, data_loader):
    loss = 0
    with torch.no_grad():
        for batch, (inputs, targets) in enumerate(data_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss += loss_fn(outputs, targets)
    return loss

In [13]:
def train(optim, model, weight_path, early_stop_patient=20, max_epoch=200):
    best_valid_loss = np.inf
    best_valid_epoch = 0
    for epoch in range(max_epoch):
        # train
        train_loss = 0
        for batch, (inputs, targets) in enumerate(dataloaders['train']):
            inputs, targets = inputs.to(device), targets.to(device)
            model.train()
            optim.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optim.step()
            train_loss += loss.item()
            
        # validation
        model.eval()
        valid_loss = cross_entropy_loss(model, dataloaders['valid']).cpu().numpy()
 
        # update weight if lower validation loss
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_valid_epoch = epoch
            torch.save(model, weight_path)
            
        # early stopping
        elif (epoch - best_valid_epoch >= early_stop_patient):
            print(f'early stopping at epoch {epoch}, best_valid = {best_valid_loss:.3f}')
            return best_valid_loss
        
        if (epoch % 20 == 0):
            print(f'epoch {epoch}: train_loss = {train_loss:.3f}, valid_loss = {valid_loss:.3f}, best_valid = {best_valid_loss:.3f}')
    
    return best_valid_loss

#### SGD

In [20]:
all_lr = [0.001, 0.005, 0.01, 0.05]
all_wd = [0, 0.0001, 0.001]
sgd_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train SGD lr = {lr}, weight_decay = {wd}')
        weight_path = f'{base_path}/Q2_weight_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
        valid_loss = train(optim, model, weight_path)
        sgd_valid[lr][wd] = valid_loss


train SGD lr = 0.001, weight_decay = 0
model at cuda
epoch 0: train_loss = 42.875, valid_loss = 5.238, best_valid = 5.238
epoch 20: train_loss = 34.223, valid_loss = 4.460, best_valid = 4.294
epoch 40: train_loss = 27.082, valid_loss = 3.820, best_valid = 3.635
epoch 60: train_loss = 22.329, valid_loss = 3.515, best_valid = 3.164
epoch 80: train_loss = 18.321, valid_loss = 3.115, best_valid = 3.115
epoch 100: train_loss = 15.469, valid_loss = 2.882, best_valid = 2.849
early stopping at epoch 111, best_valid = 2.849

train SGD lr = 0.001, weight_decay = 0.0001
model at cuda
epoch 0: train_loss = 42.475, valid_loss = 5.346, best_valid = 5.346
epoch 20: train_loss = 34.183, valid_loss = 4.603, best_valid = 4.307
epoch 40: train_loss = 25.705, valid_loss = 3.771, best_valid = 3.586
epoch 60: train_loss = 21.222, valid_loss = 3.642, best_valid = 3.178
epoch 80: train_loss = 18.036, valid_loss = 3.146, best_valid = 3.073
epoch 100: train_loss = 13.140, valid_loss = 3.039, best_valid = 2.708

In [21]:
print(sgd_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

sgd_min_wd = -1
sgd_min_lr = -1
sgd_minimum = np.inf
for lr in sgd_valid.columns:
    for wd in sgd_valid.index:
        if sgd_valid[lr][wd] < sgd_minimum:
            sgd_min_wd = wd
            sgd_min_lr = lr
            sgd_minimum = sgd_valid[lr][wd]
print(f'minimum validation loss {sgd_minimum:.3f} at lr={sgd_min_lr}, wd={sgd_min_wd}')

            0.001      0.005      0.010      0.050
0.0000  2.8494473  3.0142472  3.3445148  3.2017083
0.0001  2.7076368  2.7738614  3.1947072  3.7165773
0.0010  2.8433902  2.8717287  3.1435347  3.4962337
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 2.708 at lr=0.001, wd=0.0001


In [22]:
weight_path = f'{base_path}/Q2_weight_{sgd_min_lr}_{sgd_min_wd}'
print(f'load {weight_path}')
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(SGD) test accuracy: {n_correct / test_size}')

load /tmp2/b06705028/sldl/Q2_weight_0.001_0.0001
(SGD) test accuracy: 0.6917808219178082


#### Adam

In [17]:
all_lr = [0.00001, 0.00005, 0.0001, 0.0005]
all_wd = [0, 0.0001, 0.001]
adam_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train Adam lr = {lr}, weight decay = {wd}')
        weight_path = f'{base_path}/Q2_weight_adam_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
        adam_valid[lr][wd] = train(optim, model, weight_path)


train Adam lr = 1e-05, weight decay = 0
model at cuda
epoch 0: train_loss = 45.607, valid_loss = 5.620, best_valid = 5.620
epoch 20: train_loss = 14.713, valid_loss = 3.586, best_valid = 3.244
epoch 40: train_loss = 8.418, valid_loss = 3.512, best_valid = 2.990
early stopping at epoch 47, best_valid = 2.990

train Adam lr = 1e-05, weight decay = 0.0001
model at cuda
epoch 0: train_loss = 47.093, valid_loss = 5.556, best_valid = 5.556
epoch 20: train_loss = 14.889, valid_loss = 3.537, best_valid = 2.983
early stopping at epoch 37, best_valid = 2.983

train Adam lr = 1e-05, weight decay = 0.001
model at cuda
epoch 0: train_loss = 46.690, valid_loss = 5.218, best_valid = 5.218
epoch 20: train_loss = 14.846, valid_loss = 3.726, best_valid = 3.332
epoch 40: train_loss = 7.889, valid_loss = 4.375, best_valid = 3.192
early stopping at epoch 47, best_valid = 3.192

train Adam lr = 5e-05, weight decay = 0
model at cuda
epoch 0: train_loss = 39.532, valid_loss = 4.439, best_valid = 4.439
epoch 

In [18]:
print(adam_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

adam_min_wd = -1
adam_min_lr = -1
adam_minimum = np.inf
for lr in adam_valid.columns:
    for wd in adam_valid.index:
        if adam_valid[lr][wd] < adam_minimum:
            adam_min_wd = wd
            adam_min_lr = lr
            adam_minimum = adam_valid[lr][wd]
print(f'minimum validation loss {adam_minimum:.3f} at lr={adam_min_lr}, wd={adam_min_wd}')

          0.00001    0.00005    0.00010    0.00050
0.0000    2.99024   3.041418  3.1356177  3.2901216
0.0001  2.9833064  2.8774087  3.0620637  3.5125751
0.0010  3.1924808   2.661787  2.8352168  4.5057206
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 2.662 at lr=5e-05, wd=0.001


In [19]:
weight_path = f'{base_path}/Q2_weight_adam_{adam_min_lr}_{adam_min_wd}'
print(f'load {weight_path}')
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(Adam) test accuracy: {n_correct / test_size}')

load /tmp2/b06705028/sldl/Q2_weight_adam_5e-05_0.001
(Adam) test accuracy: 0.6917808219178082


Test accuracy: 0.692

In [23]:
# confusion matrix
confusion = np.zeros((len(labels), len(labels)))
weight_path = '/tmp2/b06705028/sldl/Q2_weight_adam_5e-05_0.001'
saved_model = torch.load(weight_path)
y_truth = np.array([])
y_pred = np.array([])
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        y_truth = np.concatenate((y_truth, targets.cpu().numpy()))
        y_pred = np.concatenate((y_pred, preds.cpu().numpy()))
confusion = confusion_matrix(y_truth, y_pred)
print('Confusion Matrix (row: truth, column: predicted)\n')
print(confusion)

Confusion Matrix (row: truth, column: predicted)

[[ 5  0  2  2]
 [ 2 21 10  9]
 [ 0  5 33  5]
 [ 2  7  1 42]]


In [24]:
# per class accuracy
for i in range(len(labels)):
    total = confusion[i].sum()
    correct = confusion[i][i]
    print(f'{labels[i]}: {correct / total:.3f}')

blazer: 0.556
cardigan: 0.500
coat: 0.767
jacket: 0.808


In Q1, we guessed that per-class accuracy will be jacket > coat > cardigan > blazer. Actually, the relation between them is jacket > coat> blazer > cardigan . Prediction accuracy of blazer is higher that of cardigan.

### Q3

#### SGD

In [46]:
all_lr = [0.001, 0.005, 0.01, 0.05]
all_wd = [0, 0.0001, 0.001]
sgd_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train SGD lr = {lr}, weight_decay = {wd}')
        weight_path = f'{base_path}/Q3_weight_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        for param in model.parameters():
            param.requires_grad = False
        for param in model.fc.parameters():
            param.requires_grad = True
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
        valid_loss = train(optim, model, weight_path)
        sgd_valid[lr][wd] = valid_loss


train SGD lr = 0.001, weight_decay = 0
model at cuda
epoch 0: train_loss = 43.137, valid_loss = 5.371, best_valid = 5.371
epoch 20: train_loss = 38.277, valid_loss = 4.632, best_valid = 4.632
epoch 40: train_loss = 36.090, valid_loss = 4.440, best_valid = 4.384
epoch 60: train_loss = 35.226, valid_loss = 4.436, best_valid = 4.301
early stopping at epoch 75, best_valid = 4.301

train SGD lr = 0.001, weight_decay = 0.0001
model at cuda
epoch 0: train_loss = 43.646, valid_loss = 5.295, best_valid = 5.295
epoch 20: train_loss = 37.954, valid_loss = 4.658, best_valid = 4.629
epoch 40: train_loss = 36.527, valid_loss = 4.697, best_valid = 4.408
epoch 60: train_loss = 34.944, valid_loss = 4.391, best_valid = 4.293
epoch 80: train_loss = 34.460, valid_loss = 4.613, best_valid = 4.277
epoch 100: train_loss = 34.304, valid_loss = 4.243, best_valid = 4.188
early stopping at epoch 111, best_valid = 4.188

train SGD lr = 0.001, weight_decay = 0.001
model at cuda
epoch 0: train_loss = 42.955, valid

In [47]:
print(sgd_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

sgd_min_wd = -1
sgd_min_lr = -1
sgd_minimum = np.inf
for lr in sgd_valid.columns:
    for wd in sgd_valid.index:
        if sgd_valid[lr][wd] < sgd_minimum:
            sgd_min_wd = wd
            sgd_min_lr = lr
            sgd_minimum = sgd_valid[lr][wd]
print(f'minimum validation loss {sgd_minimum:.3f} at lr={sgd_min_lr}, wd={sgd_min_wd}')

           0.001      0.005      0.010      0.050
0.0000  4.300536   4.209583   4.299723  5.7730145
0.0001  4.188382   4.266525  4.3477793   6.596818
0.0010  4.232094  4.2428756   4.383698  6.6515093
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 4.188 at lr=0.001, wd=0.0001


In [49]:
weight_path = f'{base_path}/Q3_weight_{sgd_min_lr}_{sgd_min_wd}'
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(SGD) test accuracy: {n_correct / test_size}')

(SGD) test accuracy: 0.4931506849315068


#### Adam

In [25]:
all_lr = [0.00001, 0.00005, 0.0001, 0.0005]
all_wd = [0, 0.0001, 0.001]
adam_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train Adam lr = {lr}, weight decay = {wd}')
        weight_path = f'{base_path}/Q3_weight_adam_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 4)
        for param in model.parameters():
            param.requires_grad = False
        for param in model.fc.parameters():
            param.requires_grad = True
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
        adam_valid[lr][wd] = train(optim, model, weight_path)


train Adam lr = 1e-05, weight decay = 0
model at cuda
epoch 0: train_loss = 45.747, valid_loss = 5.418, best_valid = 5.418
epoch 20: train_loss = 41.365, valid_loss = 5.080, best_valid = 4.998
epoch 40: train_loss = 40.284, valid_loss = 4.848, best_valid = 4.848
epoch 60: train_loss = 39.538, valid_loss = 4.833, best_valid = 4.775
epoch 80: train_loss = 38.684, valid_loss = 4.734, best_valid = 4.671
epoch 100: train_loss = 37.721, valid_loss = 4.722, best_valid = 4.480
early stopping at epoch 117, best_valid = 4.480

train Adam lr = 1e-05, weight decay = 0.0001
model at cuda
epoch 0: train_loss = 45.011, valid_loss = 5.504, best_valid = 5.504
epoch 20: train_loss = 41.518, valid_loss = 5.281, best_valid = 5.055
epoch 40: train_loss = 40.125, valid_loss = 5.122, best_valid = 4.971
epoch 60: train_loss = 39.277, valid_loss = 5.006, best_valid = 4.885
epoch 80: train_loss = 38.431, valid_loss = 5.108, best_valid = 4.809
epoch 100: train_loss = 37.765, valid_loss = 5.085, best_valid = 4.6

In [26]:
print(adam_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

adam_min_wd = -1
adam_min_lr = -1
adam_minimum = np.inf
for lr in adam_valid.columns:
    for wd in adam_valid.index:
        if adam_valid[lr][wd] < adam_minimum:
            adam_min_wd = wd
            adam_min_lr = lr
            adam_minimum = adam_valid[lr][wd]
print(f'minimum validation loss {adam_minimum:.3f} at lr={adam_min_lr}, wd={adam_min_wd}')

          0.00001   0.00005    0.00010    0.00050
0.0000   4.479928  4.224363  4.2429733  4.1592846
0.0001  4.6933312  4.403717   4.255934   4.418859
0.0010  4.5794396  4.288756  4.1779165   3.964293
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 3.964 at lr=0.0005, wd=0.001


In [27]:
weight_path = f'{base_path}/Q3_weight_adam_{adam_min_lr}_{adam_min_wd}'
print(f'load {weight_path}')
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(Adam) test accuracy: {n_correct / test_size}')

load /tmp2/b06705028/sldl/Q3_weight_adam_0.0005_0.001
(Adam) test accuracy: 0.5


Test accuracy: 0.5

In [28]:
# confusion matrix
confusion = np.zeros((len(labels), len(labels)))
weight_path = '/tmp2/b06705028/sldl/Q3_weight_adam_0.01_0.001'
saved_model = torch.load(weight_path)
y_truth = np.array([])
y_pred = np.array([])
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        y_truth = np.concatenate((y_truth, targets.cpu().numpy()))
        y_pred = np.concatenate((y_pred, preds.cpu().numpy()))
confusion = confusion_matrix(y_truth, y_pred)
print('Confusion Matrix (row: truth, column: predicted)\n')
print(confusion)

Confusion Matrix (row: truth, column: predicted)

[[ 1  2  2  4]
 [ 0 28  3 11]
 [ 0 15 14 14]
 [ 0 21  1 30]]


In [78]:
# per class accuracy
print('per class accuracy:')
for i in range(len(labels)):
    total = confusion[i].sum()
    correct = confusion[i][i]
    print(f'{labels[i]}: {correct / total:.3f}')

per class accuracy:
blazer: 0.111
cardigan: 0.667
coat: 0.326
jacket: 0.577


Keeping all weights fixed, except the fully connected layer, the overall accuracy is much lower than that we got in Q2.

### Q4

### SGD

In [79]:
sgd_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train SGD lr = {lr}, weight_decay = {wd}')
        weight_path = f'{base_path}/Q4_weight_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=False)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
        valid_loss = train(optim, model, weight_path)
        sgd_valid[lr][wd] = valid_loss


train SGD lr = 0.001, weight_decay = 0
model at cuda
epoch 0: train_loss = 43.576, valid_loss = 5.416, best_valid = 5.416
epoch 20: train_loss = 42.656, valid_loss = 5.277, best_valid = 4.998
early stopping at epoch 28, best_valid = 4.998

train SGD lr = 0.001, weight_decay = 0.0001
model at cuda
epoch 0: train_loss = 43.393, valid_loss = 5.304, best_valid = 5.304
epoch 20: train_loss = 42.836, valid_loss = 5.517, best_valid = 5.092
early stopping at epoch 38, best_valid = 5.092

train SGD lr = 0.001, weight_decay = 0.001
model at cuda
epoch 0: train_loss = 43.334, valid_loss = 5.378, best_valid = 5.378
epoch 20: train_loss = 42.671, valid_loss = 4.991, best_valid = 4.991
early stopping at epoch 40, best_valid = 4.991

train SGD lr = 0.005, weight_decay = 0
model at cuda
epoch 0: train_loss = 74.163, valid_loss = 7.658, best_valid = 7.658
epoch 20: train_loss = 44.624, valid_loss = 5.159, best_valid = 5.008
epoch 40: train_loss = 43.250, valid_loss = 4.955, best_valid = 4.955
early st

In [80]:
print(sgd_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

sgd_min_wd = -1
sgd_min_lr = -1
sgd_minimum = np.inf
for lr in sgd_valid.columns:
    for wd in sgd_valid.index:
        if sgd_valid[lr][wd] < sgd_minimum:
            sgd_min_wd = wd
            sgd_min_lr = lr
            sgd_minimum = sgd_valid[lr][wd]
print(f'minimum validation loss {sgd_minimum:.3f} at lr={sgd_min_lr}, wd={sgd_min_wd}')

            0.001      0.005      0.010      0.050
0.0000   4.998086  4.9553504  4.9137764  4.9439445
0.0001  5.0923233  5.0064526   4.976336  4.9054356
0.0010   4.991056  4.9016933  4.9188795  4.9398346
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 4.902 at lr=0.005, wd=0.001


In [81]:
weight_path = f'{base_path}/Q4_weight_{sgd_min_lr}_{sgd_min_wd}'
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(SGD) test accuracy: {n_correct / test_size}')

(SGD) test accuracy: 0.3835616438356164


#### Adam

In [29]:
all_lr = [0.00001, 0.00005, 0.0001, 0.0005]
all_wd = [0, 0.0001, 0.001]
adam_valid = pd.DataFrame(columns=all_lr, index=all_wd)

for lr in all_lr:
    for wd in all_wd:
        print('\n============================================')
        print(f'train Adam lr = {lr}, weight decay = {wd}')
        weight_path = f'{base_path}/Q4_weight_adam_{lr}_{wd}'

        # load pretrained resnet50 and set the output dimension to 4
        model = models.resnet50(pretrained=False)
        model.fc = nn.Linear(model.fc.in_features, 4)
        model.to(device)
        print(f'model at {device}')

        # train
        optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
        adam_valid[lr][wd] = train(optim, model, weight_path)


train Adam lr = 1e-05, weight decay = 0
model at cuda
epoch 0: train_loss = 45.448, valid_loss = 5.402, best_valid = 5.402
epoch 20: train_loss = 41.439, valid_loss = 5.256, best_valid = 5.064
epoch 40: train_loss = 40.434, valid_loss = 5.726, best_valid = 4.974
epoch 60: train_loss = 40.162, valid_loss = 5.133, best_valid = 4.867
early stopping at epoch 63, best_valid = 4.867

train Adam lr = 1e-05, weight decay = 0.0001
model at cuda
epoch 0: train_loss = 52.293, valid_loss = 6.334, best_valid = 6.334
epoch 20: train_loss = 41.023, valid_loss = 5.010, best_valid = 4.962
epoch 40: train_loss = 40.788, valid_loss = 5.458, best_valid = 4.884
early stopping at epoch 42, best_valid = 4.884

train Adam lr = 1e-05, weight decay = 0.001
model at cuda
epoch 0: train_loss = 46.793, valid_loss = 5.334, best_valid = 5.334
epoch 20: train_loss = 41.234, valid_loss = 5.314, best_valid = 5.075
epoch 40: train_loss = 40.747, valid_loss = 5.178, best_valid = 5.032
early stopping at epoch 45, best_va

In [30]:
print(adam_valid)
print('(x_axis: learning rate)')
print('(y_axis: weight decay)\n')

adam_min_wd = -1
adam_min_lr = -1
adam_minimum = np.inf
for lr in adam_valid.columns:
    for wd in adam_valid.index:
        if adam_valid[lr][wd] < adam_minimum:
            adam_min_wd = wd
            adam_min_lr = lr
            adam_minimum = adam_valid[lr][wd]
print(f'minimum validation loss {adam_minimum:.3f} at lr={adam_min_lr}, wd={adam_min_wd}')

          0.00001   0.00005    0.00010    0.00050
0.0000  4.8669505  4.928935  4.9500265   4.939928
0.0001  4.8838434  4.906565  4.9956007  5.0377192
0.0010   5.031926  5.050787   4.886034   4.972141
(x_axis: learning rate)
(y_axis: weight decay)

minimum validation loss 4.867 at lr=1e-05, wd=0.0


In [32]:
weight_path = f'{base_path}/Q4_weight_adam_{adam_min_lr}_{adam_min_wd}'
print(f'load {weight_path}')
saved_model = torch.load(weight_path)
test_size = len(data['test'])
n_correct = 0
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        correct = (targets==preds).cpu().numpy()
        n_correct += np.sum(correct)
print(f'(Adam) test accuracy: {n_correct / test_size}')

load /tmp2/b06705028/sldl/Q4_weight_adam_1e-05_0
(Adam) test accuracy: 0.3287671232876712


In [35]:
# confusion matrix
confusion = np.zeros((len(labels), len(labels)))
weight_path = '/tmp2/b06705028/sldl/Q4_weight_0.005_0.001'
saved_model = torch.load(weight_path)
y_truth = np.array([])
y_pred = np.array([])
with torch.no_grad():
    for batch, (inputs, targets) in enumerate(dataloaders['test']):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = saved_model(inputs)
        _, preds = torch.max(outputs, 1)
        y_truth = np.concatenate((y_truth, targets.cpu().numpy()))
        y_pred = np.concatenate((y_pred, preds.cpu().numpy()))
confusion = confusion_matrix(y_truth, y_pred)
print('Confusion Matrix (row: truth, column: predicted)\n')
print(confusion)

Confusion Matrix (row: truth, column: predicted)

[[ 0  6  0  3]
 [ 0 27  0 15]
 [ 0 27  0 16]
 [ 0 23  0 29]]


In [36]:
# per class accuracy
print('per class accuracy:')
for i in range(len(labels)):
    total = confusion[i].sum()
    correct = confusion[i][i]
    print(f'{labels[i]}: {correct / total:.3f}')

per class accuracy:
blazer: 0.000
cardigan: 0.643
coat: 0.000
jacket: 0.558


### Q5

The accuracy of the three models is Q2 > Q3 > Q4. This indicates that the pretrained model is effective in enhancing the model performance. Moreover, updating all model weights allows the model to fit better than the case that only the fully connected layer is modified.