## Statistical Learning and Deep Learning HW5

### Q1

In [31]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models

In [17]:
# Dataset splits; each one is a sub-directory of `base_path`.
datasets = [
    'train',
    'valid',
    'test',
]
# Class names; each one is a sub-directory inside every split directory.
labels = [
    'blazer',
    'cardigan',
    'coat',
    'jacket',
]
# Root directory of the image collection (relative to the notebook's working directory).
base_path = './photos'

In [18]:
# Count the image files in every <split>/<label>/ directory.
# The table is built in one shot from a nested dict ({column: {row: count}}) rather than
# filled cell by cell with chained indexing (`df[lb][ds] = ...`), which triggers
# chained-assignment warnings and does not write through under pandas copy-on-write.
# Building it this way also gives the columns an integer dtype instead of `object`.
image_counts = {
    lb: {ds: len(os.listdir(f'{base_path}/{ds}/{lb}/')) for ds in datasets}
    for lb in labels
}
# Rows are the splits and columns are the labels, in the order the config lists declare them.
df = pd.DataFrame(image_counts, index=datasets, columns=labels)
# Row-wise total = number of images in each split.
df['total'] = df.sum(axis=1)
print(df)

      blazer cardigan coat jacket   total
train     98      238  297    412  1045.0
valid      7       36   27     35   105.0
test       9       42   43     52   146.0


In [19]:
print('Ratio:')
# Remove the helper 'total' column so that only the per-class counts remain.
# errors='ignore' keeps this cell re-runnable: on a second execution 'total' is already
# gone from `df`, and a plain drop would raise KeyError.
df = df.drop(columns=['total'], errors='ignore')
# Divide every row by its own sum to get the class proportions within each split.
print(df.div(df.sum(axis=1), axis=0))

Ratio:
         blazer  cardigan      coat    jacket
train  0.093780  0.227751  0.284211  0.394258
valid  0.066667  0.342857  0.257143  0.333333
test   0.061644  0.287671  0.294521  0.356164


Given the number of instances of each image type, I expect the per-class classification accuracy to rank as jacket > coat > cardigan > blazer. This follows from the hypothesis that a larger number of training instances for a class leads to higher classification accuracy for that class.

### Q2

In [20]:
# Image transformations
# Per-channel mean / std used to normalise the inputs in every split.
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]

image_transforms = {
    # Training: random crop / flip / rotation for augmentation.
    'train':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.RandomResizedCrop(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=20),
        # Normalize only accepts tensors, so the PIL image must be converted first.
        transforms.ToTensor(),
        transforms.Normalize(normalize_mean, normalize_std)
    ]),

    # Validation: deterministic resize + centre crop, then the same tensor conversion and
    # normalisation as training so the model sees inputs on the same scale.
    'valid':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(normalize_mean, normalize_std)
    ]),

    # Test: identical preprocessing to validation.
    'test':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(normalize_mean, normalize_std)
    ]),
}

In [21]:
# Datasets from folders
# One ImageFolder per split, rooted at <base_path>/<split>/ and paired with the transform
# pipeline registered under the same split name.
data = {
    split: ImageFolder(
        root=f'{base_path}/{split}/',
        transform=image_transforms[split],
    )
    for split in ('train', 'valid', 'test')
}

In [28]:
# Dataloader
batch_size = 32
# NOTE(review): max_epoch and early_stop_patient are not used in this cell; presumably they
# are the epoch limit and early-stopping patience of a later training loop — confirm.
max_epoch = 200
early_stop_patient = 20
# One DataLoader per split. Only the training loader shuffles: shuffling the validation and
# test sets has no benefit for evaluation and makes the batch order differ from the dataset
# order on every pass.
dataloaders = {
    split: DataLoader(
        data[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
        num_workers=10,
    )
    for split in ('train', 'valid', 'test')
}

In [35]:
# Load a pretrained ResNet-50, freeze all of its existing parameters, and replace the
# final fully-connected layer with a new one that has 4 outputs.
model = models.resnet50(pretrained=True)
# Turn off gradient tracking for every parameter currently in the network.
model.requires_grad_(False)
# The replacement head is created after freezing, so its parameters keep the default
# requires_grad=True.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=4)