In [None]:
# Download the data
!wget https://people.cs.pitt.edu/~mzhang/cs1699/pacs_dataset.zip
!unzip pacs_dataset.zip

In [47]:
import sys, os, copy, collections
import numpy as np
import pandas as pd
from skimage import io

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Show whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [44]:
# Default configuration, read as module-level globals by the cells below.

# What is being run and which annotation the models predict.
task_type, experiment_name = 'training', 'exp'
label_type = 'domain'

# Optimisation settings.
learning_rate, weight_decay = 0.001, 0
batch_size, epochs = 128, 5

# Number of output classes for each supported `label_type`.
LABEL_SIZE = dict(domain=4, category=7)

In [4]:
class PACSDataset(Dataset):
    """In-memory PACS image dataset labelled by either domain or category.

    All images under ``<root_dir>/train`` or ``<root_dir>/val`` are read
    eagerly at construction time. For every image, the names of the last two
    directories on its path are taken as its domain and its category.

    Args:
        root_dir: Directory that contains the ``train`` and ``val`` splits.
        label_type: ``'domain'`` or ``'category'``; selects which of the two
            directory names is used as the label.
        is_train: Load the ``train`` split when true, otherwise ``val``.
        transform: Callable applied to each image in ``__getitem__``. When
            omitted, images are converted to tensors and normalised with
            fixed per-channel mean/std values.

    Raises:
        AssertionError: If the split directory does not exist.
        ValueError: If ``label_type`` is not supported, or if the split
            directory contains no files.
    """

    # Values accepted for `label_type`.
    LABEL_TYPES = ('domain', 'category')

    def __init__(
        self,
        root_dir,
        label_type='domain',
        is_train=False,
        transform=None
    ):
        self.root_dir = os.path.join(root_dir, 'train' if is_train else 'val')
        self.label_type = label_type
        self.is_train = is_train

        # Compare against None explicitly: a caller-supplied transform may be
        # a container-like object that is falsy when empty.
        if transform is not None:
            self.transform = transform
        else:
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.7659, 0.7463, 0.7173],
                    std=[0.3089, 0.3181, 0.3470]
                ),
            ])

        self.dataset, self.label_list = self.initialize_dataset()
        self.label_to_id = {x: i for i, x in enumerate(self.label_list)}
        self.id_to_label = {i: x for i, x in enumerate(self.label_list)}

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_id)`` for the sample at ``idx``."""
        image, label = self.dataset[idx]
        label_id = self.label_to_id[label]
        image = self.transform(image)
        return image, label_id

    def initialize_dataset(self):
        """Read every image of the split into memory.

        Returns:
            A ``(dataset, labels)`` pair: ``dataset`` is a list of
            ``(image_array, label_name)`` tuples and ``labels`` is the sorted
            list of distinct label names.

        Raises:
            AssertionError: If ``self.root_dir`` is not a directory.
            ValueError: If ``self.label_type`` is not supported, or if no
                files are found under ``self.root_dir``.
        """
        assert os.path.isdir(self.root_dir), \
            f'`root_dir` is not found at {self.root_dir}'

        # Fail fast: reject a bad label type before spending time on image I/O.
        if self.label_type not in self.LABEL_TYPES:
            raise ValueError(
                'Unknown `label_type`: Expecting `domain` or `category`.'
            )

        dataset = []
        label_set = set()

        for root, dirs, files in os.walk(self.root_dir, topdown=True):
            # Sorting in place fixes the traversal order, so sample order does
            # not depend on the filesystem.
            dirs.sort()
            if not files:
                continue

            # The path ends in .../<domain>/<category>; os.path keeps this
            # independent of the platform's path separator.
            domain_dir, category = os.path.split(root)
            domain = os.path.basename(domain_dir)
            label = domain if self.label_type == 'domain' else category
            label_set.add(label)

            pbar = tqdm(
                sorted(files),
                desc=f'Processing Folder: domain={domain}, category={category}'
            )
            for name in pbar:
                img_array = io.imread(os.path.join(root, name))
                dataset.append((img_array, label))

        if not dataset:
            raise ValueError(f'No images found under {self.root_dir}')

        return dataset, sorted(label_set)


In [5]:
# Build both datasets first (PACSDataset reads its images when constructed),
# then wrap each one in a data loader. Only the training loader shuffles.
train_dataset = PACSDataset('pacs_dataset', label_type=label_type, is_train=True)
val_dataset = PACSDataset('pacs_dataset', label_type=label_type, is_train=False)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

Processing Folder: domain=sketch, category=giraffe: 100%|██████████| 681/681 [00:16<00:00, 40.28it/s]
Processing Folder: domain=sketch, category=person: 100%|██████████| 143/143 [00:02<00:00, 47.71it/s]
Processing Folder: domain=sketch, category=house: 100%|██████████| 75/75 [00:01<00:00, 62.37it/s]
Processing Folder: domain=sketch, category=guitar: 100%|██████████| 564/564 [00:08<00:00, 69.81it/s] 
Processing Folder: domain=sketch, category=dog: 100%|██████████| 697/697 [00:15<00:00, 45.42it/s]
Processing Folder: domain=sketch, category=horse: 100%|██████████| 736/736 [00:12<00:00, 59.48it/s]
Processing Folder: domain=sketch, category=elephant: 100%|██████████| 674/674 [00:13<00:00, 50.86it/s]
Processing Folder: domain=art_painting, category=giraffe: 100%|██████████| 254/254 [00:07<00:00, 32.96it/s]
Processing Folder: domain=art_painting, category=person: 100%|██████████| 404/404 [00:09<00:00, 41.51it/s]
Processing Folder: domain=art_painting, category=house: 100%|██████████| 262/262 

In [45]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five conv layers with max pooling and a 3-layer head.

    The first linear layer expects 9216 (256 * 6 * 6) features, which is what
    the feature extractor produces for inputs of shape (N, 3, 227, 227).

    Args:
        config: Optional mapping of model options. Recognised keys:
            ``'num_classes'``: size of the output layer. When absent, falls
            back to the notebook-level ``LABEL_SIZE[label_type]``.
            ``'dropout'``: dropout probability in the classifier
            (default 0.5).
    """

    def __init__(self, config=None):
        super().__init__()
        self.config = config

        # Honour the options passed by the caller instead of silently ignoring
        # them; only consult the notebook globals when no class count is given.
        options = config or {}
        num_classes = options.get('num_classes')
        if num_classes is None:
            num_classes = LABEL_SIZE[label_type]
        dropout = options.get('dropout', 0.5)

        self.features = nn.Sequential(

            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a batch ``x``."""
        z = self.features(x).flatten(1)
        return self.classifier(z)

# Sanity check: push one all-zero 3x227x227 image through the network and show
# the output shape. The module is called directly rather than via `.forward`
# so that any hooks registered on it are run as well.
AlexNet()(torch.zeros(1, 3, 227, 227)).shape

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
 

In [54]:
def model_training(model_class):
    """Train ``model_class`` and keep the weights with the best eval accuracy.

    Uses the notebook-level configuration (``experiment_name``, ``label_type``,
    ``learning_rate``, ``weight_decay``, ``epochs``, ``LABEL_SIZE``) and the
    notebook-level ``train_dataset``/``train_loader`` and
    ``val_dataset``/``val_loader``.

    Side effects:
        * Writes TensorBoard scalars to
          ``experiments/<experiment_name>/<label_type>_lr_<lr>.wd_<wd>``.
        * Saves the state dict with the highest eval accuracy to
          ``best_model.pt`` in that directory.

    A ``KeyboardInterrupt`` stops training early without raising.

    Args:
        model_class: Callable that takes a config dict with the keys
            ``'num_classes'`` and ``'dropout'`` and returns an ``nn.Module``.

    Returns:
        None.
    """
    best_acc = 0.0

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    expt_name = 'experiments/{}/{}_lr_{}.wd_{}'.format(experiment_name, label_type, learning_rate, weight_decay)

    os.makedirs(expt_name, exist_ok=True)
    writer = SummaryWriter(log_dir=expt_name)

    # Each phase iterates its own dataset/loader pair.
    phases = {
        'train': (train_dataset, train_loader),
        'eval': (val_dataset, val_loader),
    }

    try:
        configs = {'num_classes': LABEL_SIZE[label_type], 'dropout': 0.5}

        model = model_class(configs).to(device)

        print('Model Architecture:\n%s' % model)

        criterion = nn.CrossEntropyLoss(reduction='mean')
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        for epoch in range(epochs):
            for phase in ('train', 'eval'):
                is_training = phase == 'train'
                dataset, data_loader = phases[phase]
                model.train(is_training)

                running_loss = 0.0
                running_corrects = 0

                for step, (images, labels) in enumerate(tqdm(data_loader, file=sys.stdout)):
                    images = images.to(device)
                    labels = labels.to(device)

                    with torch.set_grad_enabled(is_training):
                        outputs = model(images)
                        preds = outputs.argmax(dim=1)
                        loss = criterion(outputs, labels)

                        if is_training:
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    batch_loss = loss.item()

                    if is_training:
                        # `step` counts batches, so the global step must be
                        # derived from the number of batches per epoch, not
                        # from the number of samples.
                        full_step = epoch * len(data_loader) + step
                        writer.add_scalar('Loss/{}'.format(phase), batch_loss, full_step)

                    # The criterion returns a batch mean; weight it by the
                    # batch size so the epoch average is per sample.
                    running_loss += batch_loss * images.size(0)
                    running_corrects += (preds == labels).sum().item()

                epoch_loss = running_loss / len(dataset)
                epoch_acc = running_corrects / len(dataset)
                writer.add_scalar('Epoch_Loss/{}'.format(phase), epoch_loss, epoch)
                writer.add_scalar('Epoch_Accuracy/{}'.format(phase), epoch_acc, epoch)
                print('[Epoch {}/{}] {} accuracy: {:.4f}, loss: {:.4f}'.format(
                    epoch+1, epochs, phase, epoch_acc, epoch_loss
                ))

                if phase == 'eval' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), os.path.join(expt_name, 'best_model.pt'))

    except KeyboardInterrupt:
        # Deliberate best-effort stop: keep whatever has been saved so far,
        # but tell the user instead of ending silently.
        print('Training interrupted; best eval accuracy so far: {:.4f}'.format(best_acc))
    finally:
        # Flush pending events and release the event file handle.
        writer.close()

    return


# Train and evaluate the baseline AlexNet with the notebook-level configuration.
model_training(AlexNet)

Model Architecture:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0

# Part b - Enhanced AlexNet

In [55]:
class AlexNetLargerKernel(nn.Module):
    """AlexNet-style CNN without pooling, using larger and strided convolutions.

    The feature extractor has five conv layers and no pooling layers; the
    first two use 21x21 (stride 8) and 7x7 (stride 2) kernels and the last one
    has stride 2. For inputs of shape (N, 3, 227, 227) it produces the
    9216 (256 * 6 * 6) features expected by the first linear layer.

    Args:
        config: Optional mapping of model options. Recognised keys:
            ``'num_classes'``: size of the output layer. When absent, falls
            back to the notebook-level ``LABEL_SIZE[label_type]``.
            ``'dropout'``: dropout probability in the classifier
            (default 0.5).
    """

    def __init__(self, config=None):
        super().__init__()
        self.config = config

        # Honour the options passed by the caller instead of silently ignoring
        # them; only consult the notebook globals when no class count is given.
        options = config or {}
        num_classes = options.get('num_classes')
        if num_classes is None:
            num_classes = LABEL_SIZE[label_type]
        dropout = options.get('dropout', 0.5)

        self.features = nn.Sequential(

            nn.Conv2d(3, 96, kernel_size=21, stride=8),
            nn.ReLU(inplace=True),

            nn.Conv2d(96, 256, kernel_size=7, stride=2, padding=2),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a batch ``x``."""
        z = self.features(x).flatten(1)
        return self.classifier(z)

# Sanity check: push one all-zero 3x227x227 image through the network and show
# the output shape. The module is called directly rather than via `.forward`
# so that any hooks registered on it are run as well.
AlexNetLargerKernel()(torch.zeros(1, 3, 227, 227)).shape

torch.Size([1, 4])

In [56]:
# Train and evaluate the larger-kernel variant with the notebook-level configuration.
model_training(AlexNetLargerKernel)

Model Architecture:
AlexNetLargerKernel(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(21, 21), stride=(8, 8))
    (1): ReLU(inplace=True)
    (2): Conv2d(96, 256, kernel_size=(7, 7), stride=(2, 2), padding=(2, 2))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (9): ReLU(inplace=True)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=4, bias=True)
  )
)
100%|██████████| 71/71 [00:18<00:00,  3.81it/s]
[Epoch 1/5] train

In [57]:
class AlexNetAvgPooling(nn.Module):
    """AlexNet-style CNN that uses average pooling after its conv stages.

    The feature extractor has five conv layers and three 3x3, stride-2
    average-pooling layers. For inputs of shape (N, 3, 227, 227) it produces
    the 9216 (256 * 6 * 6) features expected by the first linear layer.

    Args:
        config: Optional mapping of model options. Recognised keys:
            ``'num_classes'``: size of the output layer. When absent, falls
            back to the notebook-level ``LABEL_SIZE[label_type]``.
            ``'dropout'``: dropout probability in the classifier
            (default 0.5).
    """

    def __init__(self, config=None):
        super().__init__()
        self.config = config

        # Honour the options passed by the caller instead of silently ignoring
        # them; only consult the notebook globals when no class count is given.
        options = config or {}
        num_classes = options.get('num_classes')
        if num_classes is None:
            num_classes = LABEL_SIZE[label_type]
        dropout = options.get('dropout', 0.5)

        self.features = nn.Sequential(

            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.AvgPool2d(kernel_size=3, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a batch ``x``."""
        z = self.features(x).flatten(1)
        return self.classifier(z)

# Sanity check: push one all-zero 3x227x227 image through the network and show
# the output shape. The module is called directly rather than via `.forward`
# so that any hooks registered on it are run as well.
AlexNetAvgPooling()(torch.zeros(1, 3, 227, 227)).shape

torch.Size([1, 4])

In [58]:
# Train and evaluate the average-pooling variant with the notebook-level configuration.
model_training(AlexNetAvgPooling)

Model Architecture:
AlexNetAvgPooling(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): AvgPool2d(kernel_size=3, stride=2, padding=0)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): AvgPool2d(kernel_size=3, stride=2, padding=0)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): AvgPool2d(kernel_size=3, stride=2, padding=0)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=