## Riyad Bin Rafiq 
## 05/03/2022

**Note:** The report is at the end of the notebook.

In [1]:
import collections
import copy
import os

import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from absl import app, flags
from skimage import io
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm

In [2]:
# Mount Google Drive inside the Colab VM; the dataset archive and the working
# directory used by the following cells live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Work from the assignment folder so that the relative paths used later
# (`pacs_dataset`, `experiments/...`) resolve inside it.
%cd /content/drive/MyDrive/PhD courses/CSCE 5218 Deep Learning/Assignments/Assignment 2

/content/drive/MyDrive/PhD courses/CSCE 5218 Deep Learning/Assignments/Assignment 2


In [None]:
# Extract the PACS archive into the current directory (creates `pacs_dataset/`).
!unzip $'/content/drive/MyDrive/PhD courses/CSCE 5218 Deep Learning/Assignments/Assignment 2/pacs_dataset.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: pacs_dataset/train/photo/house/pic_241.jpg  
  inflating: pacs_dataset/train/photo/house/pic_243.jpg  
  inflating: pacs_dataset/train/photo/house/pic_244.jpg  
  inflating: pacs_dataset/train/photo/house/pic_246.jpg  
  inflating: pacs_dataset/train/photo/house/pic_247.jpg  
  inflating: pacs_dataset/train/photo/house/pic_248.jpg  
  inflating: pacs_dataset/train/photo/house/pic_249.jpg  
  inflating: pacs_dataset/train/photo/house/pic_250.jpg  
  inflating: pacs_dataset/train/photo/house/pic_253.jpg  
  inflating: pacs_dataset/train/photo/house/pic_256.jpg  
  inflating: pacs_dataset/train/photo/house/pic_257.jpg  
  inflating: pacs_dataset/train/photo/house/pic_259.jpg  
  inflating: pacs_dataset/train/photo/house/pic_260.jpg  
  inflating: pacs_dataset/train/photo/house/pic_261.jpg  
  inflating: pacs_dataset/train/photo/house/pic_262.jpg  
  inflating: pacs_dataset/train/photo/house/pic_263.jpg  
  infla

In [4]:
# Register a dummy string flag named `f`.
# NOTE(review): presumably this absorbs the `-f <connection file>` argument the
# notebook kernel is started with, so that absl flag parsing accepts it --
# confirm.
flags.DEFINE_string('f','','kernel')

<absl.flags._flagvalues.FlagHolder at 0x7f6dd875af50>

In [5]:
# Global handle through which the flag values defined below are read.
FLAGS = flags.FLAGS

# Selects what `main` runs: model training or kernel analysis.
flags.DEFINE_enum('task_type', 'analysis', ['training', 'analysis'],
                  'Specifies the task type.')

# Hyperparameters for Part I
flags.DEFINE_float('learning_rate', 1e-3, 'Learning rate.')
flags.DEFINE_float('weight_decay', 0, 'Weight decay (L2 regularization).')
flags.DEFINE_integer('batch_size', 64, 'Number of examples per batch.')
flags.DEFINE_integer('epochs', 30, 'Number of epochs for training.')
flags.DEFINE_string('experiment_name', 'alexnet_kernel', 'Defines experiment name.')
flags.DEFINE_enum('label_type', 'category', ['domain', 'category'],
                  'Specifies prediction task.')

# Hyperparameters for Part III
flags.DEFINE_string('model_checkpoint', '',
                    'Specifies the checkpont for analyzing.')

# Number of output classes for each supported `label_type`.
LABEL_SIZE = {'domain': 4, 'category': 7}

In [6]:
class PACSDataset(Dataset):
  """In-memory PACS image dataset labelled by either domain or category.

  All images below `<root_dir>/train` (or `<root_dir>/val`) are read eagerly
  at construction time. The label of an image is taken from the names of the
  two innermost directories that contain it: `.../<domain>/<category>/<file>`.

  Args:
    root_dir: str, directory that contains the `train` and `val` splits.
    label_type: str, `domain` or `category`; selects which directory level is
        used as the label.
    is_training: bool, read the `train` split when True, otherwise `val`.
    transform: optional callable applied to every image in `__getitem__`.
        When omitted, the image is converted to a tensor and normalized with
        fixed per-channel mean/std (presumably the dataset statistics).

  Raises:
    ValueError: if `label_type` is unknown or no files are found in the split.
  """

  _LABEL_TYPES = ('domain', 'category')

  def __init__(self,
               root_dir,
               label_type='domain',
               is_training=False,
               transform=None):
    self.root_dir = os.path.join(root_dir, 'train' if is_training else 'val')
    self.label_type = label_type
    self.is_training = is_training
    if transform:
      self.transform = transform
    else:
      self.transform = transforms.Compose([
          transforms.ToTensor(),
          transforms.Normalize(mean=[0.7659, 0.7463, 0.7173],
                               std=[0.3089, 0.3181, 0.3470]),
      ])

    self.dataset, self.label_list = self.initialize_dataset()
    self.label_to_id = {x: i for i, x in enumerate(self.label_list)}
    self.id_to_label = {i: x for i, x in enumerate(self.label_list)}

  def __len__(self):
    """Returns the number of loaded images."""
    return len(self.dataset)

  def __getitem__(self, idx):
    """Returns `(transformed_image, label_id)` for the sample at `idx`."""
    image, label = self.dataset[idx]
    label_id = self.label_to_id[label]
    image = self.transform(image)
    return image, label_id

  def initialize_dataset(self):
    """Reads every image of the split and pairs it with its label.

    Returns:
      A tuple `(dataset, labels)`: `dataset` is a list of
      `(image_array, label)` tuples and `labels` is the sorted list of the
      distinct labels that were found.

    Raises:
      ValueError: if `label_type` is unknown or the split contains no files.
    """
    assert os.path.isdir(self.root_dir), \
        '`root_dir` is not found at %s' % self.root_dir

    # Fail fast: validate before spending time on reading every image.
    if self.label_type not in self._LABEL_TYPES:
      raise ValueError(
          'Unknown `label_type`: Expecting `domain` or `category`.')

    dataset = []
    label_set = set()

    for root, dirs, files in os.walk(self.root_dir, topdown=True):
      # Sorting in place makes os.walk visit sub-directories in a stable order.
      dirs.sort()
      if not files:
        continue

      # The two innermost directory names are `<domain>/<category>`; using
      # os.path keeps this independent of the platform's path separator.
      category = os.path.basename(root)
      domain = os.path.basename(os.path.dirname(root))
      label = domain if self.label_type == 'domain' else category
      label_set.add(label)

      description = 'Processing Folder: domain=%s, category=%s' % (domain,
                                                                  category)
      for name in tqdm(sorted(files), desc=description):
        img_array = io.imread(os.path.join(root, name))
        dataset.append((img_array, label))

    if not dataset:
      raise ValueError('No image files found under %s' % self.root_dir)

    return dataset, sorted(label_set)

In [7]:
class AlexNet(nn.Module):
  """AlexNet-style classifier with max pooling.

  Five convolutions (each followed by ReLU, three of them also by a 3x3/2 max
  pool) feed a three-layer fully connected head. The head expects 256 feature
  maps of size 6x6, which is what the feature extractor yields for e.g.
  227x227 inputs.

  Args:
    configs: dict, must provide `num_classes`, the size of the output layer.
  """

  def __init__(self, configs):
    super().__init__()
    self.configs = configs

    # (in_channels, out_channels, kernel_size, stride, padding, pool_after)
    conv_specs = (
        (3, 96, 11, 4, 0, True),
        (96, 256, 5, 1, 2, True),
        (256, 384, 3, 1, 1, False),
        (384, 384, 3, 1, 1, False),
        (384, 256, 3, 1, 1, True),
    )
    feature_layers = []
    for in_ch, out_ch, kernel, stride, pad, pool_after in conv_specs:
      feature_layers.append(
          nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride,
                    padding=pad))
      feature_layers.append(nn.ReLU(inplace=True))
      if pool_after:
        feature_layers.append(nn.MaxPool2d(kernel_size=3, stride=2))
    self.features = nn.Sequential(*feature_layers)

    num_classes = self.configs["num_classes"]
    head_layers = [
        nn.Dropout(p=0.5),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, x):
    """Maps a batch of images to class logits of shape `[batch, num_classes]`."""
    feature_maps = self.features(x)
    flat = torch.flatten(feature_maps, 1)
    return self.classifier(flat)


class AlexNetLargeKernel(nn.Module):
  """AlexNet variant that uses larger, strided kernels and no pooling layers.

  Down-sampling is done by the strides of the first, second and last
  convolution. The fully connected head expects 256 feature maps of size 6x6,
  which is what the feature extractor yields for e.g. 227x227 inputs.

  Args:
    configs: dict, must provide `num_classes`, the size of the output layer.
  """

  def __init__(self, configs):
    super().__init__()
    self.configs = configs

    # (in_channels, out_channels, kernel_size, stride, padding)
    conv_specs = (
        (3, 96, 21, 8, 1),
        (96, 256, 7, 2, 2),
        (256, 384, 3, 1, 1),
        (384, 384, 3, 1, 1),
        (384, 256, 3, 2, 0),
    )
    feature_layers = []
    for in_ch, out_ch, kernel, stride, pad in conv_specs:
      feature_layers.append(
          nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride,
                    padding=pad))
      feature_layers.append(nn.ReLU(inplace=True))
    self.features = nn.Sequential(*feature_layers)

    num_classes = self.configs["num_classes"]
    head_layers = [
        nn.Dropout(p=0.5),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, x):
    """Maps a batch of images to class logits of shape `[batch, num_classes]`."""
    feature_maps = self.features(x)
    flat = torch.flatten(feature_maps, 1)
    return self.classifier(flat)


class AlexNetAvgPooling(nn.Module):
  """AlexNet-style classifier that down-samples with average pooling.

  Five convolutions (each followed by ReLU, three of them also by a 3x3/2
  average pool) feed a three-layer fully connected head. The head expects 256
  feature maps of size 6x6, which is what the feature extractor yields for
  e.g. 227x227 inputs.

  Args:
    configs: dict, must provide `num_classes`, the size of the output layer.
  """

  def __init__(self, configs):
    super().__init__()
    self.configs = configs

    # (in_channels, out_channels, kernel_size, stride, padding, pool_after)
    conv_specs = (
        (3, 96, 11, 4, 0, True),
        (96, 256, 5, 1, 2, True),
        (256, 384, 3, 1, 1, False),
        (384, 384, 3, 1, 1, False),
        (384, 256, 3, 1, 1, True),
    )
    feature_layers = []
    for in_ch, out_ch, kernel, stride, pad, pool_after in conv_specs:
      feature_layers.append(
          nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride,
                    padding=pad))
      feature_layers.append(nn.ReLU(inplace=True))
      if pool_after:
        feature_layers.append(nn.AvgPool2d(3, stride=2))
    self.features = nn.Sequential(*feature_layers)

    num_classes = self.configs["num_classes"]
    head_layers = [
        nn.Dropout(p=0.5),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, x):
    """Maps a batch of images to class logits of shape `[batch, num_classes]`."""
    feature_maps = self.features(x)
    flat = torch.flatten(feature_maps, 1)
    return self.classifier(flat)

In [8]:
def visualize_kernels(kernel_name,
                      kernel_weight,
                      max_in_channels=12,
                      max_out_channels=12,
                      saving_prefix='kernel'):
  """A helper function to visualize the learned convolutional kernels.

  Draws a grid of heatmaps in which row `r`, column `c` shows the 2-D kernel
  connecting in-channel `r` to out-channel `c`, each min-max scaled to [0, 1]
  independently. The figure is written to
  `<saving_prefix>/<kernel_name lower-cased>.png` and then closed.

  Args:
    kernel_name: str, the name of the kernel being visualized. It will be used
        as the filename in the saved figures.
    kernel_weight: torch.Tensor or np.ndarray, the weights of convolutional
        kernel. The shape should be
        [out_channels, in_channels, kernel_height, kernel_width].
    max_in_channels: int, optional, the max in_channels in the visualization.
    max_out_channels: int, optional, the max out_channels in the visualization.
    saving_prefix: str, optional, the directory for saving the visualization.
        It is created if it does not exist yet.
  """
  print('Visualize the learned filter of `%s`' % kernel_name)
  if isinstance(kernel_weight, torch.Tensor):
    # detach() so tensors that still require grad can be converted as well.
    kernel_weight = kernel_weight.detach().cpu().numpy()

  out_channels, in_channels = kernel_weight.shape[:2]

  nrows = min(max_in_channels, in_channels)
  ncols = min(max_out_channels, out_channels)

  # squeeze=False keeps `axes` two-dimensional even for a single row/column,
  # so `axes[r, c]` is always valid.
  fig, axes = plt.subplots(nrows, ncols, figsize=(ncols, nrows),
                           squeeze=False)

  for r in range(nrows):
    for c in range(ncols):
      kernel = kernel_weight[c, r, :, :]
      vmin, vmax = kernel.min(), kernel.max()
      value_range = vmax - vmin
      if value_range > 0:
        normalized_kernel = (kernel - vmin) / value_range
      else:
        # A constant kernel has no range to scale by; show it as all zeros
        # instead of dividing by zero.
        normalized_kernel = kernel - vmin
      sns.heatmap(normalized_kernel,
                  cbar=False,
                  square=True,
                  xticklabels=False,
                  yticklabels=False,
                  ax=axes[r, c])

  fig.tight_layout()
  os.makedirs(saving_prefix, exist_ok=True)
  fig.savefig(os.path.join(saving_prefix, kernel_name.lower() + '.png'))
  # Release the figure; one figure per layer would otherwise stay open.
  plt.close(fig)
  return

In [10]:
from numpy import empty
def analyze_model_kernels():
  """Loads a trained `AlexNetLargeKernel` and visualizes its conv kernels.

  The checkpoint is taken from `FLAGS.model_checkpoint` (either the weights
  file itself or a directory containing `best_model.pt`). When the flag is
  empty, the previous hard-coded location
  `experiments/alexnet_kernel/category_lr_0.001.wd_0.0/best_model.pt` is used.
  Every 4-D tensor of the model's state dict is passed to `visualize_kernels`
  as `Convolution1`, `Convolution2`, ... in state-dict order.
  """
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  configs = {'num_classes': LABEL_SIZE[FLAGS.label_type]}

  checkpoint_path = FLAGS.model_checkpoint
  if not checkpoint_path:
    checkpoint_path = os.path.join(
        'experiments/alexnet_kernel/category_lr_0.001.wd_0.0', 'best_model.pt')
  elif os.path.isdir(checkpoint_path):
    checkpoint_path = os.path.join(checkpoint_path, 'best_model.pt')

  # NOTE: the architecture must match the one the checkpoint was trained with.
  model = AlexNetLargeKernel(configs).to(device)
  # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
  model.load_state_dict(torch.load(checkpoint_path, map_location=device))

  saving_prefix = 'kernel'
  os.makedirs(saving_prefix, exist_ok=True)

  conv_index = 0
  for param_name, param in model.state_dict().items():
    # Only convolution weights are 4-D: [out, in, height, width].
    if param.dim() != 4:
      continue
    conv_index += 1
    print(param_name, "\t", param.size())
    visualize_kernels("Convolution" + str(conv_index), param,
                      saving_prefix=saving_prefix)
  return

In [8]:
def model_training():
  """Trains an AlexNet variant on PACS and saves the best-validating weights.

  Builds the train/val `PACSDataset`s from `pacs_dataset`, optimizes the model
  with Adam and cross-entropy for `FLAGS.epochs` epochs, logs per-step training
  loss and per-epoch loss/accuracy to TensorBoard, and writes `best_model.pt`
  into `experiments/<experiment_name>/<label_type>_lr_<lr>.wd_<wd>` whenever
  the validation accuracy improves. A KeyboardInterrupt stops training early
  without raising; the TensorBoard writer is closed in every case.
  """
  train_dataset = PACSDataset(root_dir='pacs_dataset',
                              label_type=FLAGS.label_type,
                              is_training=True)
  train_loader = DataLoader(train_dataset,
                            batch_size=FLAGS.batch_size,
                            shuffle=True,
                            num_workers=2)

  val_dataset = PACSDataset(root_dir='pacs_dataset',
                            label_type=FLAGS.label_type,
                            is_training=False)
  val_loader = DataLoader(val_dataset,
                          batch_size=FLAGS.batch_size,
                          shuffle=False,
                          num_workers=2)

  best_acc = 0.0

  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

  experiment_name = 'experiments/{}/{}_lr_{}.wd_{}'.format(
      FLAGS.experiment_name, FLAGS.label_type, FLAGS.learning_rate,
      FLAGS.weight_decay)
  print(experiment_name)
  os.makedirs(experiment_name, exist_ok=True)

  configs = {'num_classes': LABEL_SIZE[FLAGS.label_type]}

  ############################################################################
  # After implementing all required models, you can switch from here.
  # model = AlexNet(configs).to(device)
  model = AlexNetLargeKernel(configs).to(device)
  # model = AlexNetAvgPooling(configs).to(device)
  ############################################################################

  print('Model Architecture:\n%s' % model)

  criterion = nn.CrossEntropyLoss(reduction='mean')
  optimizer = torch.optim.Adam(model.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=FLAGS.weight_decay)

  writer = SummaryWriter(log_dir=experiment_name)

  try:
    for epoch in range(FLAGS.epochs):
      for phase in ('train', 'eval'):
        is_train = phase == 'train'
        if is_train:
          model.train()
          dataset = train_dataset
          data_loader = train_loader
        else:
          model.eval()
          dataset = val_dataset
          data_loader = val_loader

        running_loss = 0.0
        running_corrects = 0

        for step, (images, labels) in enumerate(data_loader):
          images = images.to(device)
          labels = labels.to(device)

          # Gradients are only tracked (and applied) in the training phase.
          with torch.set_grad_enabled(is_train):
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if is_train:
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

              writer.add_scalar('Loss/{}'.format(phase), loss.item(),
                                epoch * len(data_loader) + step)

          # The criterion averages over the batch, so weight by batch size to
          # obtain a per-sample epoch average below.
          running_loss += loss.item() * images.size(0)
          running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(dataset)
        epoch_acc = running_corrects / len(dataset)
        writer.add_scalar('Epoch_Loss/{}'.format(phase), epoch_loss, epoch)
        writer.add_scalar('Epoch_Accuracy/{}'.format(phase), epoch_acc, epoch)
        print('[Epoch %d] %s accuracy: %.4f, loss: %.4f' %
              (epoch + 1, phase, epoch_acc, epoch_loss))

        if phase == 'eval' and epoch_acc > best_acc:
          best_acc = epoch_acc
          # Saving serializes the current weights; no in-memory copy needed.
          torch.save(model.state_dict(),
                     os.path.join(experiment_name, 'best_model.pt'))

  except KeyboardInterrupt:
    # Manual interruption is an accepted way to stop; the best checkpoint
    # written so far stays on disk.
    print('Training interrupted; keeping the best checkpoint saved so far.')
  finally:
    # Flush pending events and release the log file.
    writer.close()

  return

In [11]:
def main(unused_argvs):
  """Runs the task selected by `FLAGS.task_type`.

  Args:
    unused_argvs: positional command-line arguments; not used.

  Raises:
    ValueError: if `FLAGS.task_type` is neither `training` nor `analysis`.
  """
  task = FLAGS.task_type

  if task == 'training':
    model_training()
    return

  if task == 'analysis':
    analyze_model_kernels()
    return

  raise ValueError('Unknown `task_type`: %s' % task)


In [12]:
# Entry point: hand `main` to absl's application runner.
# NOTE(review): `app.run` presumably parses the flags defined above before
# calling `main` -- confirm against the absl documentation.
if __name__ == '__main__':
  app.run(main)

Processing Folder: domain=photo, category=giraffe: 100%|██████████| 165/165 [00:00<00:00, 170.66it/s]
Processing Folder: domain=photo, category=horse: 100%|██████████| 186/186 [00:01<00:00, 184.29it/s]
Processing Folder: domain=photo, category=house: 100%|██████████| 243/243 [00:01<00:00, 180.47it/s]
Processing Folder: domain=photo, category=person: 100%|██████████| 383/383 [00:02<00:00, 171.65it/s]
Processing Folder: domain=photo, category=guitar: 100%|██████████| 167/167 [00:00<00:00, 167.98it/s]
Processing Folder: domain=photo, category=dog: 100%|██████████| 169/169 [00:00<00:00, 172.53it/s]
Processing Folder: domain=photo, category=elephant: 100%|██████████| 181/181 [00:00<00:00, 182.25it/s]
Processing Folder: domain=art_painting, category=giraffe: 100%|██████████| 254/254 [00:01<00:00, 182.24it/s]
Processing Folder: domain=art_painting, category=horse: 100%|██████████| 179/179 [00:01<00:00, 175.53it/s]
Processing Folder: domain=art_painting, category=house: 100%|██████████| 262/26

Model Architecture:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0

### 1.1 Model architecture

AlexNet(\
  (features): Sequential( \
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))\
    (1): ReLU(inplace=True)\
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\
    (4): ReLU(inplace=True)\
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (7): ReLU(inplace=True)\
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (9): ReLU(inplace=True)\
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (11): ReLU(inplace=True)\
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\
  )\
  (classifier): Sequential(\
    (0): Dropout(p=0.5, inplace=False)\
    (1): Linear(in_features=9216, out_features=4096, bias=True)\
    (2): ReLU(inplace=True)\
    (3): Dropout(p=0.5, inplace=False)\
    (4): Linear(in_features=4096, out_features=4096, bias=True)\
    (5): ReLU(inplace=True)\
    (6): Linear(in_features=4096, out_features=4, bias=True)\
  )\
)

### Hyperparameter tuning
1.   Batch size: 128, learning rate: 0.001, Validation accuracy: 90.87%
2.   Batch size: 64, learning rate: 0.001, Validation accuracy: 91.49%
3.   Batch size: 32, learning rate: 0.001, Validation accuracy: 88.28%
4.   Batch size: 128, learning rate: 0.01, Validation accuracy: 78.42%
5.   Batch size: 64, learning rate: 0.01, Validation accuracy: 37.24%



### 2.1.1 Model architecture

AlexNetLargeKernel(\
  (features): Sequential(\
    (0): Conv2d(3, 96, kernel_size=(21, 21), stride=(8, 8), padding=(1, 1))\
    (1): ReLU(inplace=True)\
    (2): Conv2d(96, 256, kernel_size=(7, 7), stride=(2, 2), padding=(2, 2))\
    (3): ReLU(inplace=True)\
    (4): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (5): ReLU(inplace=True)\
    (6): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (7): ReLU(inplace=True)\
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(2, 2))\
    (9): ReLU(inplace=True)\
  )\
  (classifier): Sequential(\
    (0): Dropout(p=0.5, inplace=False)\
    (1): Linear(in_features=9216, out_features=4096, bias=True)\
    (2): ReLU(inplace=True)\
    (3): Dropout(p=0.5, inplace=False)\
    (4): Linear(in_features=4096, out_features=4096, bias=True)\
    (5): ReLU(inplace=True)\
    (6): Linear(in_features=4096, out_features=4, bias=True)\
  )\
)

### Validation accuracy - 87.45%

### 2.1.2 Model architecture

AlexNetAvgPooling(\
  (features): Sequential(\
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))\
    (1): ReLU(inplace=True)\
    (2): AvgPool2d(kernel_size=3, stride=2, padding=0)\
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\
    (4): ReLU(inplace=True)\
    (5): AvgPool2d(kernel_size=3, stride=2, padding=0)\
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (7): ReLU(inplace=True)\
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (9): ReLU(inplace=True)\
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\
    (11): ReLU(inplace=True)\
    (12): AvgPool2d(kernel_size=3, stride=2, padding=0)\
  )\
  (classifier): Sequential(\
    (0): Dropout(p=0.5, inplace=False)\
    (1): Linear(in_features=9216, out_features=4096, bias=True)\
    (2): ReLU(inplace=True)\
    (3): Dropout(p=0.5, inplace=False)\
    (4): Linear(in_features=4096, out_features=4096, bias=True)\
    (5): ReLU(inplace=True)\
    (6): Linear(in_features=4096, out_features=4, bias=True)\
  )\
)

### Validation accuracy - 88.90%

### 3.1 Kernel visualization

#### Domain-specific learned kernels for AlexNet: https://drive.google.com/drive/folders/1OgisdMJSPwYr6Pl4h9PW9rWLNsij_Ivz?usp=sharing

#### Domain-specific learned kernels for AlexNetLargeKernel: https://drive.google.com/drive/folders/18-yredxoq0x_4k5oScGFYDhk2urIRwmt?usp=sharing

#### Category-specific learned kernels for AlexNet: https://drive.google.com/drive/folders/1Hzm72EeqylEgfKghb3B1lXDMMXT-eEvF?usp=sharing

#### Category-specific learned kernels for AlexNetLargeKernel: https://drive.google.com/drive/folders/1wGt6HnrUr8Lhvy8UZRenTja5Hy5JtQOM?usp=sharing