In [1]:
# Project root on the mounted Google Drive. This is an absolute, machine-specific
# path — NOTE(review): consider making it configurable.
root_dir = "/content/drive/Othercomputers/Mac/Mila/Winter_2023/ift6759_project/"

# Notebook magics: inline matplotlib rendering and automatic re-import of edited modules.
%matplotlib inline
%load_ext autoreload
%autoreload 2

from google.colab import drive
import sys

# Mount Google Drive in the Colab runtime, then add the project directory to the import path.
drive.mount('/content/drive')
sys.path.append(root_dir)

Mounted at /content/drive


In [2]:
import torch
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import Dataset, random_split
from PIL import Image
import os
import pandas as pd

# transform = transforms.Compose([
#     transforms.RandomRotation(30),      # rotate +/- 30 degrees
#     transforms.RandomHorizontalFlip(),  # horizontally flip the given image randomly with a given probability (default p=0.5)
#     #transforms.RandomVerticalFlip() #Vertically flip the given image randomly with a given probability (default p=0.5), not recommended for medical images
#     transforms.Resize((224, 224)),       #  be sure to pass in a list or a tuple
#     transforms.CenterCrop(224),         # crop longest side to 224 pixels at center
#     transforms.RandomAdjustSharpness(1.5, p=0.5), #
#     transforms.RandomAdjustSharpness(0.5, p=0.5),
#     transforms.RandomAutocontrast(p=0.5),
#     transforms.RandomEqualize(p=0.5),
#     transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406],
#                           [0.229, 0.224, 0.225])
# ])

# Default preprocessing pipeline; CovidSeverityDataset uses it as the default
# value of its `transform` argument.
transform = transforms.Compose(
    [
        # Random left/right mirroring with torchvision's default probability.
        transforms.RandomHorizontalFlip(),
        # Disabled: transforms.RandomResizedCrop((32,32),scale=(0.8,1.0),ratio=(0.9,1.1)),
        # Keep the central 224x224 region of the image.
        transforms.CenterCrop(224),
        # Convert the PIL image to a float tensor.
        transforms.ToTensor(),
        # Per-channel standardisation with fixed statistics.
        # NOTE(review): confirm these mean/std values were computed for this dataset.
        transforms.Normalize(
            mean=[0.49139968, 0.48215841, 0.44653091],
            std=[0.24703223, 0.24348513, 0.26158784],
        ),
    ]
)

class CovidSeverityDataset(Dataset):
    """Image/label dataset driven by a metadata CSV, plus a helper that builds the data loaders.

    The metadata file is read from ``<root_dir>/data_processing/combined_cxr_metadata.csv``
    with its first CSV column used as the index. After that, column 0 of the frame is
    used as the image file name (relative to ``<root_dir>/processed_images/``) and
    column 1 as the label.

    :param root_dir: Project root directory. A trailing slash is optional.
    :param transform: Callable applied to each PIL image. Any falsy value
        (e.g. ``False`` or ``None``) makes ``__getitem__`` fall back to ``PILToTensor``.
    :param split_lengths: Lengths handed to ``random_split`` for train/val/test.
        NOTE(review): fractional lengths need a torch version whose ``random_split``
        accepts fractions — confirm the runtime version.
    :param split_seed: Seed of the generator used for the split.
    :param batch_size: Batch size used by all three loaders.
    :param shuffle: Whether the *training* loader shuffles (val/test never shuffle).
    :param num_workers: Worker counts for the train, val and test loaders, in that order.
    """

    def __init__(self, root_dir, transform = transform, split_lengths = [0.7, 0.1, 0.2], split_seed = 42, batch_size = 10, shuffle = True, num_workers = [0,0,0]):
        self.root_dir = root_dir
        # os.path.join instead of string concatenation: the paths stay correct
        # whether or not root_dir ends with a path separator.
        self.csv_path = os.path.join(root_dir, "data_processing/combined_cxr_metadata.csv")
        self.data_path = os.path.join(root_dir, "processed_images/")
        self.transform = transform
        # Copy the sequences so the shared default lists are never aliased by an instance.
        self.split_lengths = list(split_lengths)
        self.split_seed = split_seed
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_workers = list(num_workers)
        self.dataframe = pd.read_csv(self.csv_path, index_col=0)

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at integer position ``index``."""
        img_path = os.path.join(self.data_path, self.dataframe.iloc[index, 0])
        image = Image.open(img_path)
        y_label = torch.tensor(self.dataframe.iloc[index, 1])
        if self.transform:
            image = self.transform(image)
        else:
            # No transform configured: return the raw pixel values as a tensor.
            image = transforms.PILToTensor()(image)
        return (image, y_label)

    def get_subsets(self):
        """Split the dataset and wrap each part in a ``DataLoader``.

        The split is reproducible for a given ``split_seed``. The training loader
        drops the last incomplete batch and pins memory; validation and test
        loaders keep every sample and never shuffle.

        NOTE(review): all three subsets index into this same dataset object, so
        ``self.transform`` (including any random augmentation it contains) is also
        applied to validation and test samples.

        :return: ``(train, val, test)`` data loaders.
        """
        generator = torch.Generator().manual_seed(self.split_seed)
        subsets = random_split(self, self.split_lengths, generator=generator)
        train = torch.utils.data.DataLoader(
            dataset=subsets[0],
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            num_workers=self.num_workers[0],
            drop_last=True,
            pin_memory=True,
        )
        val = torch.utils.data.DataLoader(
            dataset=subsets[1],
            batch_size=self.batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_workers[1],
        )
        test = torch.utils.data.DataLoader(
            dataset=subsets[2],
            batch_size=self.batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_workers[2],
        )
        return train, val, test


# How to use this class?
# root_dir = "/content/drive/MyDrive/Mila/Winter_2023/ift6759_project/"
# dataset = CovidSeverityDataset(root_dir, transform = False)
# train, val, test = dataset.get_subsets()

In [None]:
# from covidSeverityDataset import CovidSeverityDataset

# dataset = CovidSeverityDataset(root_dir, transform = False, batch_size = 10, shuffle = True, num_workers = [2,0,0])
# dataset = CovidSeverityDataset(root_dir, batch_size = 100, shuffle = True, num_workers = [4,2,2])
# train, val, test = dataset.get_subsets()

In [None]:
# ludo = next(enumerate(train)) 
# print(ludo[1])

In [3]:
import torch
from typing import List, Tuple
from torch import nn
from torch.nn.parameter import Parameter
import torch.nn.functional as F

class MLP(torch.nn.Module):
    """Fully connected classifier: flatten -> [Linear + activation] * N -> Linear.

    :param input_size: Number of features per sample after flattening.
    :param hidden_sizes: Width of each hidden layer, e.g. ``[32, 32]``. Must
        contain at least one entry.
    :param num_classes: Number of output logits.
    :param activation: One of ``"relu"``, ``"tanh"`` or ``"sigmoid"``.
    :raises AssertionError: if ``hidden_sizes`` is empty or ``activation`` is unknown.
    """

    def __init__(self, input_size: int, hidden_sizes: List[int], num_classes: int, activation: str = "relu"):
        super(MLP, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        # ">= 1" matches the message: a single hidden layer is a valid configuration.
        assert len(hidden_sizes) >= 1, "You should at least have one hidden layer"
        self.num_classes = num_classes
        self.activation = activation
        assert activation in ['tanh', 'relu', 'sigmoid'], "Invalid choice of activation"
        self.hidden_layers, self.output_layer = self._build_layers(input_size, hidden_sizes, num_classes)

        # Initialization (output layer first, then hidden layers in order).
        self._initialize_linear_layer(self.output_layer)
        for layer in self.hidden_layers:
            self._initialize_linear_layer(layer)

    def _build_layers(self, input_size: int,
                        hidden_sizes: List[int],
                        num_classes: int) -> Tuple[nn.ModuleList, nn.Module]:
        """
        Build the layers of the MLP.

        :param input_size: An int
        :param hidden_sizes: A list of ints. E.g., [32, 32] means two hidden layers with 32 units each.
        :param num_classes: An int
        :Return:
            hidden_layers: nn.ModuleList. Within the list, each item has type nn.Module
            output_layer: nn.Module
        """
        # Consecutive pairs of this list are the (in, out) sizes of the hidden layers.
        layer_sizes = [input_size, *hidden_sizes]
        hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])]
        )
        # The last entry is the final hidden width (or input_size if there are no hidden layers).
        output_layer = nn.Linear(layer_sizes[-1], num_classes)
        return hidden_layers, output_layer

    def activation_fn(self, activation, inputs: torch.Tensor) -> torch.Tensor:
        """Apply the non-linearity named by ``activation`` to ``inputs``.

        :raises ValueError: if ``activation`` is not a supported name.
        """
        if activation == "relu":
            return F.relu(inputs)
        elif activation == "tanh":
            return torch.tanh(inputs)
        elif activation == "sigmoid":
            return torch.sigmoid(inputs)
        else:
            raise ValueError("Invalid activation function")

    def _initialize_linear_layer(self, module: nn.Linear) -> None:
        """Set the bias to zeros and the weights to Glorot (Xavier) normal."""
        nn.init.zeros_(module.bias)
        nn.init.xavier_normal_(module.weight)

    def forward(self, images: torch.Tensor) -> torch.Tensor:
        """Forward images and compute logits.

        1. The images are first flattened to vectors.
        2. The result goes through each layer in ``self.hidden_layers`` followed by the activation.
        3. Finally the result goes through ``self.output_layer``.

        :param images: [batch, channels, width, height]
        :return logits: [batch, num_classes]
        """
        # 1. Flatten everything but the batch dimension. reshape (unlike view)
        #    also accepts non-contiguous inputs such as permuted tensors.
        x = images.reshape(images.size(0), -1)

        # 2. Hidden layers, each followed by the configured activation.
        for layer in self.hidden_layers:
            x = self.activation_fn(self.activation, layer(x))

        # 3. Output projection; no activation, the caller receives raw logits.
        logits = self.output_layer(x)

        return logits


In [4]:
from torch import optim

# Architecture of the MLP built below.
model_config = {
    # "input_size": 3072,
    "input_size": 150528,
    # "hidden_sizes": [1024, 512, 64, 64],
    "hidden_sizes": [256, 128, 16, 16],
    "num_classes": 7,
    "activation": "relu"
}

# Optimization
# The optimizer's name is kept under its own variable so that `optimizer` below
# only ever refers to the optimizer object.
optimizer_name: str = 'adamw'  # [sgd, momentum, adam, adamw]
lr: float = 1e-3
# momentum: float = 0.9
weight_decay: float = 5e-2
batch_size: int = 4
# Use the GPU when one is visible, otherwise fall back to the CPU instead of crashing.
device = "cuda" if torch.cuda.is_available() else "cpu"
epochs: int = 25
print_every: int = 100

model = MLP(**model_config)
model.to(device)

# NOTE(review): `lr` and `weight_decay` above are NOT passed to the optimizer, so
# AdamW runs with its library defaults — confirm this is intended.
optimizer = optim.AdamW(
    # model.parameters(), lr=lr, weight_decay=weight_decay
    model.parameters()
)

print(
    f"Initialized MLP model with {sum(p.numel() for p in model.parameters())} "
    f"total parameters, of which {sum(p.numel() for p in model.parameters() if p.requires_grad)} are learnable."
)


Initialized MLP model with 38570775 total parameters, of which 38570775 are learnable.


In [5]:
def to_device(tensors, device):
    """Move a tensor, or a nested container of tensors, to ``device``.

    Dicts, lists and tuples are processed recursively and returned as a new
    container of the same kind (dict keys are preserved). Sequences of any
    length are supported.

    :param tensors: A ``torch.Tensor`` or a dict / list / tuple of supported values.
    :param device: Target device, forwarded to ``Tensor.to``.
    :return: The same structure with every tensor moved to ``device``.
    :raises NotImplementedError: if a value of an unsupported type is encountered.
    """
    if isinstance(tensors, torch.Tensor):
        return tensors.to(device=device)
    if isinstance(tensors, dict):
        return {key: to_device(value, device) for key, value in tensors.items()}
    if isinstance(tensors, (list, tuple)):
        # Move every element, not just the first two, and keep the container type.
        moved = [to_device(item, device) for item in tensors]
        return moved if isinstance(tensors, list) else tuple(moved)
    raise NotImplementedError("Unknown type {0}".format(type(tensors)))


# def cross_entropy_loss(logits: torch.Tensor, labels: torch.Tensor):
#     """ Return the mean loss for this batch
#     :param logits: [batch_size, num_class]
#     :param labels: [batch_size]
#     :return loss 
#     """
#     # convert labels to one-hot encoding
#     one_hot = torch.zeros_like(logits)
#     one_hot.scatter_(1, labels.unsqueeze(1), 1)
    
#     # calculate cross-entropy loss
#     log_softmax = logits - torch.logsumexp(logits, dim=1, keepdim=True)
#     loss = -torch.sum(one_hot * log_softmax, dim=1).mean()

#     return loss

def compute_accuracy(logits: torch.Tensor, labels: torch.Tensor):
    """Fraction of rows whose highest-scoring class equals the label.

    :param logits: Scores; the predicted class is the argmax along dim 1.
    :param labels: Class indices compared element-wise with the predictions.
    :return: 0-dim float tensor holding the batch accuracy.
    """
    predicted = torch.argmax(logits, dim=1)
    hits = predicted == labels
    return hits.float().mean()


In [6]:
from torch.utils.data import DataLoader
import time
import os

def train(epoch, model, dataloader, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Reads the notebook-level globals ``device`` and ``print_every`` and logs
    through ``tqdm.write``; all three must be defined before this is called.

    :param epoch: Epoch number, used only in log messages.
    :param model: Module mapping an image batch to logits.
    :param dataloader: Iterable of ``(images, labels)`` batches.
    :param optimizer: Optimizer stepping the parameters of ``model``.
    :return: ``(mean_loss, mean_accuracy, elapsed_seconds)``. The means are
        weighted by the number of samples in each batch, so a smaller final
        batch does not distort them. Both are 0.0 for an empty loader.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()
    start_time = time.time()
    loss_sum = 0.0
    accuracy_sum = 0.0
    samples_seen = 0

    for idx, batch in enumerate(dataloader):
        batch = to_device(batch, device)
        optimizer.zero_grad()
        imgs, labels = batch
        logits = model(imgs)
        # CrossEntropyLoss expects class indices as int64.
        labels = labels.to(torch.int64)
        loss = criterion(logits, labels)

        acc = compute_accuracy(logits, labels)

        loss.backward()
        optimizer.step()

        # loss and acc are per-batch means; scale by the batch size so the
        # epoch figures are true per-sample averages.
        batch_len = labels.size(0)
        loss_sum += loss.item() * batch_len
        accuracy_sum += acc.item() * batch_len
        samples_seen += batch_len

        if idx % print_every == 0:
            tqdm.write(f"[TRAIN] Epoch: {epoch}, Iter: {idx}, Loss: {loss.item():.5f}")

    epoch_loss = loss_sum / samples_seen if samples_seen else 0.0
    epoch_accuracy = accuracy_sum / samples_seen if samples_seen else 0.0
    tqdm.write(f"== [TRAIN] Epoch: {epoch}, Accuracy: {epoch_accuracy:.3f} ==>")
    return epoch_loss, epoch_accuracy, time.time() - start_time

def evaluate(epoch, model, dataloader, mode="val"):
    """Compute loss and accuracy of ``model`` over ``dataloader`` without gradient tracking.

    Reads the notebook-level globals ``device`` and ``print_every`` and logs
    through ``tqdm.write``; all three must be defined before this is called.

    :param epoch: Epoch number, used only in log messages.
    :param model: Module mapping an image batch to logits.
    :param dataloader: Iterable of ``(images, labels)`` batches.
    :param mode: Label shown (upper-cased) in log messages, e.g. ``"val"`` or ``"test"``.
    :return: ``(mean_loss, mean_accuracy, elapsed_seconds)``. The means are
        weighted by the number of samples in each batch, so a smaller final
        batch does not distort them. Both are 0.0 for an empty loader.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    start_time = time.time()
    loss_sum = 0.0
    accuracy_sum = 0.0
    samples_seen = 0
    # Pre-set so the summary line below also works when the loader yields nothing.
    idx = -1

    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            batch = to_device(batch, device)
            imgs, labels = batch
            logits = model(imgs)
            # CrossEntropyLoss expects class indices as int64.
            labels = labels.to(torch.int64)
            loss = criterion(logits, labels)
            acc = compute_accuracy(logits, labels)

            # loss and acc are per-batch means; scale by the batch size so the
            # epoch figures are true per-sample averages.
            batch_len = labels.size(0)
            loss_sum += loss.item() * batch_len
            accuracy_sum += acc.item() * batch_len
            samples_seen += batch_len

            if idx % print_every == 0:
                tqdm.write(
                    f"[{mode.upper()}] Epoch: {epoch}, Iter: {idx}, Loss: {loss.item():.5f}"
                )

    epoch_loss = loss_sum / samples_seen if samples_seen else 0.0
    epoch_accuracy = accuracy_sum / samples_seen if samples_seen else 0.0
    tqdm.write(
        f"=== [{mode.upper()}] Epoch: {epoch}, Iter: {idx}, Accuracy: {epoch_accuracy:.3f} ===>"
    )
    return epoch_loss, epoch_accuracy, time.time() - start_time


In [None]:
# subsets = torch.utils.data.random_split(dataset, [0.7, 0.1, 0.2], generator=torch.Generator().manual_seed(42))
# train_loader = torch.utils.data.DataLoader(subsets[0], batch_size=128, shuffle=True, num_workers=5)
# ludo = next(enumerate(train_loader))
# print(type(ludo[1][1][0]))



In [7]:
# Build the dataset (using the class's default transform) and create the three loaders.
# NOTE(review): the batch size is hard-coded to 10 here; the `batch_size` variable
# defined in the optimization settings is not used — confirm which value is intended.
dataset = CovidSeverityDataset(root_dir, batch_size = 10, shuffle = True, num_workers = [0,0,0])
train_loader, val, test = dataset.get_subsets()

In [None]:
from tqdm import tqdm

# Per-epoch histories of loss, accuracy and wall-clock seconds for each phase.
train_losses, valid_losses = [], []
train_accs, valid_accs = [], []
train_times, valid_times = [], []

for epoch in range(epochs):
    tqdm.write(f"====== Epoch {epoch} ======>")
    # One optimisation pass over the training loader.
    loss, acc, wall_time = train(epoch, model, train_loader, optimizer)
    train_losses.append(loss)
    train_accs.append(acc)
    train_times.append(wall_time)

    # Score the current weights on the validation loader (default mode="val").
    loss, acc, wall_time = evaluate(epoch, model, val)
    valid_losses.append(loss)
    valid_accs.append(acc)
    valid_times.append(wall_time)

# Final evaluation on the test loader. `epoch` is the value left over from the
# loop above; evaluate() only uses it in its log messages.
test_loss, test_acc, test_time = evaluate(
    epoch, model, test, mode="test"
)

[TRAIN] Epoch: 0, Iter: 0, Loss: 1.98694
[TRAIN] Epoch: 0, Iter: 100, Loss: 1.90601
[TRAIN] Epoch: 0, Iter: 200, Loss: 1.82309
[TRAIN] Epoch: 0, Iter: 300, Loss: 1.84193
[TRAIN] Epoch: 0, Iter: 400, Loss: 1.79679
[TRAIN] Epoch: 0, Iter: 500, Loss: 1.73221
== [TRAIN] Epoch: 0, Accuracy: 0.267 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.68666
=== [VAL] Epoch: 0, Iter: 71, Accuracy: 0.302 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.70409
[TRAIN] Epoch: 1, Iter: 100, Loss: 1.65994
[TRAIN] Epoch: 1, Iter: 200, Loss: 1.60702
[TRAIN] Epoch: 1, Iter: 300, Loss: 1.96063
[TRAIN] Epoch: 1, Iter: 400, Loss: 1.53165
[TRAIN] Epoch: 1, Iter: 500, Loss: 1.74248
== [TRAIN] Epoch: 1, Accuracy: 0.306 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.54446
=== [VAL] Epoch: 1, Iter: 71, Accuracy: 0.302 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.50636
[TRAIN] Epoch: 2, Iter: 100, Loss: 1.53105
[TRAIN] Epoch: 2, Iter: 200, Loss: 1.92881
[TRAIN] Epoch: 2, Iter: 300, Loss: 1.46402
[TRAIN] Epoch: 2, Iter: 400, Loss: 1.90726
[TRAIN] Epoch

In [None]:
from tqdm import tqdm

# NOTE(review): this cell's code is identical to the previous training cell.
# `model` and `optimizer` are not re-created here, so running it continues from
# whatever state they are in, while the history lists below are reset.
train_losses, valid_losses = [], []
train_accs, valid_accs = [], []
train_times, valid_times = [], []

for epoch in range(epochs):
    tqdm.write(f"====== Epoch {epoch} ======>")
    # One optimisation pass over the training loader.
    loss, acc, wall_time = train(epoch, model, train_loader, optimizer)
    train_losses.append(loss)
    train_accs.append(acc)
    train_times.append(wall_time)

    # Score the current weights on the validation loader (default mode="val").
    loss, acc, wall_time = evaluate(epoch, model, val)
    valid_losses.append(loss)
    valid_accs.append(acc)
    valid_times.append(wall_time)

# Final evaluation on the test loader. `epoch` is the value left over from the
# loop above; evaluate() only uses it in its log messages.
test_loss, test_acc, test_time = evaluate(
    epoch, model, test, mode="test"
)

[TRAIN] Epoch: 0, Iter: 0, Loss: 1.77243
[TRAIN] Epoch: 0, Iter: 100, Loss: 2.43915
[TRAIN] Epoch: 0, Iter: 200, Loss: 1.75703
[TRAIN] Epoch: 0, Iter: 300, Loss: 1.57284
[TRAIN] Epoch: 0, Iter: 400, Loss: 1.50404
[TRAIN] Epoch: 0, Iter: 500, Loss: 1.50570
== [TRAIN] Epoch: 0, Accuracy: 0.299 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.44679
=== [VAL] Epoch: 0, Iter: 71, Accuracy: 0.303 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.30168
[TRAIN] Epoch: 1, Iter: 100, Loss: 1.43252
[TRAIN] Epoch: 1, Iter: 200, Loss: 1.51021
[TRAIN] Epoch: 1, Iter: 300, Loss: 2.13241
[TRAIN] Epoch: 1, Iter: 400, Loss: 1.79479
[TRAIN] Epoch: 1, Iter: 500, Loss: 1.63138
== [TRAIN] Epoch: 1, Accuracy: 0.316 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.40533
=== [VAL] Epoch: 1, Iter: 71, Accuracy: 0.302 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 2.17518
[TRAIN] Epoch: 2, Iter: 100, Loss: 1.49345
[TRAIN] Epoch: 2, Iter: 200, Loss: 1.70677
[TRAIN] Epoch: 2, Iter: 300, Loss: 1.61261
[TRAIN] Epoch: 2, Iter: 400, Loss: 1.32353
[TRAIN] Epoch