# CHOWDER

## Imports

In [None]:
!pip install torchmetrics



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset
import torch.nn as nn
import torch.optim as optim
import torchmetrics

In [None]:
# Mount Google Drive inside the Colab runtime; the data paths used below live
# under /content/drive. This import only exists on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Load Data

In [None]:
# Directory (on the mounted Drive) holding the preprocessed .npy arrays.
# The trailing slash is part of the value: file names are appended directly.
processed_data_path = '/content/drive/My Drive/Breast_Cancer_Detection/Processed_Data/'

# Development arrays: features, labels, zoom values and tile coordinates.
X_dev = np.load(f'{processed_data_path}X_dev.npy')
y_dev = np.load(f'{processed_data_path}y_dev.npy')
X_zoom = np.load(f'{processed_data_path}zoom_train.npy')
X_coordinates = np.load(f'{processed_data_path}coordinates_dev.npy')

# Test arrays: same three feature groups, no labels are loaded here.
X_test = np.load(f'{processed_data_path}X_test.npy')
X_zoom_test = np.load(f'{processed_data_path}zoom_test.npy')
X_coordinates_test = np.load(f'{processed_data_path}coordinates_test.npy')

In [None]:
# Number of columns in the MoCo block; later cells use it to locate the
# boundaries between feature groups inside the merged matrix.
moco_features = X_dev.shape[1]

# Put the three feature groups side by side: [MoCo | zoom | coordinates].
X_development = np.concatenate([X_dev, X_zoom, X_coordinates], axis=1)
X_test = np.concatenate([X_test, X_zoom_test, X_coordinates_test], axis=1)

In [None]:
# Free Memory: the separate pieces were copied into the merged matrices,
# so their names can be dropped.
del X_dev, X_zoom, X_coordinates, X_zoom_test, X_coordinates_test

## Split Data

In [None]:
# Hold out 15% of the development data for validation. The split is
# stratified on the labels and seeded so it is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_development,
    y_dev,
    test_size=0.15,
    stratify=y_dev,
    random_state=42,
)

In [None]:
# Free up memory: the train/validation splits replace the development arrays.
del X_development, y_dev

## Standardize Data

In [None]:
def X_standardize(X_train, X_val, X_test, moco_features, zoom_features=1000):
    """Standardize the MoCo and zoom column groups using training statistics.

    Every input row is laid out as ``[MoCo | zoom | remaining columns]``.
    For the MoCo group and for the zoom group, one scalar mean and one scalar
    standard deviation are computed over *all* values of that group in
    ``X_train`` (not per column) and applied to the three splits, so no
    statistics leak from validation or test data. The remaining columns are
    passed through unchanged.

    Args:
        X_train: 2-D array used to compute the statistics.
        X_val: 2-D array with the same column layout as ``X_train``.
        X_test: 2-D array with the same column layout as ``X_train``.
        moco_features: Number of leading columns that form the MoCo group.
        zoom_features: Number of columns in the zoom group that directly
            follows the MoCo group. Defaults to 1000, the width that was
            previously hard-coded.

    Returns:
        Tuple ``(X_train_scaled, X_val_scaled, X_test_scaled)`` of new arrays
        with the same shapes as the inputs.
    """
    splits = (X_train, X_val, X_test)
    zoom_end = moco_features + zoom_features

    def _scale_group(start, stop):
        # Scale columns [start, stop) of every split with train-only statistics.
        train_block = X_train[:, start:stop]
        if train_block.size == 0:
            # Empty group: nothing to scale, and mean/std would be NaN.
            return [split[:, start:stop] for split in splits]

        mean = np.mean(train_block)
        std = np.std(train_block)
        if std == 0:
            # A constant group would divide by zero; centre it only.
            std = 1.0
        return [(split[:, start:stop] - mean) / std for split in splits]

    moco_scaled = _scale_group(0, moco_features)
    zoom_scaled = _scale_group(moco_features, zoom_end)

    # Re-assemble each split: scaled MoCo, scaled zoom, untouched remainder.
    X_train_scaled, X_val_scaled, X_test_scaled = (
        np.concatenate((moco, zoom, split[:, zoom_end:]), axis=1)
        for moco, zoom, split in zip(moco_scaled, zoom_scaled, splits)
    )

    return X_train_scaled, X_val_scaled, X_test_scaled

In [None]:
# Scale all three splits with statistics computed from the training split.
X_train_scaled, X_val_scaled, X_test_scaled = X_standardize(
    X_train,
    X_val,
    X_test,
    moco_features,
)

In [None]:
# Free Memory: only the scaled copies are used from here on.
del X_train, X_val, X_test

## Convert to Tensor

In [None]:
# Wrap each scaled split in a torch.Tensor (same constructor for all three).
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.Tensor(scaled_split)
    for scaled_split in (X_train_scaled, X_val_scaled, X_test_scaled)
)

In [None]:
# Delete redundant variables to free up memory: the tensors replace the arrays.
del X_test_scaled, X_train_scaled, X_val_scaled

## CHOWDER Model

In [None]:
class CHOWDER(nn.Module):
    """CHOWDER-style classifier over a flat bag of tile features.

    Each input row is laid out as
    ``[num_tiles * tile_features tile values | zoom_features zoom values |
    coordinates]``.

    A 1-D convolution whose kernel and stride both equal ``tile_features``
    maps every tile to one scalar score. The ``num_extremes`` lowest and
    ``num_extremes`` highest scores are kept together with the coordinates of
    those tiles, and a single linear layer followed by a sigmoid turns them
    into a probability. The zoom values are part of the layout but are not
    fed to the classifier.

    The coordinate section is read at positions ``2*i`` and ``2*i + 1`` for
    tile ``i``, i.e. it is assumed to hold one interleaved pair per tile
    (TODO confirm against the preprocessing code).

    Args:
        num_tiles: Number of tiles per sample.
        tile_features: Number of features per tile.
        zoom_features: Width of the zoom section that is skipped.
        num_extremes: How many lowest and how many highest tile scores to keep.

    The defaults reproduce the previously hard-coded configuration
    (1000 tiles x 2048 features, 1000 zoom values, 2 extremes).
    """

    def __init__(self, num_tiles=1000, tile_features=2048, zoom_features=1000, num_extremes=2):
        super(CHOWDER, self).__init__()

        self.num_tiles = num_tiles
        self.tile_features = tile_features
        self.zoom_features = zoom_features
        self.num_extremes = num_extremes

        # Convolutional layer: one window per tile, so one score per tile
        self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=tile_features, stride=tile_features)

        # 2R tile scores plus 2 coordinates for each of the 2R tiles = 6R inputs
        self.fc1 = nn.Linear(6 * num_extremes, 1)

        self.init_weights()

    def init_weights(self):
        """Initialise the conv and linear weights with Xavier-uniform values."""
        nn.init.xavier_uniform_(self.conv.weight)
        nn.init.xavier_uniform_(self.fc1.weight)

    @staticmethod
    def _gather_coords(coords, tile_indices):
        """Return the first then the second coordinate of the given tiles."""
        first = torch.gather(coords, 2, 2 * tile_indices)
        second = torch.gather(coords, 2, 2 * tile_indices + 1)
        return torch.cat((first, second), dim=2)

    def forward(self, x):
        """Score a batch of samples.

        Args:
            x: Tensor of shape (batch, 1, features) using the layout described
                on the class.

        Returns:
            Tuple ``(output, l2_reg_conv, l2_reg_linear)``: the sigmoid
            probabilities with shape (batch, 1), and the sum of squared
            parameters (weights and bias) of the conv layer and of the linear
            layer, for use as L2 penalties by the caller.
        """
        # Separate x into the tile features and the coordinates
        # (the zoom section in between is skipped)
        moco_end = self.num_tiles * self.tile_features
        coord_start = moco_end + self.zoom_features
        X_moco = x[:, :, :moco_end]
        X_coord = x[:, :, coord_start:]

        ## CONVOLUTION LAYER

        conv_output = self.conv(X_moco)

        # Squared L2 norm of the conv parameters
        l2_reg_conv = sum(torch.sum(param ** 2) for param in self.conv.parameters())

        ## MINMAX LAYER

        # Sort the tile scores of every sample in ascending order
        sorted_output, sorted_indices = torch.sort(conv_output, dim=2)

        R = self.num_extremes
        lowest_indices = sorted_indices[:, :, :R]
        highest_indices = sorted_indices[:, :, -R:]

        # R lowest scores followed by R highest scores
        selected_tiles = torch.cat((sorted_output[:, :, :R], sorted_output[:, :, -R:]), dim=2)

        # Coordinates of the selected tiles, in the same lowest/highest order
        selected_meta = torch.cat(
            (
                self._gather_coords(X_coord, lowest_indices),
                self._gather_coords(X_coord, highest_indices),
            ),
            dim=2,
        )

        # Join tile scores and coordinates. Only the channel axis is squeezed:
        # a bare squeeze() would also drop the batch axis for a batch of one.
        a0 = torch.cat((selected_tiles, selected_meta), dim=2)
        a0 = a0.squeeze(1)

        # Linear layer
        z1 = self.fc1(a0)

        # Squared L2 norm of the linear parameters
        l2_reg_linear = sum(torch.sum(param ** 2) for param in self.fc1.parameters())

        # Nonlinear layer
        output = torch.sigmoid(z1)

        return output, l2_reg_conv, l2_reg_linear

In [None]:
# Build one throw-away instance just to inspect the architecture.
demo_model = CHOWDER()

# A bare expression as the last statement of a notebook cell displays its
# repr, which lists the model's layers.
demo_model

CHOWDER(
  (conv): Conv1d(1, 1, kernel_size=(2048,), stride=(2048,))
  (fc1): Linear(in_features=12, out_features=1, bias=True)
)

## Setup Hyperparameters and Data Loader

In [None]:
# Define hyperparameters
# BATCH_SIZE is the batch size of both DataLoaders created below.
BATCH_SIZE = 10
# NOTE: LR and EPOCHS are rebound to lists of candidate values by the
# grid-search cell further down, so these scalar values are not the ones
# used during training.
LR = 0.001
EPOCHS = 20
# Number of independently initialised models trained per grid point.
NUM_ENSEMBLE_MODELS = 10

In [None]:
class CustomDataset(Dataset):
    """Pair a feature container with a label container for a DataLoader.

    Item ``i`` is the tuple ``(data_tensor[i], target_tensor[i])``; the
    length is that of ``data_tensor``.
    """

    def __init__(self, data_tensor, target_tensor):
        # Both containers are stored by reference, nothing is copied.
        self.data, self.target = data_tensor, target_tensor

    def __len__(self):
        # The feature container alone defines the dataset size.
        return len(self.data)

    def __getitem__(self, index):
        # Features and label are looked up at the same position.
        return self.data[index], self.target[index]

# Create custom datasets (labels are converted to float tensors, as required
# by the BCE loss used for training)
train_dataset = CustomDataset(X_train_tensor, torch.Tensor(y_train))
val_dataset = CustomDataset(X_val_tensor, torch.Tensor(y_val))

# Create DataLoader
# Training batches are reshuffled every epoch.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
# Validation must NOT be shuffled: the ensemble averages per-sample predictions
# from several passes over this loader, which is only meaningful if every pass
# visits the samples in the same order.
# NOTE(review): drop_last=True is kept from the original and discards up to
# BATCH_SIZE - 1 validation samples from the metric.
val_dl = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

In [None]:
# Free Up Memory: drop the notebook-level names. The datasets created above
# keep their own references to the feature tensors.
del X_train_tensor, X_val_tensor, y_train, y_val

## Train Model and Validate

In [None]:
from itertools import product

# Hyperparameter grid. Every combination trains a fresh ensemble of
# NUM_ENSEMBLE_MODELS models and reports the validation AUC of the ensemble.
LR = [0.001, 0.0001]
EPOCHS = [20]
L2_conv_param = [0.1, 0.01]
L2_linear_param = [0.01, 0.001, 0.0001]
loss_function = nn.BCELoss()

# Evaluation loader over the validation dataset: fixed order and no dropped
# samples, so that predictions of different models refer to the same samples
# and can be averaged element-wise.
eval_dl = DataLoader(val_dl.dataset, batch_size=BATCH_SIZE, shuffle=False)

for lr, epochs, l2_conv, l2_linear in product(LR, EPOCHS, L2_conv_param, L2_linear_param):

    # Create an ensemble of independently initialised models
    ensemble_models = []
    for _ in range(NUM_ENSEMBLE_MODELS):
        model = CHOWDER()
        optimizer = optim.Adam(model.parameters(), lr=lr)
        ensemble_models.append((model, optimizer))

    # Training loop for each model in the ensemble
    for model_counter, (model, optimizer) in enumerate(ensemble_models, start=1):

        print(f'\n ----- Model {model_counter} -----')

        for epoch in range(epochs):

            # Put model in train mode
            model.train()

            for batch_x, batch_y in train_dl:

                # Add the channel axis expected by the conv layer
                batch_x_transformed = batch_x.unsqueeze(1)

                optimizer.zero_grad()

                # Predictions plus the squared L2 norms of both layers
                pred, L2_conv, L2_linear = model(batch_x_transformed)

                # BCE on column vectors plus the two weighted L2 penalties
                loss = (
                    loss_function(pred.reshape(-1, 1), batch_y.view(-1, 1))
                    + (l2_conv * L2_conv)
                    + (l2_linear * L2_linear)
                )

                loss.backward()
                optimizer.step()

            # Print results from each epoch
            print(f"Epoch {epoch+1}/{epochs}")

    # After training all models, calculate the ensemble AUC

    ensemble_predictions = []
    # Labels are identical for every model, so they are collected only once;
    # concatenating them per model would give NUM_ENSEMBLE_MODELS times more
    # labels than averaged predictions.
    val_labels = None

    # No gradients are needed for evaluation
    with torch.no_grad():

        for model, _ in ensemble_models:

            # Put model in evaluation mode
            model.eval()

            predictions = []
            batch_labels = []

            for batch_x, batch_y in eval_dl:

                batch_x_transformed = batch_x.unsqueeze(1)

                pred, _, _ = model(batch_x_transformed)

                # Flatten so batches of any size (including one) concatenate
                predictions.append(pred.reshape(-1))
                batch_labels.append(batch_y.reshape(-1))

            ensemble_predictions.append(torch.cat(predictions))

            if val_labels is None:
                val_labels = torch.cat(batch_labels)

    # Average the per-sample predictions over all models
    ensemble_predictions = torch.stack(ensemble_predictions)
    average_predictions = torch.mean(ensemble_predictions, dim=0)

    # Calculate the AUC score based on the averaged predictions
    # (the binary AUROC metric expects integer class labels as target)
    average_auc = torchmetrics.AUROC(task="binary")(average_predictions, val_labels.long()).item()
    print(f"LR:{lr} Epochs:{epochs} L2 conv:{l2_conv} L2 lin:{l2_linear} --- Ensemble AUC Score: {average_auc:.4f}")


 ----- Model 1 -----
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 ----- Model 2 -----
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 ----- Model 3 -----
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 ----- Model 4 -----
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
