# CHOWDER

## Imports

In [None]:
!pip install torchmetrics



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset
import torch.nn as nn
import torch.optim as optim
import torchmetrics

In [None]:
# Mount Google Drive at /content/drive (Colab-only) so the data files
# referenced below under '/content/drive/My Drive/...' can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Load Data

In [None]:
# Folder on the mounted drive that holds the preprocessed .npy arrays
processed_data_path = '/content/drive/My Drive/Breast_Cancer_Detection/Processed_Data/'

# Development split: features and labels are loaded together, in that order
X_dev, y_dev = (
    np.load(processed_data_path + filename)
    for filename in ('X_dev.npy', 'y_dev.npy')
)

# Test split: only the feature array is loaded here
X_test = np.load(processed_data_path + 'X_test.npy')

## Split Data

In [None]:
# Hold out 15% of the development set for validation. The split is stratified
# on the labels and seeded so that it is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_dev,
    y_dev,
    test_size=0.15,
    stratify=y_dev,
    random_state=42,
)

## Standardize Data

In [None]:
def X_standardize(X_train, X_val, X_test):
    """Standardize the three splits with statistics taken from the training split.

    A single scalar mean and a single scalar standard deviation are computed
    over *all* values of ``X_train`` (no axis is given, so this is a global
    statistic rather than a per-feature one). The same two scalars are then
    applied to every split, so no information from the validation or test
    data leaks into the scaling.

    Args:
        X_train: Training array; source of the mean and standard deviation.
        X_val: Validation array, scaled with the training statistics.
        X_test: Test array, scaled with the training statistics.

    Returns:
        Tuple ``(X_train_scaled, X_val_scaled, X_test_scaled)``.
    """
    center = np.mean(X_train)
    spread = np.std(X_train)

    def _rescale(split):
        # Shift by the training mean, then divide by the training std
        return (split - center) / spread

    return _rescale(X_train), _rescale(X_val), _rescale(X_test)

In [None]:
# Scale every split using the statistics of the training split only
X_train_scaled, X_val_scaled, X_test_scaled = X_standardize(
    X_train=X_train,
    X_val=X_val,
    X_test=X_test,
)

## Convert to Tensor

In [None]:
# Convert the scaled arrays to torch tensors, in train / val / test order
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.Tensor(scaled_split)
    for scaled_split in (X_train_scaled, X_val_scaled, X_test_scaled)
)

In [None]:
# The tensors above are all that is needed from here on; drop the names of the
# intermediate NumPy arrays so their memory can be reclaimed.
del X_train_scaled, X_val_scaled, X_test_scaled
del X_train, X_val, X_test, X_dev

## CHOWDER Model

In [None]:
class CHOWDER(nn.Module):
    """Binary classifier built from a strided 1-D convolution and a min/max layer.

    The input is a batch of flattened sequences of shape
    ``(batch, 1, n_chunks * n_features)``. A ``Conv1d`` whose kernel size and
    stride both equal ``n_features`` maps each consecutive chunk of
    ``n_features`` values to one scalar score. The scores of a sample are
    sorted, the ``n_extreme`` lowest and ``n_extreme`` highest are kept, and a
    linear layer followed by a sigmoid turns those ``2 * n_extreme`` values
    into one probability.

    Args:
        n_features: Length of each chunk, used as the kernel size and stride of
            the convolution. Defaults to 2048.
        n_extreme: Number of lowest and of highest scores kept per sample.
            Defaults to 2, which gives the linear layer 4 inputs.
    """

    def __init__(self, n_features=2048, n_extreme=2):
        super(CHOWDER, self).__init__()

        self.n_features = n_features
        self.n_extreme = n_extreme

        # Convolutional layer: one scalar score per chunk of n_features values
        self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=n_features, stride=n_features)

        # Takes the n_extreme smallest and n_extreme largest scores
        self.fc1 = nn.Linear(2 * n_extreme, 1)

        self.init_weights()

    def init_weights(self):
        """Initialise the convolution; ``fc1`` keeps PyTorch's default init."""

        # Set std to the inverse square root of the kernel size (fan-in)
        std_conv = self.n_features ** (-0.5)
        # Initialize weights with random normal values for the convolutional layer
        nn.init.normal_(self.conv.weight, mean=0.0, std=std_conv)
        nn.init.constant_(self.conv.bias, 0)

    def forward(self, x):
        """Run the model on a batch.

        Args:
            x: Tensor of shape ``(batch, 1, n_chunks * n_features)``.

        Returns:
            Tuple ``(output, l2_reg)`` where ``output`` has shape ``(batch, 1)``
            and holds sigmoid probabilities, and ``l2_reg`` is a scalar tensor
            with the sum of squared parameters of the convolutional layer.
        """

        ## CONVOLUTION LAYER
        conv_output = self.conv(x)

        # Sum of squares of every parameter of the convolutional layer.
        # NOTE: conv.parameters() yields the bias as well as the weight, so the
        # bias is part of this penalty too.
        l2_reg = 0.0
        for param in self.conv.parameters():
            l2_reg += torch.sum(param ** 2)

        ## MINMAX LAYER

        # Sort the chunk scores of each sample in ascending order
        sorted_output, _ = torch.sort(conv_output, dim=2)

        # Number of top instances and negative evidence
        R = self.n_extreme

        # Select the first R (smallest) and last R (largest) sorted outputs
        lowest = sorted_output[:, :, :R]
        highest = sorted_output[:, :, -R:]
        a0 = torch.cat((lowest, highest), dim=2)

        # Remove only the channel axis. A bare squeeze() would also remove the
        # batch axis when the batch holds a single sample.
        a0 = a0.squeeze(1)

        z1 = self.fc1(a0)

        output = torch.sigmoid(z1)

        return output, l2_reg

In [None]:
# Instantiate the model once; the bare expression below makes the notebook
# display the module and its layers.
demo_model = CHOWDER()

demo_model

CHOWDER(
  (conv): Conv1d(1, 1, kernel_size=(2048,), stride=(2048,))
  (fc1): Linear(in_features=4, out_features=1, bias=True)
)

## Setup Hyperparameters and Data Loader

In [None]:
# Define hyperparameters
BATCH_SIZE = 16  # samples per DataLoader batch
NUM_ENSEMBLE_MODELS = 10  # number of models trained for each hyperparameter setting
# Define loss function: binary cross-entropy on probabilities (the model's
# forward pass already applies a sigmoid). No optimizer is defined in this cell.
loss_function = nn.BCELoss()

In [None]:
class CustomDataset(Dataset):
    """Dataset that pairs a data tensor with a target tensor by position.

    Indexing returns ``(data[index], target[index])``; the length is the
    length of the data tensor.
    """

    def __init__(self, data_tensor, target_tensor):
        self.data, self.target = data_tensor, target_tensor

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.target[index]

# Create custom datasets (labels are converted to float tensors, as BCELoss expects)
train_dataset = CustomDataset(X_train_tensor, torch.Tensor(y_train))
val_dataset = CustomDataset(X_val_tensor, torch.Tensor(y_val))

# Create DataLoader
# Training batches are reshuffled on every pass; drop_last keeps every batch at
# exactly BATCH_SIZE samples.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
# Validation data is NOT shuffled: every pass over val_dl must yield the same
# samples in the same order so that predictions from different models line up
# and the evaluated subset is identical from run to run.
val_dl = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

In [None]:
# Free Up Memory
# The datasets hold their own references to the feature tensors, so only the
# now-unneeded notebook-level names are removed here.
del X_train_tensor, X_val_tensor, y_train, y_val

## Train Model and Validate

In [None]:
# Hyperparameter grid: one ensemble is trained and scored for every
# (learning rate, epoch count, L2 strength) combination.
LR = [0.0001]
EPOCHS = [30]
L2 = [0.1, 0.5]

for lr in LR:
    for epochs in EPOCHS:
        for l2 in L2:

            # Create an ensemble of freshly initialised models, each with its own optimizer
            ensemble_models = []

            for _ in range(NUM_ENSEMBLE_MODELS):

                # Initialize the model
                model = CHOWDER()

                # Define optimizer
                optimizer = optim.Adam(model.parameters(), lr=lr)
                ensemble_models.append((model, optimizer))

            # Training loop for each model in the ensemble
            for model_counter, (model, optimizer) in enumerate(ensemble_models, start=1):

                print(f'\n ----- Model {model_counter} -----')

                # Loop through each epoch
                for epoch in range(epochs):

                    # Put model in train mode
                    model.train()

                    # Train in batches
                    for batch_x, batch_y in train_dl:

                        # Conv1d needs a channel axis: (batch, length) -> (batch, 1, length)
                        batch_x_transformed = batch_x.unsqueeze(1)

                        # Zero gradients
                        optimizer.zero_grad()

                        # Make predictions and get the L2 penalty of the conv layer
                        pred, weight_decay = model(batch_x_transformed)

                        # Calculate loss; targets are reshaped from (batch,) to
                        # (batch, 1) to match the prediction shape
                        batch_y = batch_y.view(-1, 1)
                        loss = loss_function(pred, batch_y) + (l2 * weight_decay)

                        # Calculate gradients
                        loss.backward()

                        # Make a step in gradient descent
                        optimizer.step()

                    # Print progress after each epoch
                    print(f"Epoch {epoch+1}/{epochs}")

            # After training all models, calculate the ensemble AUC

            # Put every model in evaluation mode
            for model, _ in ensemble_models:
                model.eval()

            # Averaged predictions and labels, one entry per validation batch
            average_predictions = []
            val_labels = []

            # No gradients are needed for evaluation
            with torch.no_grad():

                # Iterate over the validation data ONCE and run every model on
                # the same batch. This keeps predictions from different models
                # aligned sample by sample regardless of the loader's ordering,
                # and collects each label exactly once.
                for batch_x, batch_y in val_dl:

                    batch_x_transformed = batch_x.unsqueeze(1)

                    # Shape (num_models, batch): one flattened prediction row per model
                    batch_predictions = torch.stack(
                        [member(batch_x_transformed)[0].reshape(-1) for member, _ in ensemble_models]
                    )

                    # Average the predictions from all models for this batch
                    average_predictions.append(batch_predictions.mean(dim=0))
                    val_labels.append(batch_y.reshape(-1))

            # Flatten to 1-D tensors of identical length
            average_predictions = torch.cat(average_predictions, dim=0)
            # AUROC expects integer class labels, so cast the float targets
            val_labels = torch.cat(val_labels, dim=0).long()

            # Calculate the AUC score based on the averaged predictions
            average_auc = torchmetrics.AUROC(task="binary")(average_predictions, val_labels).item()
            print(f"LR:{lr} Epochs:{epochs} L2:{l2} --- Ensemble AUC Score: {average_auc:.4f}")




 ----- Model 1 -----
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

 ----- Model 2 -----
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

 ----- Model 3 -----
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
E