# CHOWDER

## Imports

In [None]:
!pip install torchmetrics



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset
import torch.nn as nn
import torch.optim as optim
import torchmetrics

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); every data path
# used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Load Data

In [None]:
# Folder with the pre-processed .npy arrays. The trailing slash matters because
# file names are appended with plain string concatenation.
processed_data_path = '/content/drive/My Drive/Breast_Cancer_Detection/Processed_Data/'

# Development arrays: features, labels, zoom block and coordinate block
X_dev, y_dev, X_zoom, X_coordinates = (
    np.load(processed_data_path + file_name)
    for file_name in ('X_dev.npy', 'y_dev.npy', 'zoom_train.npy', 'coordinates_dev.npy')
)

# Test arrays (no labels are loaded for the test split)
X_test, X_zoom_test, X_coordinates_test = (
    np.load(processed_data_path + file_name)
    for file_name in ('X_test.npy', 'zoom_test.npy', 'coordinates_test.npy')
)

In [None]:
# Width of the MoCo block, recorded so later steps can find where it ends
# inside the concatenated rows.
moco_features = X_dev.shape[1]

# Row layout after concatenation: [MoCo block | zoom block | coordinate block]
X_development = np.concatenate([X_dev, X_zoom, X_coordinates], axis=1)
X_test = np.concatenate([X_test, X_zoom_test, X_coordinates_test], axis=1)

In [None]:
# The separate pieces now live inside the concatenated arrays; drop the names
# so the originals can be garbage-collected.
del X_dev, X_zoom, X_coordinates, X_zoom_test, X_coordinates_test

## Split Data

In [None]:
# Hold out 15% of the development rows for validation, stratified on the
# labels and seeded so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_development,
    y_dev,
    test_size=0.15,
    stratify=y_dev,
    random_state=42,
)

In [None]:
# The split copies are what is used from here on; release the unsplit arrays
del X_development, y_dev

## Standardize Data

In [None]:
def X_standardize(X_train, X_val, X_test, moco_features, zoom_features=1000):
    """Standardize the MoCo and zoom column blocks with training statistics.

    Every input is a 2-D array whose columns are laid out as
    ``[MoCo block | zoom block | remaining columns]``. Each of the first two
    blocks is shifted and scaled by a single scalar mean and a single scalar
    standard deviation computed over *all* entries of that block in
    ``X_train``; the same statistics are then applied to ``X_val`` and
    ``X_test``. The remaining columns are copied through unchanged.

    Args:
        X_train: Training array; the only source of the statistics.
        X_val: Validation array with the same column layout.
        X_test: Test array with the same column layout.
        moco_features: Number of leading columns forming the MoCo block.
        zoom_features: Number of columns forming the zoom block that follows
            the MoCo block. Defaults to 1000, the width that was previously
            hard-coded.

    Returns:
        A 3-tuple ``(X_train_scaled, X_val_scaled, X_test_scaled)`` of new
        arrays with the same shapes as the inputs.

    Note:
        A block whose training standard deviation is zero produces a division
        by zero (inf/nan values), exactly as NumPy would for the raw
        expression; no guard is applied.
    """
    splits = (X_train, X_val, X_test)
    zoom_end = moco_features + zoom_features

    def _scale_block(start, stop):
        # Statistics come from the training split only, so nothing about the
        # validation or test data leaks into the scaling.
        train_block = X_train[:, start:stop]
        block_mean = np.mean(train_block)
        block_std = np.std(train_block)
        return [(split[:, start:stop] - block_mean) / block_std for split in splits]

    moco_scaled = _scale_block(0, moco_features)
    zoom_scaled = _scale_block(moco_features, zoom_end)

    # Re-assemble each split: scaled MoCo, scaled zoom, untouched remainder
    X_train_scaled, X_val_scaled, X_test_scaled = (
        np.concatenate((moco, zoom, split[:, zoom_end:]), axis=1)
        for moco, zoom, split in zip(moco_scaled, zoom_scaled, splits)
    )

    return X_train_scaled, X_val_scaled, X_test_scaled

In [None]:
# Scale the MoCo and zoom blocks of all three splits using statistics computed
# on the training split; the columns after those blocks are passed through as-is.
X_train_scaled, X_val_scaled, X_test_scaled = X_standardize(X_train, X_val, X_test, moco_features)

In [None]:
# Only the scaled copies are needed from here on
del X_train, X_val, X_test

## Convert to Tensor

In [None]:
# Wrap each scaled NumPy array in a torch.Tensor for use with the DataLoaders
X_train_tensor, X_val_tensor, X_test_tensor = map(
    torch.Tensor, (X_train_scaled, X_val_scaled, X_test_scaled)
)

In [None]:
# The tensors replace the scaled NumPy arrays; drop the NumPy names
del X_test_scaled, X_train_scaled, X_val_scaled

## CHOWDER Model

In [None]:
class CHOWDER(nn.Module):
    """Tile-scoring classifier with a min/max selection layer.

    The expected input has shape ``(batch, 1, L)`` where the last axis is laid
    out as::

        [num_tiles * feature_dim tile features | num_tiles zoom values | coordinates]

    and the coordinate block stores two values per tile at positions ``2*i``
    and ``2*i + 1`` (so it must hold at least ``2 * num_tiles`` entries).

    Pipeline:
        1. A single-filter ``Conv1d`` with kernel size and stride equal to
           ``feature_dim`` turns each tile's feature vector into one score.
        2. Scores are sorted; the ``num_extreme`` lowest and ``num_extreme``
           highest are kept, together with both coordinate values of each of
           those tiles.
        3. A linear layer followed by a sigmoid maps these ``6 * num_extreme``
           numbers to one probability per sample.

    The zoom block is skipped over and does not contribute to the prediction.

    Args:
        num_tiles: Number of tiles per sample (default 1000).
        feature_dim: Length of each tile's feature vector (default 2048).
        num_extreme: How many lowest and how many highest scoring tiles are
            kept (default 2). Must be at least 1.

    Raises:
        ValueError: If ``num_extreme`` is smaller than 1.
    """

    def __init__(self, num_tiles=1000, feature_dim=2048, num_extreme=2):
        super(CHOWDER, self).__init__()

        if num_extreme < 1:
            # A value of 0 would make the ``[-R:]`` slices select every tile.
            raise ValueError("num_extreme must be at least 1")

        self.num_tiles = num_tiles
        self.feature_dim = feature_dim
        self.num_extreme = num_extreme

        # Kernel size == stride == feature_dim, so the convolution emits
        # exactly one score per tile without overlapping neighbouring tiles.
        self.conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=feature_dim, stride=feature_dim)

        # Inputs: 2R tile scores + 2 coordinate values for each of the 2R
        # selected tiles = 6R (12 with the default R = 2).
        self.fc1 = nn.Linear(6 * num_extreme, 1)

        self.init_weights()

    def init_weights(self):
        """Initialise the convolution: N(0, 1/feature_dim) weights, zero bias."""

        # Standard deviation is the inverse square root of the kernel width
        std_conv = self.feature_dim ** (-0.5)
        nn.init.normal_(self.conv.weight, mean=0.0, std=std_conv)
        nn.init.constant_(self.conv.bias, 0)

    def forward(self, x):
        """Score tiles, keep the extremes and predict one probability per sample.

        Args:
            x: Tensor of shape ``(batch, 1, L)`` laid out as described in the
                class docstring.

        Returns:
            A tuple ``(output, l2_reg_conv)`` where ``output`` has shape
            ``(batch, 1)`` with values in [0, 1] and ``l2_reg_conv`` is the sum
            of squares of all convolution parameters (weight and bias).
        """
        R = self.num_extreme
        moco_len = self.num_tiles * self.feature_dim

        # Separate x into tile features and coordinates; the zoom block that
        # sits between them is intentionally not used.
        X_moco = x[:, :, :moco_len]
        X_coord = x[:, :, moco_len + self.num_tiles:]

        ## CONVOLUTION LAYER

        conv_output = self.conv(X_moco)

        # Sum of squared convolution parameters, returned so the caller can
        # add it to the loss as an L2 penalty.
        l2_reg_conv = 0.0
        for param in self.conv.parameters():
            l2_reg_conv += torch.sum(param ** 2)

        ## MINMAX LAYER

        # Ascending sort along the tile axis: the first R entries are the
        # lowest scores, the last R entries the highest.
        sorted_output, sorted_indices = torch.sort(conv_output, dim=2)

        lowest_scores = sorted_output[:, :, :R]
        highest_scores = sorted_output[:, :, -R:]
        selected_tiles = torch.cat((lowest_scores, highest_scores), dim=2)

        def _tile_coordinates(tile_indices):
            # Tile i keeps its two coordinate values at positions 2i and 2i+1;
            # gather all first values, then all second values.
            first = torch.gather(X_coord, 2, 2 * tile_indices)
            second = torch.gather(X_coord, 2, 2 * tile_indices + 1)
            return torch.cat((first, second), dim=2)

        # Coordinates of the lowest-scoring tiles followed by those of the
        # highest-scoring tiles
        selected_meta = torch.cat(
            (
                _tile_coordinates(sorted_indices[:, :, :R]),
                _tile_coordinates(sorted_indices[:, :, -R:]),
            ),
            dim=2,
        )

        # Join tile scores and coordinates, then drop only the channel axis.
        # A bare squeeze() would also remove the batch axis when batch == 1.
        a0 = torch.cat((selected_tiles, selected_meta), dim=2)
        a0 = a0.squeeze(1)

        # Linear layer
        z1 = self.fc1(a0)

        # Nonlinear layer
        output = torch.sigmoid(z1)

        return output, l2_reg_conv

In [None]:
# Instantiate a throwaway model only to display its layer structure; the
# bare expression on the last line makes the notebook print the module repr.
demo_model = CHOWDER()

demo_model

CHOWDER(
  (conv): Conv1d(1, 1, kernel_size=(2048,), stride=(2048,))
  (fc1): Linear(in_features=12, out_features=1, bias=True)
)

## Setup Hyperparameters and Data Loader

In [None]:
# Define hyperparameters
BATCH_SIZE = 10  # samples per batch in every DataLoader
LR = 0.001  # learning rate handed to each model's Adam optimizer
EPOCHS = 20  # passes over the training loader per ensemble member
NUM_ENSEMBLE_MODELS = 10  # number of independently initialised CHOWDER models

In [None]:
class CustomDataset(Dataset):
    """Pair a feature container with a target container, index by index.

    Item ``i`` is the tuple ``(data_tensor[i], target_tensor[i])`` and the
    dataset length is the length of the feature container.
    """

    def __init__(self, data_tensor, target_tensor):
        self.data, self.target = data_tensor, target_tensor

    def __len__(self):
        # Length is taken from the features only; targets are assumed to match
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.target[index]

# Wrap features and labels of each split in a dataset
train_dataset, val_dataset = (
    CustomDataset(features, torch.Tensor(labels))
    for features, labels in ((X_train_tensor, y_train), (X_val_tensor, y_val))
)

# Both loaders shuffle and drop the final incomplete batch, so every batch
# seen by the model contains exactly BATCH_SIZE samples.
# NOTE(review): for validation this means up to BATCH_SIZE - 1 samples are
# left out of the metrics each epoch.
train_dl, val_dl = (
    DataLoader(split_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    for split_dataset in (train_dataset, val_dataset)
)

In [None]:
# The datasets hold their own references to the data; drop the loose names
del X_train_tensor, X_val_tensor, y_train, y_val

## Train Model and Validate

In [None]:
# Binary cross-entropy on the probabilities returned by the model
loss_function = nn.BCELoss()

# Create an ensemble of independently initialised models, each paired with
# its own Adam optimizer
ensemble_models = []

for _ in range(NUM_ENSEMBLE_MODELS):

    # Initialize the model
    model = CHOWDER()

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=LR)
    ensemble_models.append((model, optimizer))

# One AUROC accumulator per split. Predictions are collected over the whole
# epoch and scored once, because the mean of AUROCs computed on 10-sample
# batches is not the AUROC of the epoch.
train_auroc = torchmetrics.AUROC(task="binary")
val_auroc = torchmetrics.AUROC(task="binary")

# Training and validation loops for each model in the ensemble
for model_counter, (model, optimizer) in enumerate(ensemble_models, start=1):

    print(f'\n ----- Model {model_counter} -----')

    # Loop through each epoch
    for epoch in range(EPOCHS):

        ## Training loop

        # Put model in train mode
        model.train()

        # Initialize loss and count, and clear the metric state
        total_loss = 0.0
        total_count = 0.0
        train_auroc.reset()

        # Train in batches
        for batch_x, batch_y in train_dl:

            # Add a dimension for channel numbers
            batch_x_transformed = batch_x.unsqueeze(1)

            # Zero gradients
            optimizer.zero_grad()

            # Make predictions and get the L2 term of the conv layer
            pred, L2_conv = model(batch_x_transformed)

            # Calculate loss: BCE plus the weighted L2 penalty
            batch_y = batch_y.view(-1, 1) # Reshape batch_y from (10) to (10,1)
            loss = loss_function(pred, batch_y) + (0.1*L2_conv)

            # Calculate gradients
            loss.backward()

            # Make step in gradient descent
            optimizer.step()

            # Add to loss counter for this epoch
            total_loss += loss.item() * len(batch_y)
            total_count += len(batch_y)

            # Accumulate predictions for the epoch-level AUC; detach so the
            # metric state does not keep the autograd graph alive
            train_auroc.update(pred.detach(), batch_y.int())

        # Loss per sample and AUC over every sample seen this epoch
        train_average_loss = total_loss / total_count
        train_average_auc = train_auroc.compute().item()


        ## Validation loop

        # Put model in evaluation mode
        model.eval()

        # Fix gradients (only using model to predict)
        with torch.no_grad():

            # Initialize loss and count, and clear the metric state
            total_loss = 0.0
            total_count = 0.0
            val_auroc.reset()

            # Validate in batches
            for batch_x, batch_y in val_dl:

                batch_x_transformed = batch_x.unsqueeze(1)

                # Make predictions
                val_pred, _ = model(batch_x_transformed)

                # Calculate loss (no L2 penalty here, unlike the training
                # loss, so the two printed losses are not on the same scale)
                batch_y = batch_y.view(-1, 1) # Reshape batch_y from (10) to (10,1)
                loss = loss_function(val_pred, batch_y)

                # Add to loss for this epoch
                total_loss += loss.item() * len(batch_y)
                total_count += len(batch_y)

                # Accumulate predictions for the epoch-level AUC
                val_auroc.update(val_pred, batch_y.int())

        # Loss per sample and AUC over every validation sample seen
        val_average_loss = total_loss / total_count
        val_average_auc = val_auroc.compute().item()

        # Print results from each epoch
        print(f"Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {train_average_loss:.4f} Train AUC Score: {train_average_auc:.4f} \
        Val Loss: {val_average_loss:.4f} Val AUC Score: {val_average_auc:.4f}")


 ----- Model 1 -----




Epoch [1/20] - Train Loss: 55.2415 Train AUC Score: 0.5117         Val Loss: 55.4521 Val AUC Score: 0.4782
Epoch [2/20] - Train Loss: 44.0705 Train AUC Score: 0.4769         Val Loss: 33.6679 Val AUC Score: 0.4907
Epoch [3/20] - Train Loss: 25.7124 Train AUC Score: 0.4823         Val Loss: 16.5836 Val AUC Score: 0.4215
Epoch [4/20] - Train Loss: 14.9620 Train AUC Score: 0.4956         Val Loss: 8.3711 Val AUC Score: 0.5842
Epoch [5/20] - Train Loss: 13.7194 Train AUC Score: 0.5264         Val Loss: 9.3522 Val AUC Score: 0.5055
Epoch [6/20] - Train Loss: 13.0617 Train AUC Score: 0.4770         Val Loss: 5.7990 Val AUC Score: 0.5906
Epoch [7/20] - Train Loss: 11.3443 Train AUC Score: 0.4819         Val Loss: 8.4215 Val AUC Score: 0.5365
Epoch [8/20] - Train Loss: 12.8292 Train AUC Score: 0.4563         Val Loss: 8.3744 Val AUC Score: 0.5915
Epoch [9/20] - Train Loss: 10.8507 Train AUC Score: 0.3848         Val Loss: 6.3261 Val AUC Score: 0.6607
Epoch [10/20] - Train Loss: 9.5353 Train AU



Epoch [11/20] - Train Loss: 10.9780 Train AUC Score: 0.5093         Val Loss: 17.2679 Val AUC Score: 0.4573
Epoch [12/20] - Train Loss: 10.7863 Train AUC Score: 0.5281         Val Loss: 12.2574 Val AUC Score: 0.4970
Epoch [13/20] - Train Loss: 10.9574 Train AUC Score: 0.5872         Val Loss: 13.4124 Val AUC Score: 0.4623
Epoch [14/20] - Train Loss: 9.1554 Train AUC Score: 0.5925         Val Loss: 11.6825 Val AUC Score: 0.5312
Epoch [15/20] - Train Loss: 8.9841 Train AUC Score: 0.6130         Val Loss: 7.3253 Val AUC Score: 0.5386
Epoch [16/20] - Train Loss: 10.3072 Train AUC Score: 0.5624         Val Loss: 10.8647 Val AUC Score: 0.5885
Epoch [17/20] - Train Loss: 6.9680 Train AUC Score: 0.6258         Val Loss: 5.9549 Val AUC Score: 0.3958
Epoch [18/20] - Train Loss: 7.3632 Train AUC Score: 0.5991         Val Loss: 12.3221 Val AUC Score: 0.4410
Epoch [19/20] - Train Loss: 7.4774 Train AUC Score: 0.5899         Val Loss: 5.1559 Val AUC Score: 0.4861
Epoch [20/20] - Train Loss: 7.5094 T

In [None]:
# NOTE: dev_dl is created in the DataLoader setup cell under "Test Model";
# that cell has to be executed before this one.

# Binary cross-entropy on the probabilities returned by the model
loss_function = nn.BCELoss()

# Create an ensemble of independently initialised models, each paired with
# its own Adam optimizer
ensemble_models = []

for _ in range(NUM_ENSEMBLE_MODELS):

    # Initialize the model
    model = CHOWDER()

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=LR)
    ensemble_models.append((model, optimizer))

# Predictions are accumulated over the whole epoch and scored once, because
# the mean of AUROCs computed on 10-sample batches is not the epoch AUROC.
train_auroc = torchmetrics.AUROC(task="binary")

# Training loops for each model in the ensemble
for model_counter, (model, optimizer) in enumerate(ensemble_models, start=1):

    print(f'\n ----- Model {model_counter} -----')

    # Loop through each epoch
    for epoch in range(EPOCHS):

        ## Training loop

        # Put model in train mode
        model.train()

        # Initialize loss and count, and clear the metric state
        total_loss = 0.0
        total_count = 0.0
        train_auroc.reset()

        # Train in batches
        for batch_x, batch_y in dev_dl:

            # Add a dimension for channel numbers
            batch_x_transformed = batch_x.unsqueeze(1)

            # Zero gradients
            optimizer.zero_grad()

            # Make predictions and get the L2 term of the conv layer
            pred, L2_term = model(batch_x_transformed)

            # Calculate loss: BCE plus the weighted L2 penalty
            batch_y = batch_y.view(-1, 1) # Reshape batch_y from (10) to (10,1)
            loss = loss_function(pred, batch_y) + (0.5*L2_term)

            # Calculate gradients
            loss.backward()

            # Make step in gradient descent
            optimizer.step()

            # Add to loss counter for this epoch
            total_loss += loss.item() * len(batch_y)
            total_count += len(batch_y)

            # Accumulate predictions for the epoch-level AUC; detach so the
            # metric state does not keep the autograd graph alive
            train_auroc.update(pred.detach(), batch_y.int())

        # Loss per sample and AUC over every sample seen this epoch
        train_average_loss = total_loss / total_count
        train_average_auc = train_auroc.compute().item()

        # Print results from each epoch
        print(f"Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {train_average_loss:.4f} Train AUC Score: {train_average_auc:.4f}")


## Test Model

In [None]:
## Setup DataLoaders

# Development loader: train and validation datasets chained into one dataset,
# shuffled, with the last incomplete batch dropped
dev_dataset = ConcatDataset([train_dataset, val_dataset])
dev_dl = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# Test loader: CustomDataset needs a target container, so an all-zero
# placeholder of matching length is supplied. Order is preserved
# (shuffle=False) and no batch is dropped.
y_test_dummy = torch.zeros(len(X_test_tensor))
test_dataset = CustomDataset(X_test_tensor, y_test_dummy)
test_dl = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# test_dataset and dev_dataset keep their own references; drop the loose names
del X_test_tensor, train_dataset, val_dataset

In [None]:
# Binary cross-entropy on the probabilities returned by the model
loss_function = nn.BCELoss()

# Create an ensemble of independently initialised models, each paired with
# its own Adam optimizer
ensemble_models = []

for _ in range(NUM_ENSEMBLE_MODELS):

    # Initialize the model
    model = CHOWDER()

    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=LR)
    ensemble_models.append((model, optimizer))

# Predictions are accumulated over the whole epoch and scored once, because
# the mean of AUROCs computed on 10-sample batches is not the epoch AUROC.
train_auroc = torchmetrics.AUROC(task="binary")

# Training loops for each model in the ensemble
for model_counter, (model, optimizer) in enumerate(ensemble_models, start=1):

    print(f'\n ----- Model {model_counter} -----')

    # Loop through each epoch
    for epoch in range(EPOCHS):

        ## Training loop

        # Put model in train mode
        model.train()

        # Initialize loss and count, and clear the metric state
        total_loss = 0.0
        total_count = 0.0
        train_auroc.reset()

        # Train in batches
        for batch_x, batch_y in dev_dl:

            # Add a dimension for channel numbers
            batch_x_transformed = batch_x.unsqueeze(1)

            # Zero gradients
            optimizer.zero_grad()

            # The model returns the prediction and the conv-layer L2 term.
            # The starred target keeps this working if forward() is later
            # extended with extra return values.
            pred, l2_reg_conv, *_ = model(batch_x_transformed)

            # The linear-layer L2 term is not returned by the model, so it is
            # computed here the same way the conv term is: sum of squares of
            # all parameters of the layer.
            l2_reg_linear = sum(torch.sum(param ** 2) for param in model.fc1.parameters())

            # Calculate loss: BCE plus both weighted L2 penalties
            batch_y = batch_y.view(-1, 1) # Reshape batch_y from (10) to (10,1)
            loss = loss_function(pred, batch_y) + (0.1*l2_reg_conv) + (0.0001*l2_reg_linear)

            # Calculate gradients
            loss.backward()

            # Make step in gradient descent
            optimizer.step()

            # Add to loss counter for this epoch
            total_loss += loss.item() * len(batch_y)
            total_count += len(batch_y)

            # Accumulate predictions for the epoch-level AUC; detach so the
            # metric state does not keep the autograd graph alive
            train_auroc.update(pred.detach(), batch_y.int())

        # Loss per sample and AUC over every sample seen this epoch
        train_average_loss = total_loss / total_count
        train_average_auc = train_auroc.compute().item()

        # Print results from each epoch
        print(f"Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {train_average_loss:.4f} Train AUC Score: {train_average_auc:.4f}")

In [None]:
# After training all models, predict the test set with each one and average

# One row per test sample, one column per ensemble member
ensemble_predictions = np.zeros((len(test_dataset), len(ensemble_models)))

# Inference only: no gradients are needed
with torch.no_grad():

    # Loop through each model
    for model_counter, (model, _) in enumerate(ensemble_models):

        # Put model in evaluation mode
        model.eval()

        # Row offset of the current batch inside this model's column.
        # test_dl is not shuffled, so rows stay in dataset order.
        row_start = 0

        for batch_x, _ in test_dl:

            # Add a dimension for channel numbers
            batch_x_transformed = batch_x.unsqueeze(1)

            # Only the prediction is needed; the starred target absorbs
            # however many regularisation terms forward() returns.
            pred, *_ = model(batch_x_transformed)

            # Flatten to one value per sample. The model treats every sample
            # independently, so a short final batch needs no padding.
            batch_pred = pred.reshape(-1).numpy()

            # Write straight into the result matrix instead of growing an
            # array with repeated concatenation
            row_stop = row_start + len(batch_pred)
            ensemble_predictions[row_start:row_stop, model_counter] = batch_pred
            row_start = row_stop

# Average the predictions from all models
average_prediction = np.mean(ensemble_predictions, axis=1)

In [None]:
# Read the per-sample metadata of the test set
data_path = '/content/drive/My Drive/Breast_Cancer_Detection/Data/'
df_test = pd.read_csv(data_path + "test_metadata.csv")

# Attach the averaged probabilities to the sample IDs, then order by ID.
# NOTE(review): this pairs values by position, so it assumes the rows of
# test_metadata.csv are in the same order as the rows of the test features.
CHOWDER_submission = pd.DataFrame(
    {"Sample ID": df_test["Sample ID"].values, "Target": average_prediction}
)
CHOWDER_submission = CHOWDER_submission.sort_values("Sample ID")

In [None]:
def sanity_checks(submission):
    """Assert that a submission frame has the expected values and layout.

    Checks, in order: every ``Target`` lies in [0, 1]; the frame has shape
    (149, 2); the columns are exactly ``Sample ID`` then ``Target``.

    Raises:
        AssertionError: On the first check that fails.
    """
    targets_in_range = submission["Target"].between(0, 1)
    assert targets_in_range.all(), "`Target` values must be in [0, 1]"

    expected_shape = (149, 2)
    assert submission.shape == expected_shape, "Your submission file must be of shape (149, 2)"

    expected_columns = ["Sample ID", "Target"]
    assert list(submission.columns) == expected_columns, "Your submission file must have columns `Sample ID` and `Target`"

# Validate value range, shape and column names before writing the file;
# raises AssertionError if any check fails.
sanity_checks(CHOWDER_submission)

In [None]:
# Folder on the mounted drive that receives the prediction file
submission_path = '/content/drive/My Drive/Breast_Cancer_Detection/Predictions/'

# Write only the two data columns. index=False is the documented way to omit
# the row index (index=None merely happened to be treated as false).
CHOWDER_submission.to_csv(submission_path + "CHOWDER_submission_hybrid.csv", index=False)