# Notebook for Google Colab

My main code for this project is in [aneurysm_detection.ipynb](aneurysm_detection.ipynb). To keep things organized and simple, all of the code for running my models in a Google Colab GPU environment lives here; this mainly involves loading the generated data, which was created by the preprocessing code in [aneurysm_detection.ipynb](aneurysm_detection.ipynb).

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import random
import pickle
import json
from pydicom import dcmread
from pydicom.data import get_testdata_file
import keras
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout, GlobalAveragePooling2D, BatchNormalization, Activation
from keras.models import load_model, Model
from keras.callbacks import ModelCheckpoint
from keras.metrics import BinaryAccuracy, Precision, Recall
from keras.optimizers import Adam, RMSprop
from keras_tuner import Hyperband
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.data import Dataset
import time
import cv2
import gc
import plotly.express as px
import sys

In [None]:
from google.colab import drive
# Mount Google Drive so the project folder below is reachable from the Colab runtime.
drive.mount('/content/drive')

# Folder paths are plain strings that end in '/', so file names can be appended directly.
home_folder = '/content/drive/My Drive/Colab Notebooks/rsna-intercranial-aneurysm-detection/'
data_gen_folder = f'{home_folder}data_gen_2/'

In [None]:
# Locations of the generated inputs (pickle files) and labels (CSV files)
# for the training and validation splits.
X_train_file_path = f'{data_gen_folder}X_train.pkl'
X_val_file_path = f'{data_gen_folder}X_val.pkl'
y_train_file_path = f'{data_gen_folder}y_train.csv'
y_val_file_path = f'{data_gen_folder}y_val.csv'

In [None]:
# Read each label table and convert it straight to a NumPy array.
y_val_loaded = np.array(pd.read_csv(y_val_file_path))
y_train_loaded = np.array(pd.read_csv(y_train_file_path))

# Targets are picked by column position: column 0 -> "in brain", column 1 -> "visible".
# NOTE(review): this assumes the CSVs were written without an extra index column,
# otherwise the positions shift by one - confirm against the preprocessing code.
y_val_in_brain, y_val_visible = y_val_loaded[:, 0], y_val_loaded[:, 1]
y_train_in_brain, y_train_visible = y_train_loaded[:, 0], y_train_loaded[:, 1]

In [None]:
# Deserialize the validation inputs from their pickle file.
# NOTE: unpickling can execute arbitrary code; only load files you generated yourself.
with open(X_val_file_path, mode='rb') as val_pickle:
    X_val_loaded = pickle.load(val_pickle)

In [None]:
# Deserialize the training inputs from their pickle file.
# NOTE: unpickling can execute arbitrary code; only load files you generated yourself.
with open(X_train_file_path, mode='rb') as train_pickle:
    X_train_loaded = pickle.load(train_pickle)

In [None]:
# Append a trailing axis of length 1 to both input arrays; the model's Input
# layer is declared with a final dimension of 1.
X_val_loaded = np.expand_dims(X_val_loaded, axis=-1)
X_train_loaded = np.expand_dims(X_train_loaded, axis=-1)

In [None]:
# Output locations for the saved Keras model and the training-history text file,
# both kept inside the generated-data folder.
model_save_file_path = f'{data_gen_folder}saved_model.keras'
training_history_file_path = f'{data_gen_folder}training_history.txt'

In [None]:
# Pair the inputs with the "visible" labels. from_tensor_slices slices along the
# first axis, so every element is one (input, label) pair and the dataset is
# NOT batched - call .batch(...) before handing it to fit()/search().
train_visible_ds = tf.data.Dataset.from_tensor_slices((X_train_loaded, y_train_visible))
val_visible_ds = tf.data.Dataset.from_tensor_slices((X_val_loaded, y_val_visible))

In [None]:
# Pair the inputs with the "in brain" labels. from_tensor_slices slices along the
# first axis, so every element is one (input, label) pair and the dataset is
# NOT batched - call .batch(...) before handing it to fit()/search().
train_in_brain_ds = tf.data.Dataset.from_tensor_slices((X_train_loaded, y_train_in_brain))
val_in_brain_ds = tf.data.Dataset.from_tensor_slices((X_val_loaded, y_val_in_brain))

In [None]:
# Drop the notebook's references to the raw input arrays (the datasets were
# built from them above), then ask the garbage collector to reclaim whatever
# became unreachable.
del X_train_loaded, X_val_loaded
gc.collect()

In [None]:
# Side length used for both spatial dimensions of the model's Input layer
# (see build_model: Input(shape=(processed_image_dim, processed_image_dim, 1))).
processed_image_dim = 512

In [None]:
# One search space that can be tuned *separately* for in_brain_with_aneurysm
# and for aneurysm_visible_in_image; the best architecture may differ between
# the two problems.
def build_model(hp):
    """Build and compile a binary-classification CNN from tuner hyperparameters.

    Args:
        hp: Keras Tuner hyperparameter container. It supplies the filter counts
            and kernel sizes of the first convolutions, the filter count of the
            third convolution, two dropout rates, the optimizer and its
            learning rate.

    Returns:
        A compiled Keras model with a single sigmoid output, trained with
        binary cross-entropy and tracking binary accuracy, recall and precision.
    """
    image_shape = (processed_image_dim, processed_image_dim, 1)
    inputs = Input(shape=image_shape)

    # Block 1: tunable filter count and kernel size.
    conv1_filters = hp.Int('conv1_filters', min_value=16, max_value=64, step=16)
    conv1_kernel = hp.Choice('conv1_kernel', values=[3, 5])
    features = Conv2D(filters=conv1_filters, kernel_size=conv1_kernel, activation='relu')(inputs)
    features = MaxPooling2D(2)(features)

    # Block 2: tunable filter count and kernel size.
    conv2_filters = hp.Int('conv2_filters', min_value=32, max_value=128, step=32)
    conv2_kernel = hp.Choice('conv2_kernel', values=[3, 5])
    features = Conv2D(filters=conv2_filters, kernel_size=conv2_kernel, activation='relu')(features)
    features = MaxPooling2D(2)(features)

    # Block 3: tunable filter count; the conv bias is dropped because batch
    # normalization follows immediately.
    conv3_filters = hp.Int('conv3_filters', 64, 256, step=64)
    features = Conv2D(filters=conv3_filters, kernel_size=3, use_bias=False)(features)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPooling2D(2)(features)
    dropout_1_rate = hp.Float('dropout_1', 0.2, 0.4, step=0.1)
    features = Dropout(rate=dropout_1_rate)(features)

    # Blocks 4 and 5: fixed 256-filter convolutions.
    features = Conv2D(256, 3, activation='relu')(features)
    features = MaxPooling2D(2)(features)

    features = Conv2D(256, 3, use_bias=False)(features)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)

    # Classification head: pool each feature map to one value, regularize,
    # then emit a single probability.
    features = GlobalAveragePooling2D()(features)
    dropout_2_rate = hp.Float('dropout_2', 0.3, 0.5, step=0.1)
    features = Dropout(rate=dropout_2_rate)(features)

    out = Dense(1, activation='sigmoid')(features)

    # Choose between the two (seemingly) most popular optimizers and a variety of
    # learning rates. Only the chosen optimizer's learning-rate hyperparameter is
    # requested from `hp`.
    if hp.Choice('optimizer', ['adam', 'rmsprop']) == 'adam':
        adam_lr = hp.Float('adam_lr', 1e-5, 1e-3, sampling='log')
        optimizer = Adam(learning_rate=adam_lr)
    else:
        rms_lr = hp.Float('rms_lr', 1e-5, 1e-3, sampling='log')
        optimizer = RMSprop(learning_rate=rms_lr)

    model = Model(inputs, out)
    tracked_metrics = [BinaryAccuracy(), Recall(), Precision()]
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=tracked_metrics)

    return model

In [None]:
# Hyperparameter search for the "in brain" target using Keras Tuner's Hyperband.
# Tuner output is written under `directory` / `project_name`.
tuner_results_folder = data_gen_folder + 'tuner_results'
experiment_name = 'e1'

# Keras expects a tf.data.Dataset to yield *batches*. The datasets built with
# Dataset.from_tensor_slices yield one unbatched (image, label) pair at a time,
# which does not match the model's (batch, height, width, 1) input, so they are
# batched here at the call site. The global dataset variables stay unbatched.
tuner_batch_size = 32  # lower this if the GPU runs out of memory

tuner = Hyperband(
    build_model,                          # your model-building function
    objective='val_loss',                 # what to optimize
    max_epochs=40,                        # maximum epochs for top models
    factor=3,                             # reduction factor per bracket
    hyperband_iterations=2,               # how many full Hyperband rounds
    seed=42,
    directory=tuner_results_folder,
    project_name=experiment_name,
)

# Optional early stopping to save time
stop_early = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# prefetch lets the input pipeline prepare the next batch while the current one trains.
tuner.search(
    train_in_brain_ds.batch(tuner_batch_size).prefetch(tf.data.AUTOTUNE),
    validation_data=val_in_brain_ds.batch(tuner_batch_size).prefetch(tf.data.AUTOTUNE),
    epochs=40,
    callbacks=[stop_early],
)