# Notebook for Google Colab

My main code for this project is in [aneurysm_detection.ipynb](aneurysm_detection.ipynb). For organization and simplification purposes, the code for running my models on a Google Colab GPU environment is all here: this mainly involves loading the generated data. The generated data was created from the preprocessing code in [aneurysm_detection.ipynb](aneurysm_detection.ipynb)

In [1]:
!pip install pydicom keras_tuner
import pandas as pd
import numpy as np
from pathlib import Path
import random
import pickle
import json
from pydicom import dcmread
from pydicom.data import get_testdata_file
import keras
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout, GlobalAveragePooling2D, BatchNormalization, Activation
from keras.models import load_model, Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.metrics import BinaryAccuracy, Precision, Recall
from keras.optimizers import Adam, RMSprop
from keras_tuner import Hyperband
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.data import Dataset
import time
import cv2
import gc
import plotly.express as px
import sys



In [2]:
from google.colab import drive
drive.mount('/content/drive')
home_folder = '/content/drive/My Drive/Colab Notebooks/rsna-intercranial-aneurysm-detection/'
data_gen_folder = home_folder + 'data_gen_2/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
X_val_file_path = data_gen_folder + 'X_val.pkl'
X_train_file_path = data_gen_folder + 'X_train.pkl'
y_val_file_path = data_gen_folder + 'y_val.csv'
y_train_file_path = data_gen_folder + 'y_train.csv'

In [4]:
y_val_loaded = pd.read_csv(y_val_file_path)
y_train_loaded = pd.read_csv(y_train_file_path)

y_val_loaded = np.array(y_val_loaded)
y_train_loaded = np.array(y_train_loaded)

y_val_in_brain = y_val_loaded[:, 0]
y_val_visible = y_val_loaded[:, 1]
y_train_in_brain = y_train_loaded[:, 0]
y_train_visible = y_train_loaded[:, 1]

In [5]:
with open(X_val_file_path, 'rb') as file:
    X_val_loaded = pickle.load(file)

In [6]:
with open(X_train_file_path, 'rb') as file:
    X_train_loaded = pickle.load(file)

In [7]:
X_train_loaded = X_train_loaded[..., np.newaxis]
X_val_loaded = X_val_loaded[..., np.newaxis]

In [None]:
training_history_file_path = data_gen_folder + 'training_history.txt'
model_save_file_path = data_gen_folder + 'saved_model.keras'

In [8]:
val_visible_ds = Dataset.from_tensor_slices((X_val_loaded, y_val_visible)).batch(32).prefetch(tf.data.AUTOTUNE)
train_visible_ds = Dataset.from_tensor_slices((X_train_loaded, y_train_visible)).batch(32).prefetch(tf.data.AUTOTUNE)

In [10]:
val_in_brain_ds = Dataset.from_tensor_slices((X_val_loaded, y_val_in_brain)).batch(32).prefetch(tf.data.AUTOTUNE)
train_in_brain_ds = Dataset.from_tensor_slices((X_train_loaded, y_train_in_brain)).batch(32).prefetch(tf.data.AUTOTUNE)

In [9]:
del X_train_loaded
del X_val_loaded
gc.collect()

60

In [10]:
processed_image_dim = 512

In [11]:
# This model can be used *separately* for in_brain_with_aneurysm and aneurysm_visible_in_image
# Different architectures may be better suited for each problem
def build_model(hp):
    inputs = Input(shape=(processed_image_dim, processed_image_dim, 1))

    # Vary the number of filters in conv layers
    x = Conv2D(filters=hp.Int('conv1_filters', min_value=32, max_value=64, step=16),
               kernel_size=hp.Choice('conv1_kernel', values=[3, 5]), activation='relu', padding='same')(inputs)
    x = MaxPooling2D(2, padding='same')(x)

    x = Conv2D(filters=hp.Int('conv2_filters', min_value=64, max_value=128, step=32),
               kernel_size=hp.Choice('conv2_kernel', values=[3, 5]), activation='relu', padding='same')(x)
    x = MaxPooling2D(2, padding='same')(x)

    x = Conv2D(filters=hp.Int('conv3_filters', 128, 256, step=64), kernel_size=3, use_bias=False, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(2, padding='same')(x)
    x = Dropout(rate=hp.Float('dropout_1', 0.2, 0.4, step=0.1))(x)

    x = Conv2D(256, 3, activation='relu', padding='same')(x)
    x = MaxPooling2D(2, padding='same')(x)

    x = Conv2D(256, 3, use_bias=False, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dropout(rate=hp.Float('dropout_2', 0.3, 0.5, step=0.1))(x)

    out = Dense(1, activation='sigmoid')(x)

    # Choose between the two (seemingly) most popular optimizers and a variety of learning rates
    optimizer_choice = hp.Choice('optimizer', ['adam', 'rmsprop'])
    if optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=hp.Float('adam_lr', 1e-5, 1e-3, sampling='log'))
    else:
        optimizer = RMSprop(learning_rate=hp.Float('rms_lr', 1e-5, 1e-3, sampling='log'))

    model = keras.Model(inputs, out)

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=[keras.metrics.BinaryAccuracy(), keras.metrics.Recall(), keras.metrics.Precision()]
    )

    return model

In [14]:
tuner_results_folder = data_gen_folder + 'tuner_results'
experiment_name = 'e1'

tuner = Hyperband(
    build_model,                          # your model-building function
    objective='val_loss',                 # what to optimize
    max_epochs=40,                        # maximum epochs for top models
    factor=3,                             # reduction factor per bracket
    hyperband_iterations=2,               # how many full Hyperband rounds
    seed=42,
    directory=tuner_results_folder,
    project_name=experiment_name,
)

Reloading Tuner from /content/drive/My Drive/Colab Notebooks/rsna-intercranial-aneurysm-detection/data_gen_2/tuner_results/e1/tuner0.json


In [22]:
best_model_in_brain = tuner.hypermodel.build(tuner.oracle.get_trial('0067').hyperparameters)
best_model_in_brain.summary()

In [23]:
stop_early = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history = best_model_in_brain.fit(
    train_in_brain_ds,
    validation_data=val_in_brain_ds,
    epochs=75,
    callbacks=[stop_early],
)

in_brain_history_path = data_gen_folder + 'in_brain_history.txt'
in_brain_model_path = data_gen_folder + 'in_brain_model.keras'

best_model_in_brain.save(in_brain_model_path)
with open(in_brain_history_path, 'w', encoding='utf-8') as file:
    json.dump(history.history, file, indent=4, ensure_ascii=False)

Epoch 1/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 167ms/step - binary_accuracy: 0.5211 - loss: 0.7363 - precision: 0.5038 - recall: 0.4783 - val_binary_accuracy: 0.5564 - val_loss: 0.6841 - val_precision: 0.5335 - val_recall: 0.6703
Epoch 2/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 110ms/step - binary_accuracy: 0.5637 - loss: 0.6888 - precision: 0.5545 - recall: 0.4833 - val_binary_accuracy: 0.6080 - val_loss: 0.6712 - val_precision: 0.5757 - val_recall: 0.7261
Epoch 3/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 110ms/step - binary_accuracy: 0.5753 - loss: 0.6790 - precision: 0.5684 - recall: 0.4938 - val_binary_accuracy: 0.5989 - val_loss: 0.6763 - val_precision: 0.5631 - val_recall: 0.7679
Epoch 4/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 110ms/step - binary_accuracy: 0.5887 - loss: 0.6713 - precision: 0.5824 - recall: 0.5186 - val_binary_accuracy: 0.5807 - val_loss: 0.6772 

In [14]:
tuner.results_summary(num_trials=5)

Results summary
Results in /content/drive/My Drive/Colab Notebooks/rsna-intercranial-aneurysm-detection/data_gen_2/tuner_results/e1
Showing 5 best trials
Objective(name="val_loss", direction="min")

Trial 0067 summary
Hyperparameters:
conv1_filters: 64
conv1_kernel: 3
conv2_filters: 128
conv2_kernel: 5
conv3_filters: 192
dropout_1: 0.4
dropout_2: 0.5
optimizer: adam
adam_lr: 9.206583038552771e-05
rms_lr: 1.688225189917974e-05
tuner/epochs: 40
tuner/initial_epoch: 14
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0064
Score: 0.5845618844032288

Trial 0044 summary
Hyperparameters:
conv1_filters: 32
conv1_kernel: 3
conv2_filters: 128
conv2_kernel: 5
conv3_filters: 128
dropout_1: 0.2
dropout_2: 0.4
optimizer: adam
adam_lr: 0.00020291579511064818
rms_lr: 1.1998472287810798e-05
tuner/epochs: 14
tuner/initial_epoch: 5
tuner/bracket: 3
tuner/round: 2
tuner/trial_id: 0035
Score: 0.6209691762924194

Trial 0066 summary
Hyperparameters:
conv1_filters: 16
conv1_kernel: 3
conv2_filters: 64
conv2_ke

In [13]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hps.values

{'conv1_filters': 64,
 'conv1_kernel': 3,
 'conv2_filters': 128,
 'conv2_kernel': 5,
 'conv3_filters': 192,
 'dropout_1': 0.4,
 'dropout_2': 0.5,
 'optimizer': 'adam',
 'adam_lr': 9.206583038552771e-05,
 'rms_lr': 1.688225189917974e-05,
 'tuner/epochs': 40,
 'tuner/initial_epoch': 14,
 'tuner/bracket': 2,
 'tuner/round': 2,
 'tuner/trial_id': '0064'}

In [15]:
tuner.oracle.objective.direction

'min'

In [16]:
for trial_id, trial in tuner.oracle.trials.items():
    print(f"Trial {trial_id}: score={trial.score}")

Trial 0002: score=0.6747527122497559
Trial 0001: score=None
Trial 0000: score=None
Trial 0003: score=0.6698911190032959
Trial 0004: score=0.6876376867294312
Trial 0005: score=0.670128583908081
Trial 0006: score=0.6757144927978516
Trial 0007: score=0.6715821623802185
Trial 0008: score=0.6804447174072266
Trial 0009: score=0.6682254076004028
Trial 0010: score=0.6814305186271667
Trial 0011: score=0.6813427209854126
Trial 0012: score=0.6792925000190735
Trial 0013: score=0.6760292053222656
Trial 0014: score=0.6732963919639587
Trial 0015: score=0.6835648417472839
Trial 0016: score=0.6765804886817932
Trial 0017: score=0.6760909557342529
Trial 0018: score=0.6924036145210266
Trial 0019: score=0.6744126081466675
Trial 0020: score=0.6845250129699707
Trial 0021: score=0.6663077473640442
Trial 0022: score=0.6733514070510864
Trial 0023: score=0.6984007954597473
Trial 0024: score=0.681959331035614
Trial 0025: score=0.680156409740448
Trial 0026: score=0.6783248782157898
Trial 0027: score=0.668758034706

In [12]:
tuner_results_folder = data_gen_folder + 'tuner_results_visible'
experiment_name = 'visible_e1'

tuner = Hyperband(
    build_model,                          # your model-building function
    objective='val_loss',                 # what to optimize
    max_epochs=40,                        # maximum epochs for top models
    factor=4,                             # reduction factor per bracket
    hyperband_iterations=1,               # how many full Hyperband rounds
    seed=42,
    directory=tuner_results_folder,
    project_name=experiment_name,
)

stop_early = EarlyStopping(monitor='val_loss', patience=10)

tuner.search(
    train_visible_ds,
    validation_data=val_visible_ds,
    epochs=40,
    callbacks=[stop_early],
)

Trial 42 Complete [00h 24m 26s]
val_loss: 0.21399922668933868

Best val_loss So Far: 0.21025902032852173
Total elapsed time: 04h 33m 32s

Search: Running Trial #43

Value             |Best Value So Far |Hyperparameter
64                |32                |conv1_filters
5                 |3                 |conv1_kernel
128               |64                |conv2_filters
5                 |5                 |conv2_kernel
256               |192               |conv3_filters
0.4               |0.4               |dropout_1
0.3               |0.5               |dropout_2
adam              |adam              |optimizer
0.00056757        |0.00032415        |adam_lr
0.00034682        |1.2016e-05        |rms_lr
40                |40                |tuner/epochs
0                 |10                |tuner/initial_epoch
0                 |1                 |tuner/bracket
0                 |1                 |tuner/round

Epoch 1/40
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m

KeyboardInterrupt: 

In [15]:
tuner.results_summary(num_trials=5)

Results summary
Results in /content/drive/My Drive/Colab Notebooks/rsna-intercranial-aneurysm-detection/data_gen_2/tuner_results_visible/visible_e1
Showing 5 best trials
Objective(name="val_loss", direction="min")

Trial 0039 summary
Hyperparameters:
conv1_filters: 32
conv1_kernel: 3
conv2_filters: 64
conv2_kernel: 5
conv3_filters: 192
dropout_1: 0.4
dropout_2: 0.5
optimizer: adam
adam_lr: 0.0003241463219718352
rms_lr: 1.2015957297929577e-05
tuner/epochs: 40
tuner/initial_epoch: 10
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0030
Score: 0.21025902032852173

Trial 0040 summary
Hyperparameters:
conv1_filters: 48
conv1_kernel: 5
conv2_filters: 64
conv2_kernel: 3
conv3_filters: 192
dropout_1: 0.4
dropout_2: 0.4
optimizer: adam
adam_lr: 0.0005550498129786479
rms_lr: 6.601146406744397e-05
tuner/epochs: 40
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.21210221946239471

Trial 0028 summary
Hyperparameters:
conv1_filters: 32
conv1_kernel: 3
conv2_filters: 128
conv2_kernel:

In [22]:
tuner.get_best_hyperparameters(1)[0].values

{'conv1_filters': 32,
 'conv1_kernel': 3,
 'conv2_filters': 64,
 'conv2_kernel': 5,
 'conv3_filters': 192,
 'dropout_1': 0.4,
 'dropout_2': 0.5,
 'optimizer': 'adam',
 'adam_lr': 0.0003241463219718352,
 'rms_lr': 1.2015957297929577e-05,
 'tuner/epochs': 40,
 'tuner/initial_epoch': 10,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0030'}

In [13]:
best_model_visible = tuner.hypermodel.build(tuner.get_best_hyperparameters(1)[0])
best_model_visible.summary()

In [23]:
stop_early = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history = best_model_visible.fit(
    train_visible_ds,
    validation_data=val_visible_ds,
    epochs=75,
    callbacks=[stop_early],
)

visible_history_path = data_gen_folder + 'visible_history.txt'
visible_model_path = data_gen_folder + 'visible_model.keras'

best_model_visible.save(visible_model_path)
with open(visible_history_path, 'w', encoding='utf-8') as file:
    json.dump(history.history, file, indent=4, ensure_ascii=False)

Epoch 1/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 73ms/step - binary_accuracy: 0.8956 - loss: 0.3342 - precision_1: 0.0979 - recall_1: 0.0049 - val_binary_accuracy: 0.9008 - val_loss: 0.3148 - val_precision_1: 0.0000e+00 - val_recall_1: 0.0000e+00
Epoch 2/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 63ms/step - binary_accuracy: 0.8986 - loss: 0.3062 - precision_1: 0.3782 - recall_1: 0.0126 - val_binary_accuracy: 0.8925 - val_loss: 0.3095 - val_precision_1: 0.3814 - val_recall_1: 0.1331
Epoch 3/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 64ms/step - binary_accuracy: 0.8982 - loss: 0.2881 - precision_1: 0.4087 - recall_1: 0.0321 - val_binary_accuracy: 0.8831 - val_loss: 0.3349 - val_precision_1: 0.3729 - val_recall_1: 0.2604
Epoch 4/75
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 64ms/step - binary_accuracy: 0.8980 - loss: 0.2759 - precision_1: 0.4501 - recall_1: 0.0724 - val_binary_accur