Learning how to use [KerasTuner](https://keras.io/keras_tuner/) to build a better TensorFlow model.

## TODO

- Add kfold option

## Versions

- V13: Trying augmentation. (e.g. experimental.preprocessing.RandomFlip)
- V12: Added needed reset_index(drop=True)
- V11: Break out submission results to view intermediate values. Add Kfold value but it's unused.
- V10: Save model: best_model.save("best_model")
- V9: Switching back to make_model. Trying max_trials=15
```python
tuner = kt.tuners.BayesianOptimization(
#     make_model_siren,
    make_model,
    objective='val_loss',
    max_trials=5,  # Set to 5 to run quicker, but need 100+ for good results
    overwrite=True)
```
- V8: Siren Submission: LB: 0.667
- V7: Working on Siren Layer: 
- V6: Full save: max_trials=100
- V5: Increasing max_trials from 5 to 100. If Quick Save doesn't work, there will be a V6.
```python
tuner = kt.tuners.BayesianOptimization(
    make_model,
    objective='val_loss',
    max_trials=5,  # Set to 5 to run quicker, but need 100+ for good results
    overwrite=True)
```
- V4: Tuning filters and units.  LB: 0.702
```python
    x = layers.Dense(
        units=hp.Int('num_dense_units', min_value=16, max_value=64, step=8),
        activation='relu'
    )(x)
```    
- V3: Documentation
- V2: Documentation
- V1: Only tuning Dropout()
```python
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
```

## References

- [Keras + KerasTuner best practices](https://www.kaggle.com/fchollet/keras-kerastuner-best-practices)
- [MoA: Keras + KerasTuner best practices](https://www.kaggle.com/fchollet/moa-keras-kerastuner-best-practices)
- [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition) - Chapter 13
- [Jane Street: Neural Network Starter](https://www.kaggle.com/gogo827jz/jane-street-neural-network-starter)
- [NN Model tuning with Keras Tuner](https://www.kaggle.com/sirishapb/nn-model-tuning-with-keras-tuner)


# Load Libraries

In [None]:
import os
import glob

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import pandas as pd
import numpy as np
from pathlib import Path

import random
from tqdm.notebook import tqdm
import pydicom # Handle MRI images

import cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn import model_selection

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras.initializers import RandomUniform


# Load Datasets

In [None]:
# Root directory of the competition data.
data_dir = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/')

# MRI sequence names; each one is used as a sub-folder name when collecting
# a patient's image files.
mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]
excluded_images = [109, 123, 709] # Bad images

labels_csv = data_dir / "train_labels.csv"
submission_csv = data_dir / "sample_submission.csv"

train_df = pd.read_csv(labels_csv)
# The sample submission is read twice so that test_df and sample_submission
# are two independent frames.
test_df = pd.read_csv(submission_csv)
sample_submission = pd.read_csv(submission_csv)

# Drop the excluded patients and renumber the remaining rows from 0.
keep_rows = ~train_df["BraTS21ID"].isin(excluded_images)
train_df = train_df.loc[keep_rows].reset_index(drop=True)



# KFold - Future Features

In [None]:
def create_folds(data, num_splits):
    """Tag every row of ``data`` with the shuffled K-fold it validates in.

    A ``kfold`` column is added to ``data`` in place (initialised to -1) and
    each row receives the number of the fold in which it is part of the
    validation split.

    Args:
        data: pandas DataFrame to annotate; it is modified in place.
        num_splits: number of folds passed to ``KFold``.

    Returns:
        The same ``data`` object, now carrying the ``kfold`` column.
    """
    data["kfold"] = -1
    kfold_col = data.columns.get_loc("kfold")
    kf = model_selection.KFold(n_splits=num_splits, shuffle=True, random_state=42)
    # KFold.split yields *positional* row indices, so they are written with
    # iloc. Label-based .loc only gives the same result when the frame has a
    # fresh 0..n-1 index.
    for fold, (_, valid_pos) in enumerate(kf.split(X=data)):
        data.iloc[valid_pos, kfold_col] = fold
    return data

In [None]:
# EPOCHS=20
# Number of folds recorded in train_df's "kfold" column.
k = 5
train_df = create_folds(data=train_df, num_splits=k)

In [None]:
# Preview the first five rows, including the new "kfold" column.
train_df.head(n=5)

# Utility Functions

In [None]:
def load_dicom(path, size = 224):
    """Read one DICOM file and return it as a ``size`` x ``size`` uint8 image.

    The pixel array is divided by its maximum (skipped when the maximum is 0)
    and multiplied by 255 before the cast to ``uint8``. The minimum is not
    subtracted, so this is a max-scaling rather than a full min-max
    standardisation.

    Not super sure if this kind of scaling is appropriate, but everyone seems
    to do it.

    Args:
        path: location of the DICOM file.
        size: edge length, in pixels, of the square output image.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    # dcmread is pydicom's documented reader; read_file is only a legacy
    # alias of it.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    # transform data into black and white scale / grayscale
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

def get_all_image_paths(brats21id, image_type, folder='train'): 
    """Return an array of image paths of one MRI type for one patient.

    Files are ordered by the integer that follows the last "-" in the path
    (after dropping the final four characters, presumably the ".dcm"
    extension). Only the middle half of the ordered series is kept: from 25%
    to 75% of its length, taking every 3rd file, or every file when the
    series has fewer than 10 images.

    Args:
        brats21id: patient ID; zero-padded to five digits to form the folder.
        image_type: one of the names in ``mri_types``.
        folder: dataset sub-folder to read from.

    Returns:
        ``np.ndarray`` of the selected path strings.
    """
    assert image_type in mri_types

    patient_dir = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/%s/" % folder,
        str(brats21id).zfill(5),
    )

    def slice_number(file_path):
        # Numeric suffix of the file name, used as the sort key.
        return int(file_path[:-4].split("-")[-1])

    candidates = glob.glob(os.path.join(patient_dir, image_type, "*"))
    candidates.sort(key=slice_number)

    total = len(candidates)
    step = 1 if total < 10 else 3
    first, last = int(total * 0.25), int(total * 0.75)

    return np.array(candidates[first:last:step])

def get_all_images(brats21id, image_type, folder='train', size=225):
    """Load the selected images of one patient and MRI type as a list.

    Each path from ``get_all_image_paths`` is read with ``load_dicom`` at the
    requested ``size``.
    """
    loaded = []
    for image_path in get_all_image_paths(brats21id, image_type, folder):
        loaded.append(load_dicom(image_path, size))
    return loaded

def get_all_data_for_train(image_type, image_size=32):
    """Collect the training images, labels and patient IDs for one MRI type.

    Iterates over the module-level ``train_df``. Every image loaded for a
    patient gets that patient's ``MGMT_value`` as label and ``BraTS21ID`` as ID.

    Args:
        image_type: MRI type forwarded to ``get_all_images``.
        image_size: edge length the images are resized to.

    Returns:
        Tuple ``(X, y, train_ids)`` of numpy arrays with one entry per image.
    """
    images_all, labels_all, ids_all = [], [], []

    for row_label in tqdm(train_df.index):
        record = train_df.loc[row_label]
        patient_id = int(record['BraTS21ID'])
        patient_images = get_all_images(patient_id, image_type, 'train', image_size)
        count = len(patient_images)

        images_all.extend(patient_images)
        labels_all.extend([record['MGMT_value']] * count)
        ids_all.extend([patient_id] * count)
        assert len(images_all) == len(labels_all)

    return np.array(images_all), np.array(labels_all), np.array(ids_all)

def get_all_data_for_test(image_type, image_size=32):
    """Collect the test images and their patient IDs for one MRI type.

    Iterates over the module-level ``test_df`` and reads from the 'test'
    folder.

    Args:
        image_type: MRI type forwarded to ``get_all_images``.
        image_size: edge length the images are resized to.

    Returns:
        Tuple ``(X, test_ids)`` of numpy arrays with one entry per image.
    """
    images_all, ids_all = [], []

    for row_label in tqdm(test_df.index):
        record = test_df.loc[row_label]
        patient_id = int(record['BraTS21ID'])
        patient_images = get_all_images(patient_id, image_type, 'test', image_size)

        images_all.extend(patient_images)
        ids_all.extend([patient_id] * len(patient_images))

    return np.array(images_all), np.array(ids_all)

# Load all Images
```
X - all loaded image slices, for every training patient
trainidt - the patient ID (BraTS21ID) of each slice in X and y, e.g. (0, 0, 0, 0, 2, 2, 3, 3, 3, 3, 3, ...); used to group per-slice results back by patient
testidt - the patient ID (BraTS21ID) of each slice in X_test
```

In [None]:
# Only the T1wCE sequence is used, at 32x32 pixels per slice.
X, y, trainidt = get_all_data_for_train(image_type='T1wCE', image_size=32)
X_test, testidt = get_all_data_for_test(image_type='T1wCE', image_size=32)

# Train/Validation Split

In [None]:
# Split by patient rather than by slice: all slices sharing a BraTS21ID go to
# the same side, so the validation set never contains slices of a patient the
# model was trained on. A per-slice split leaks patients across the two sets
# and inflates the per-patient validation AUC.
group_split = model_selection.GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_rows, valid_rows = next(group_split.split(X, y, groups=trainidt))

X_train, X_valid = X[train_rows], X[valid_rows]
y_train, y_valid = y[train_rows], y[valid_rows]
trainidt_train, trainidt_valid = trainidt[train_rows], trainidt[valid_rows]

## Adding a Dimension

In [None]:
# Append a trailing axis of length 1 to both image sets.
X_train, X_valid = (tf.expand_dims(images, axis=-1) for images in (X_train, X_valid))
X_train.shape

## One-hot encode labels

In [None]:
# Fix the width at two classes so the labels always match the 2-unit softmax
# output, even if one of the splits happens to contain a single class
# (to_categorical otherwise infers the width from the largest label present).
y_train = to_categorical(y_train, num_classes=2)
y_valid = to_categorical(y_valid, num_classes=2)

# Tunable Model

## Using the SIREN activation layer. Refer to https://vsitzmann.github.io/siren/ for more details.

In [None]:
class SineDenseLayer(keras.layers.Layer):
    """Dense layer with a SIREN sine activation: ``sin(omega_0 * dense(x))``.

    See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for
    discussion of omega_0.

    If is_first=True, omega_0 is a frequency factor which simply multiplies
    the activations before the nonlinearity. Different signals may require
    different omega_0 in the first layer - this is a hyperparameter.

    If is_first=False, then the weights will be divided by omega_0 so as to
    keep the magnitude of activations constant, but boost gradients to the
    weight matrix (see supplement Sec. 1.5).

    Args:
        features: number of units of the wrapped ``Dense`` layer.
        is_first: selects which of the two initialisation ranges is used.
        omega_0: frequency factor applied before the sine.
        **kwargs: standard ``keras.layers.Layer`` options (e.g. ``name``).
    """

    def __init__(self, features,
                 is_first=False, omega_0=30, **kwargs):
        # Forwarding **kwargs lets callers name the layer and lets
        # from_config() rebuild it from get_config().
        super().__init__(**kwargs)
        self.omega_0 = omega_0
        self.is_first = is_first

        self.features = features

        # NOTE(review): the bounds are derived from `features` (the output
        # width); the SIREN reference implementation appears to use the input
        # width (fan-in) - confirm whether this is intentional.
        if self.is_first:
            bound = 1 / self.features
        else:
            bound = np.sqrt(6 / self.features) / self.omega_0
        self.linear = keras.layers.Dense(
            features, kernel_initializer=RandomUniform(-bound, bound))

    def call(self, input):
        return tf.math.sin(self.omega_0 * self.linear(input))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "features": self.features,
            "is_first": self.is_first,
            "omega_0": self.omega_0,
        })
        return config
    
#     def forward_with_intermediate(self, input): 
#         # For visualization of activation distributions
#         intermediate = self.omega_0 * self.linear(input)
#         return tf.math.sin(intermediate), intermediate

class SineConvLayer(keras.layers.Layer):
    """2-D convolution with a SIREN sine activation: ``sin(omega_0 * conv(x))``.

    See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for
    discussion of omega_0.

    If is_first=True, omega_0 is a frequency factor which simply multiplies
    the activations before the nonlinearity. Different signals may require
    different omega_0 in the first layer - this is a hyperparameter.

    If is_first=False, then the weights will be divided by omega_0 so as to
    keep the magnitude of activations constant, but boost gradients to the
    weight matrix (see supplement Sec. 1.5).

    Args:
        features: number of filters of the wrapped ``Conv2D`` layer.
        kernel_size: kernel size forwarded to ``Conv2D``.
        is_first: selects which of the two initialisation ranges is used.
        omega_0: frequency factor applied before the sine.
        **kwargs: standard ``keras.layers.Layer`` options (e.g. ``name``).
    """

    def __init__(self, features, kernel_size,
                 is_first=False, omega_0=30, **kwargs):
        # Forwarding **kwargs lets callers name the layer and lets
        # from_config() rebuild it from get_config().
        super().__init__(**kwargs)
        self.omega_0 = omega_0
        self.is_first = is_first

        self.features = features
        # Stored only so that get_config() can report it.
        self.kernel_size = kernel_size

        # NOTE(review): the bounds are derived from `features` (the number of
        # filters); the SIREN reference implementation appears to use the
        # fan-in - confirm whether this is intentional.
        if self.is_first:
            bound = 1 / self.features
        else:
            bound = np.sqrt(6 / self.features) / self.omega_0
        self.conv = keras.layers.Conv2D(
            features, kernel_size, kernel_initializer=RandomUniform(-bound, bound))

    def call(self, input):
        return tf.math.sin(self.omega_0 * self.conv(input))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "features": self.features,
            "kernel_size": self.kernel_size,
            "is_first": self.is_first,
            "omega_0": self.omega_0,
        })
        return config
    
#     def forward_with_intermediate(self, input): 
#         # For visualization of activation distributions
#         intermediate = self.omega_0 * self.linear(input)
#         return tf.math.sin(intermediate), intermediate



In [None]:
import keras_tuner as kt


def make_model(hp):
    """Build and compile the tunable CNN used as a KerasTuner model builder.

    The input shape is taken from the module-level ``X_train``. Tuned values:
    filter counts of the two convolutions, the dropout rate and the width of
    the hidden dense layer.

    Args:
        hp: KerasTuner hyperparameter object supplying ``Int``/``Float`` values.

    Returns:
        A compiled ``keras.Model`` with a 2-unit softmax output, categorical
        cross-entropy loss, Adam optimizer and a ROC AUC metric named
        ``roc_auc``. The model summary is printed as a side effect.
    """
    # Hyperparameters, requested in the order their layers appear below.
    conv1_filters = hp.Int('units_Conv_1_' + str(0), min_value=64, max_value=256, step=32)
    conv2_filters = hp.Int('units_conv2_' + str(1), min_value=16, max_value=128, step=16)
    dropout_rate = hp.Float('dense_dropout', min_value=0., max_value=0.7)
    dense_units = hp.Int('num_dense_units', min_value=16, max_value=64, step=8)

    inputs = keras.Input(shape=X_train.shape[1:])

    pipeline = [
        # Map 0-255 pixel values to 0-1.
        keras.layers.experimental.preprocessing.Rescaling(1.0 / 255),
        keras.layers.Conv2D(filters=conv1_filters, kernel_size=(4, 4),
                            activation="relu", name="Conv_1"),
        keras.layers.MaxPool2D(pool_size=(2, 2)),
        keras.layers.Conv2D(filters=conv2_filters, kernel_size=(2, 2),
                            activation="relu", name="Conv_2"),
        keras.layers.MaxPool2D(pool_size=(1, 1)),
        layers.Dropout(dropout_rate),
        keras.layers.Flatten(),
        layers.Dense(units=dense_units, activation='relu'),
        keras.layers.Dense(2, activation="softmax"),
    ]

    outputs = inputs
    for layer in pipeline:
        outputs = layer(outputs)

    model = keras.Model(inputs, outputs)

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=[tf.keras.metrics.AUC(name='roc_auc', curve='ROC')],
    )
    model.summary()
    return model

# Augmentation

- https://www.tensorflow.org/guide/keras/preprocessing_layers
- https://keras.io/examples/vision/image_classification_from_scratch/

In [None]:
def make_model_augmented(hp):
    """Build and compile the tunable CNN with a data-augmentation front end.

    Same architecture and hyperparameter names as ``make_model``, preceded by
    a random horizontal flip and a random rotation. The input shape is taken
    from the module-level ``X_train`` (as the other builders do) instead of
    being hard-coded, so the builder follows whatever image size was loaded.

    Args:
        hp: KerasTuner hyperparameter object supplying ``Int``/``Float`` values.

    Returns:
        A compiled ``keras.Model`` with a 2-unit softmax output, categorical
        cross-entropy loss, Adam optimizer and a ROC AUC metric named
        ``roc_auc``. The model summary is printed as a side effect.
    """
    # Data augmentation stage with horizontal flipping and rotations.
    data_augmentation = keras.Sequential(
        [
            layers.experimental.preprocessing.RandomFlip("horizontal"),
            layers.experimental.preprocessing.RandomRotation(0.1),
        ]
    )

    inputs = keras.Input(shape=X_train.shape[1:])
    x = data_augmentation(inputs)

    # Map 0-255 pixel values to 0-1.
    x = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(x)

    x = keras.layers.Conv2D(filters=hp.Int('units_Conv_1_' + str(0),
                                            min_value=64,
                                            max_value=256,
                                            step=32),
                            kernel_size=(4, 4),
                            activation="relu",
                            name="Conv_1")(x)
    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

    x = keras.layers.Conv2D(filters=hp.Int('units_conv2_' + str(1),
                                            min_value=16,
                                            max_value=128,
                                            step=16),
                            kernel_size=(2, 2),
                            activation="relu",
                            name="Conv_2")(x)
    x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)

    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
    x = keras.layers.Flatten()(x)

    x = layers.Dense(
        units=hp.Int('num_dense_units', min_value=16, max_value=64, step=8),
        activation='relu'
    )(x)

    outputs = keras.layers.Dense(2, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="categorical_crossentropy", optimizer="adam", metrics=[roc_auc]
    )
    model.summary()
    return model

In [None]:
import keras_tuner as kt


def make_model_siren(hp):
    """Build and compile the tunable CNN variant that uses sine activations.

    Two ``SineConvLayer`` blocks (each followed by max pooling), dropout, and
    one ``SineDenseLayer`` feed a 2-unit softmax. Filter counts, kernel sizes,
    the omega_0 factors, the dropout rate and the dense width are tuned.

    Args:
        hp: KerasTuner hyperparameter object supplying ``Int``/``Float`` values.

    Returns:
        A compiled ``keras.Model`` with categorical cross-entropy loss, Adam
        optimizer and a ROC AUC metric named ``roc_auc``. The model summary is
        printed as a side effect.
    """
    # Hyperparameters, requested in the order their layers appear below.
    conv1_features = hp.Int('features_conv_1', min_value=64, max_value=256, step=32)
    conv1_kernel = hp.Int('kernel_conv_1', min_value=2, max_value=7, step=1)
    conv1_omega = hp.Int('omega_0_conv_1', min_value=10, max_value=50, step=5)

    conv2_features = hp.Int('features_conv_2', min_value=16, max_value=128, step=16)
    conv2_kernel = hp.Int('kernel_conv_2', min_value=2, max_value=7, step=1)
    conv2_omega = hp.Int('omega_0_conv_2', min_value=10, max_value=50, step=5)

    dropout_rate = hp.Float('dense_dropout', min_value=0., max_value=0.7)

    dense_features = hp.Int('features_dense_1', min_value=64, max_value=256, step=32)
    dense_omega = hp.Int('omega_0_dense_1', min_value=10, max_value=50, step=5)

    inputs = keras.Input(shape=X_train.shape[1:])

    # Map 0-255 pixel values to 0-1.
    net = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inputs)

    net = SineConvLayer(features=conv1_features, kernel_size=conv1_kernel,
                        is_first=True, omega_0=conv1_omega)(net)
    net = keras.layers.MaxPool2D(pool_size=(2, 2))(net)

    net = SineConvLayer(features=conv2_features, kernel_size=conv2_kernel,
                        is_first=False, omega_0=conv2_omega)(net)
    net = keras.layers.MaxPool2D(pool_size=(1, 1))(net)

    net = layers.Dropout(dropout_rate)(net)
    net = keras.layers.Flatten()(net)
    net = SineDenseLayer(features=dense_features,
                         is_first=False, omega_0=dense_omega)(net)

    outputs = keras.layers.Dense(2, activation="softmax")(net)

    model = keras.Model(inputs, outputs)
    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=[tf.keras.metrics.AUC(name='roc_auc', curve='ROC')],
    )
    model.summary()
    return model

# Hyperparameter Search

In [None]:
# Bayesian search over the hyperparameters declared by the model builder.
tuner = kt.tuners.BayesianOptimization(
#     make_model_siren,
#     make_model,
    make_model_augmented,
    objective='val_loss',
    max_trials=5,  # Set to 5 to run quicker, but need 100+ for good results
    overwrite=True)

# The builders register their AUC metric under the name 'roc_auc', so the
# validation value is logged as 'val_roc_auc'. The previous monitor name
# ('val_roc_acc') matched no logged metric, so early stopping never acted.
# NOTE(review): now that the monitor resolves, baseline=0.9 with patience=3
# ends a trial after 3 epochs unless validation AUC exceeds 0.9 - confirm this
# threshold is really wanted.
callbacks=[keras.callbacks.EarlyStopping(monitor='val_roc_auc', mode='max', patience=3, baseline=0.9)]

tuner.search(X_train, y_train, validation_split=0.2, callbacks=callbacks, verbose=1, epochs=20)

# Find the best epoch value

In [None]:
best_hp = tuner.get_best_hyperparameters()[0]
# Rebuild with the same builder the tuner searched with, so the final model
# has the architecture (including any augmentation layers) that the winning
# hyperparameters were evaluated on, whichever builder was passed to the tuner.
best_model = tuner.hypermodel.build(best_hp)

# Save Model

In [None]:
# Write the model to the path "best_model".
# NOTE(review): this runs before best_model.fit(...) in the next cell, so what
# is written here is the freshly built model, not the trained one.
best_model.save("best_model")

In [None]:
# Train the rebuilt model; the last 20% of the training samples are held out
# for validation by Keras.
history = best_model.fit(X_train, y_train, validation_split=0.2, epochs=50)

# Save again now that training has finished, so that "best_model" on disk
# holds the trained weights rather than the pre-training state.
best_model.save("best_model")

# Predictions on Validation Set

In [None]:
# Per-slice class scores -> hard 0/1 label per slice.
y_pred = best_model.predict(X_valid)
pred = np.argmax(y_pred, axis=1)

# One row per slice: patient ID and predicted label.
result = pd.DataFrame({"BraTS21ID": trainidt_valid, "MGMT_value": pred})

# Patient-level score = mean of that patient's slice labels.
result2 = result.groupby("BraTS21ID", as_index=False).mean()
result2

In [None]:
# Attach the true labels from train_df; the clashing MGMT_value columns get
# the default suffixes: MGMT_value_x (prediction) and MGMT_value_y (label).
result2 = pd.merge(result2, train_df, on="BraTS21ID")
result2

In [None]:
# Patient-level ROC AUC: true labels against the averaged predictions.
auc = roc_auc_score(
    y_true=result2["MGMT_value_y"],
    y_score=result2["MGMT_value_x"],
)
print(f"Validation AUC={auc}")


# Predictions on the Test Set

In [None]:
# Per-slice class scores -> hard 0/1 label per slice.
y_pred = best_model.predict(X_test)
pred = np.argmax(y_pred, axis=1)

# Column 0: patient ID of each slice; column 1: predicted label.
result = pd.DataFrame({0: testidt, 1: pred})
pred

# Submission File

In [None]:
result.columns=['BraTS21ID','MGMT_value']

# Patient-level score = mean of that patient's slice labels.
per_patient = result.groupby('BraTS21ID',as_index=False).mean()

# Align on the ID instead of overwriting the ID column by position: the rows
# then follow sample_submission's order and stay correctly paired even when a
# test patient produced no slices (and is therefore missing from per_patient).
result2 = sample_submission[['BraTS21ID']].merge(per_patient, on='BraTS21ID', how='left')
# Patients without any prediction get the neutral score 0.5.
result2['MGMT_value'] = result2['MGMT_value'].fillna(0.5)

result2

In [None]:
# Round each score to one decimal place, e.g. 0.907866 -> 0.9.
# (Comment this line out to submit unrounded scores.)
result2['MGMT_value'] = result2['MGMT_value'].map(lambda score: round(score * 10) / 10)

# Write the submission file without the row index.
result2.to_csv('submission.csv', index=False)
result2