In [2]:
import os
import glob
import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

from sklearn.model_selection import train_test_split

In [3]:
# Competition label table: the cells below read its BraTS21ID and MGMT_value columns.
TRAIN_LABELS_CSV = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
df_train = pd.read_csv(TRAIN_LABELS_CSV)

In [4]:
# Number of consecutive slices sampled per case (length of the sequence fed to the LSTM).
NUMBER_OF_FRAMES = 10
# Samples per batch in both data loaders.
BATCH_SIZE = 8
# Upper bound on training epochs passed to fit(); early stopping may end training sooner.
EPOCHS = 100
# Side length, in pixels, that every frame is resized to.
IMGS_SIZE = 224
# Number of units in the LSTM layer of the sequence model.
LSTM_UNITS = 256

In [5]:
def get_paths(df, core_path: str):
    """Build the FLAIR directory path for every case listed in ``df``.

    Each ``BraTS21ID`` is rendered as a string, left-padded with zeros to at
    least five characters, and joined as ``<core_path>/<id>/FLAIR``.

    Args:
        df: Mapping-like object (e.g. a DataFrame) whose ``"BraTS21ID"`` entry
            iterates over the case identifiers (ints or strings).
        core_path: Root directory holding one sub-directory per case.

    Returns:
        list[str]: One path per identifier, in the order of ``df["BraTS21ID"]``.
    """
    # Always convert to str before joining: the previous version handed ids of
    # five or more digits to os.path.join unconverted, which raises TypeError
    # for integer ids.
    return [
        os.path.join(core_path, str(case_id).zfill(5), "FLAIR")
        for case_id in df["BraTS21ID"]
    ]

In [6]:
train_path = "../input/rsna-miccai-png/train"
df_train["path"] = get_paths(df_train, train_path)

# Remove cases 109 and 709 in a single mask, then renumber the rows.
# NOTE(review): presumably these two cases have unusable FLAIR series - confirm.
keep_mask = ~df_train.BraTS21ID.isin([109, 709])
df_train = df_train.loc[keep_mask].reset_index(drop=True)
df_train.head()

In [7]:
def frame_selection(path):
    """Load a random run of consecutive frames from one case directory.

    Meant to be called through ``tf.numpy_function`` (see ``load_frame``),
    which hands the directory over as ``bytes``.

    Args:
        path: Directory containing the ``*.png`` slices, as UTF-8 ``bytes``
            (a plain ``str`` is accepted as well).

    Returns:
        Float32 tensor stacking ``NUMBER_OF_FRAMES`` images, each loaded with
        ``img_loader``, ordered by the number embedded in their file names.

    Raises:
        ValueError: If the directory holds fewer than ``NUMBER_OF_FRAMES``
            PNG files, or a file name carries no slice number.
    """
    directory = path.decode('utf8') if isinstance(path, bytes) else str(path)

    # Escaped dot and end anchor: only the number directly in front of the
    # ".png" extension counts, never digits elsewhere in the path.
    slice_number = re.compile(r'([0-9]+)\.png$')

    def _slice_index(png_path):
        """Return the integer slice number encoded in ``png_path``."""
        match = slice_number.search(png_path)
        if match is None:
            raise ValueError(f"No slice number found in file name: {png_path}")
        return int(match.group(1))

    # Sort numerically so that e.g. "...-10.png" comes after "...-9.png".
    sorted_user_list = sorted(glob.glob(directory + "/*.png"), key=_slice_index)

    last_start = len(sorted_user_list) - NUMBER_OF_FRAMES
    if last_start < 0:
        raise ValueError(
            f"{directory} holds {len(sorted_user_list)} PNG frame(s); "
            f"at least {NUMBER_OF_FRAMES} are required"
        )

    # maxval is exclusive, hence the +1: the final window stays reachable and
    # a directory with exactly NUMBER_OF_FRAMES images no longer yields the
    # invalid range maxval=0.
    start = tf.random.uniform((1,), maxval=last_start + 1, dtype=tf.int32)

    # The size argument is wrapped in a list so tf.slice receives a 1-D size.
    paths = tf.slice(sorted_user_list, start, [NUMBER_OF_FRAMES])

    return tf.map_fn(fn=img_loader, elems=paths, fn_output_signature=tf.float32)

In [8]:
def img_loader(path):
    """Read one PNG file and return it resized to the network input size.

    Args:
        path: Path of the PNG file, in any form ``tf.io.read_file`` accepts.

    Returns:
        The image decoded by ``decode_image`` and resized to
        ``(IMGS_SIZE, IMGS_SIZE)``.
    """
    decoded = decode_image(tf.io.read_file(path))
    target_size = (IMGS_SIZE, IMGS_SIZE)
    return tf.image.resize(decoded, target_size)

def decode_image(image):
    """Decode raw PNG bytes into a single-channel float32 image.

    Args:
        image: Encoded PNG content, e.g. the output of ``tf.io.read_file``.

    Returns:
        The image decoded with one channel and converted to ``tf.float32``
        through ``tf.image.convert_image_dtype``.
    """
    grayscale = tf.image.decode_png(image, channels=1)
    return tf.image.convert_image_dtype(grayscale, dtype=tf.float32)

In [9]:
### Change ###
def load_frame(df_dict):
    """Turn one row of the label table into a ``(frames, label)`` pair.

    Args:
        df_dict: Dict of tensors describing one row; the ``"path"`` and
            ``"MGMT_value"`` entries are read.

    Returns:
        tuple: ``(frames, label)`` where ``frames`` is a float32 tensor with
        static shape ``(NUMBER_OF_FRAMES, IMGS_SIZE, IMGS_SIZE, 1)`` and
        ``label`` is the row's ``MGMT_value`` cast to float32.
    """
    dirname = df_dict["path"]
    frames = tf.numpy_function(frame_selection, [dirname], tf.float32)

    # tf.numpy_function returns a tensor without static shape information.
    # Restore it so batching and the Keras input layer see the real shape:
    # NUMBER_OF_FRAMES single-channel images resized to IMGS_SIZE x IMGS_SIZE.
    frames.set_shape([NUMBER_OF_FRAMES, IMGS_SIZE, IMGS_SIZE, 1])

    # Parse label
    label = tf.cast(df_dict["MGMT_value"], tf.float32)

    return frames, label

In [10]:
# Fixed seed: without it every run (or re-run of this cell) produces a
# different split, so validation scores are not comparable between runs and
# earlier validation cases can end up in the training set.
SPLIT_SEED = 42

train_df, valid_df = train_test_split(
    df_train,
    test_size=0.1,
    stratify=df_train.MGMT_value.values,  # keep the label ratio equal in both parts
    random_state=SPLIT_SEED,
)
print(f'Size of train_df: {len(train_df)}; valid_df: {len(valid_df)}')

In [11]:
AUTOTUNE = tf.data.AUTOTUNE


def _build_loader(frame, shuffle_buffer=None):
    """Create the batched, prefetched tf.data pipeline for one data split.

    Args:
        frame: DataFrame whose columns become the per-row dict given to
            ``load_frame``.
        shuffle_buffer: Shuffle buffer size, or ``None`` to keep row order.

    Returns:
        Dataset yielding batches of ``load_frame`` outputs.
    """
    dataset = tf.data.Dataset.from_tensor_slices(dict(frame))
    if shuffle_buffer is not None:
        dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.map(load_frame, num_parallel_calls=AUTOTUNE)
    return dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# Only the training split is shuffled; validation keeps the row order.
trainloader = _build_loader(train_df, shuffle_buffer=1024)
validloader = _build_loader(valid_df)

In [12]:
def image_plotting(loader):
    """Plot every frame of the first sample in one batch drawn from ``loader``.

    Args:
        loader: Iterable yielding ``(frames, labels)`` batches in which
            ``frames`` is indexable as ``[sample, frame, row, col, channel]``.
    """
    # Fetch a single batch once. Calling next(iter(loader)) inside the loop
    # would rebuild the iterator and load a new batch for every subplot.
    frames, _ = next(iter(loader))
    sample = frames[0]

    # Grid size follows NUMBER_OF_FRAMES instead of a hard-coded 2 x 5.
    n_cols = 5
    n_rows = -(-NUMBER_OF_FRAMES // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, n_rows), squeeze=False)

    for idx, ax in enumerate(axes.flat):
        ax.axis("off")
        if idx < NUMBER_OF_FRAMES:
            # The earlier "i=+1" assigned 1 on every pass, so all images were
            # drawn into the first subplot and only frame 0 was ever shown.
            ax.imshow(sample[idx, :, :, 0], cmap="gray")

In [13]:
#image_plotting(trainloader)

In [14]:
# Fetch ONE batch and show all frames of its first sample. The previous loop
# pulled a new batch for every subplot and always displayed frame 0, on top of
# a stray full-figure axes created by plt.subplots().
frames, labels = next(iter(trainloader))

n_cols = 5
n_rows = -(-NUMBER_OF_FRAMES // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 3 * n_rows), squeeze=False)

for idx, ax in enumerate(axes.flat):
    ax.axis("off")
    if idx < NUMBER_OF_FRAMES:
        ax.imshow(frames[0, idx, :, :, 0], cmap="gray")
        ax.set_title(f"frame {idx}")

fig.suptitle(f"First sample of the batch - MGMT_value = {labels[0].numpy()}");

In [15]:
### Functions for training ###

def model_generation():
    """Build the per-frame feature extractor (grayscale image -> feature vector).

    A 3x3 convolution maps the single input channel to three channels, which
    are fed to an ImageNet-initialised, fully trainable EfficientNetB0 without
    its classification head, followed by global average pooling.

    Note:
        Calls ``tf.keras.backend.clear_session()``, i.e. it resets Keras'
        global state every time it is invoked.

    Returns:
        tf.keras.Model: Maps an ``(IMGS_SIZE, IMGS_SIZE, 1)`` image to the
        pooled EfficientNetB0 features.
    """
    tf.keras.backend.clear_session()

    transfer_learning = tf.keras.applications.EfficientNetB0(include_top=False, weights="imagenet")
    transfer_learning.trainable = True  # fine-tune the whole backbone

    inputs = tf.keras.Input(shape=(IMGS_SIZE, IMGS_SIZE, 1))

    # Learned 1 -> 3 channel projection so the grayscale frame fits the RGB backbone.
    # NOTE(review): the Keras docs state EfficientNet expects pixel values in
    # [0, 255]; confirm the scale produced by this layer is what is intended.
    x = tf.keras.layers.Conv2D(3, kernel_size=(3,3), padding="same", activation="relu")(inputs)

    # No hard-coded training=True: Keras now passes the real phase down, so the
    # backbone runs in training mode during fit() and in inference mode during
    # evaluate()/predict(). Forcing training=True kept its training-only
    # behaviour active while evaluating.
    x = transfer_learning(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    return tf.keras.Model(inputs, x)

# Build the per-frame backbone on its own and print its layer summary.
# temp_model_generation() builds its own backbone, so within the cells shown
# here `model` is only used for this inspection.
model = model_generation()
model.summary()

In [16]:
### Temporal model generation ###

def temp_model_generation():
    """Build the sequence classifier: per-frame CNN -> LSTM -> sigmoid.

    The backbone from ``model_generation`` is applied to each of the
    ``NUMBER_OF_FRAMES`` frames through ``TimeDistributed``; an LSTM with
    ``LSTM_UNITS`` units summarises the sequence and a single sigmoid unit
    produces the prediction.

    Returns:
        tf.keras.Model: Maps a
        ``(NUMBER_OF_FRAMES, IMGS_SIZE, IMGS_SIZE, 1)`` clip to one
        probability per sample.
    """
    tf.keras.backend.clear_session()

    # Build the backbone BEFORE creating this model's Input:
    # model_generation() resets Keras' global state itself, which should not
    # happen while this model is already half built.
    base_model = model_generation()

    inputs = tf.keras.Input(shape = (NUMBER_OF_FRAMES, IMGS_SIZE, IMGS_SIZE, 1))

    # Wrapper that applies the backbone to every frame of the sequence
    time_wrap = tf.keras.layers.TimeDistributed(base_model)(inputs)

    # return_sequences=False: each sample carries ONE label, so the model must
    # emit one prediction per sample. With return_sequences=True the output
    # was (batch, frames, 1), which does not line up with the (batch,) labels.
    lstm = tf.keras.layers.LSTM(LSTM_UNITS, return_sequences = False)(time_wrap)
    outro = tf.keras.layers.Dense(1, activation="sigmoid")(lstm)

    return tf.keras.Model(inputs, outro)


# Reset Keras' global state, then build the full sequence classifier and
# print its layer summary.
tf.keras.backend.clear_session() 
final_model = temp_model_generation()
final_model.summary()

In [17]:
### Model Callbacks ####
## TODO: expand this with a learning-rate-on-plateau callback (e.g. ReduceLROnPlateau) ###

# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
earlystopper = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=10, verbose=0, mode="min",
    restore_best_weights=True
)

# Relative, overridable output directory instead of a hard-coded Windows user
# path, which does not exist on other machines (the inputs are read from
# "../input/...").
save_dir = os.environ.get("MODEL_DIR", "models")
os.makedirs(save_dir, exist_ok=True)
save_loc = os.path.join(save_dir, 'best_model.h5')

# Monitor val_loss, the same quantity as the early stopper. The model is
# compiled with metrics=['acc'] only, so 'val_auc' is never logged and a
# checkpoint keyed on it would never be written. To checkpoint on AUC, add an
# AUC metric named 'auc' at compile time and switch back to
# monitor='val_auc', mode='max'.
model_save = tf.keras.callbacks.ModelCheckpoint(save_loc,
                             save_best_only = True,
                             monitor = 'val_loss',
                             mode = 'min', verbose = 1)

In [18]:
tf.keras.backend.clear_session()

# Binary classification: sigmoid output, cross-entropy loss, accuracy reported as 'acc'.
final_model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['acc'],
)

# Train for at most EPOCHS epochs, with early stopping and checkpointing attached.
history = final_model.fit(
    trainloader,
    validation_data=validloader,
    epochs=EPOCHS,
    callbacks=[earlystopper, model_save],
)

In [19]:
# Score the trained model on the validation loader. evaluate() returns the loss
# followed by the single compiled metric ('acc'), hence the two-value unpacking.
loss, acc = final_model.evaluate(validloader)