In [1]:
# %pip installs into the running kernel's environment; the version is pinned to the one this notebook was run with.
%pip install classification-models-3D==1.0.2

Collecting classification-models-3D
  Downloading classification_models_3D-1.0.2-py3-none-any.whl (45 kB)
[K     |████████████████████████████████| 45 kB 1.5 MB/s 
Installing collected packages: classification-models-3D
Successfully installed classification-models-3D-1.0.2


In [2]:
# !pip install efficientnet-3D keras_applications

In [3]:
# %pip installs into the running kernel's environment; the version is pinned to the one this notebook was run with.
%pip install keras_applications==1.0.8

Collecting keras_applications
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 2.8 MB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8


In [4]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
import cv2
import time
import glob
import os
import pandas
from tensorflow.keras import layers
from classification_models_3D.keras import Classifiers
tf.random.set_seed(1)
np.random.seed(1)
#random.seed(1)

In [5]:
def get_all_slices(df,base_dir):
    """Count the FLAIR entries stored for every folder listed in ``df``.

    Args:
        df: table with a ``folder_id`` column; each value names a
            sub-directory of ``base_dir``.
        base_dir: directory containing one sub-directory per ``folder_id``.

    Returns:
        list[int]: for each row of ``df`` (in row order), the number of paths
        matched by ``<base_dir>/<folder_id>/flair/*``. A folder that does not
        exist simply contributes 0.
    """
    return [
        len(glob.glob(os.path.join(base_dir, folder_id) + '/flair/*'))
        for folder_id in list(df['folder_id'])
    ]

def split_train_test(slices_list,folders_list,label_list,split_ratio=0.1):
    """Split three aligned sequences into a leading test part and a trailing train part.

    The first ``int(len(slices_list) * split_ratio)`` items of every sequence
    form the test portion; everything after that is the train portion. No
    shuffling happens here, so callers must shuffle beforehand if they want a
    random split.

    Args:
        slices_list: sliceable sequence; its length determines the cut point.
        folders_list: sliceable sequence aligned with ``slices_list``.
        label_list: sliceable sequence aligned with ``slices_list``.
        split_ratio: fraction of items assigned to the test portion.

    Returns:
        tuple: ``(train_slices, train_folders, train_labels,
        test_slices, test_folders, test_labels)``.
    """
    cut = int(len(slices_list)*split_ratio)
    aligned = (slices_list, folders_list, label_list)
    train_part = tuple(seq[cut:] for seq in aligned)
    test_part = tuple(seq[:cut] for seq in aligned)
    return train_part + test_part

In [6]:
# Directory that get_all_slices() searches for <folder_id>/flair/* entries.
base_dir = '../input/classify-tumor-best/DATATUMORONLY_TRAIN/train'
# Every column is read as text (dtype='str'); presumably this keeps folder ids
# such as zero-padded numbers intact — TODO confirm against the CSV.
df = pd.read_csv('../input/rsnasubmissionresult/result.csv',dtype='str')
#slices_list = np.array(get_all_slices(df,base_dir))

In [7]:
# Positional hold-out split: the first 525 rows are used for training and all
# remaining rows for testing. The test slice starts at 525 (previously 526),
# so row 525 is no longer silently excluded from both subsets.
train_df = df.iloc[:525,:]
test_df = df.iloc[525:,:]

In [8]:
# --- training subset -------------------------------------------------------
# Slice counts, folder ids and labels are three arrays aligned by row position.
train_slices_list = np.array(get_all_slices(train_df,base_dir))
train_folders_list = np.array(list(train_df['folder_id']))
train_label_list = np.array(list(train_df['MGMT_value']))
#slices_list = np.array(list(df['flair']))
# Keep only patients that have between 1 and 49 FLAIR slices (bounds exclusive: 0 < n < 50).
indexes = np.where((train_slices_list > 0 )&(train_slices_list < 50))
train_slices_list = train_slices_list[indexes]
train_folders_list = train_folders_list[indexes]
train_label_list = train_label_list[indexes]

# --- test subset: identical treatment --------------------------------------
test_slices_list = np.array(get_all_slices(test_df,base_dir))
test_folders_list = np.array(list(test_df['folder_id']))
test_label_list = np.array(list(test_df['MGMT_value']))
indexes = np.where((test_slices_list > 0 )&(test_slices_list < 50))
test_slices_list = test_slices_list[indexes]
test_folders_list = test_folders_list[indexes]
test_label_list = test_label_list[indexes]

In [9]:
# df = pd.read_csv('../input/rsnasubmissionresult/result.csv',dtype='str')
# base_dir = '../input/classify-tumor-best/DATATUMORONLY_TRAIN/train'
# slices_list = np.array(get_all_slices(df,base_dir))
# #slices_list = np.array(list(df['flair']))
# folders_list = np.array(list(df['folder_id']))
# label_list = np.array(list(df['MGMT_value']))
# indexes = np.where((slices_list > 0 )&(slices_list < 50))
# slices_list = np.take(slices_list,indexes)[0]
# folders_list = np.take(folders_list,indexes)[0]
# label_list = np.take(label_list,indexes)[0]
# shuffler = np.random.permutation(len(slices_list))
# slices_list = slices_list[shuffler]
# folders_list = folders_list[shuffler]
# label_list = label_list[shuffler]
# train_slices_list,train_folders_list,train_label_list,\
# test_slices_list,test_folders_list,test_label_list = split_train_test(slices_list,folders_list,label_list,split_ratio=0.1)

In [10]:
class DataGenerator(keras.utils.Sequence):
    """Batch generator that groups patients with a similar number of FLAIR slices.

    Every call to ``__getitem__`` takes the first patient still in the pool as
    an anchor, randomly draws up to ``batch_size`` patients whose slice count
    lies within ``tolerance`` of the anchor's, removes them from the pool and
    loads their images into one array whose depth equals the largest slice
    count in the draw (shorter volumes are zero-padded at the end).
    ``on_epoch_end`` rebuilds the pool from the arrays given at construction.

    Args:
        slices_list: 1-D array with the number of FLAIR slices per patient.
        folders_list: 1-D array of folder names, aligned with ``slices_list``.
        label_list: 1-D array of class labels (castable to int), aligned too.
        width: target image width passed to the image loader.
        height: target image height passed to the image loader.
        batch_size: maximum number of patients per batch.
        shuffle: whether the pool is permuted every time it is rebuilt.
        base_dir: directory containing ``<folder>/flair/*`` for every patient.

    Note:
        ``__getitem__`` calls the module-level ``preprocess_input``; that name
        must be bound before the first batch is requested.
    """

    def __init__(self,slices_list,folders_list,label_list,width=256,height=256,batch_size=16,shuffle=True,
                 base_dir='../input/classify-tumor-best/DATATUMORONLY_TRAIN/train'):
        super().__init__()
        self.batch_size = batch_size
        self.base_dir = base_dir
        self.width = width
        self.crop_length = 224
        self.height = height
        self.tolerance = 5
        self.shuffle = shuffle
        # np.asarray lets callers pass plain lists; arrays are used as-is.
        self.intial_slices_list = np.asarray(slices_list)
        self.intial_folders_list = np.asarray(folders_list)
        self.intial_label_list = np.asarray(label_list)
        self.on_epoch_end()

    def on_epoch_end(self):
        """Announce the end of an epoch and rebuild the sampling pool."""
        print('epoch ended')
        self._reset_pool()

    def _reset_pool(self):
        """Restore the working arrays from the initial ones, permuting them if ``shuffle`` is set."""
        self.slices_list = self.intial_slices_list.copy()
        self.folders_list = self.intial_folders_list.copy()
        self.label_list = self.intial_label_list.copy()
        if self.shuffle:
            shuffler = np.random.permutation(len(self.slices_list))
            self.slices_list = self.slices_list[shuffler]
            self.folders_list = self.folders_list[shuffler]
            self.label_list = self.label_list[shuffler]

    def __len__(self):
        """Return the number of patients (not the number of batches).

        Callers in this notebook divide this value themselves to obtain a step
        count, so the meaning is kept as is.
        """
        return len(self.intial_slices_list)

    def __getitem__(self,user_index):
        """Draw the next batch from the pool.

        ``user_index`` is ignored: batches are formed from whatever is left in
        the pool, and the drawn patients are removed from it.

        Returns:
            tuple: ``(preprocess_input(volumes), one_hot_labels)`` where
            ``volumes`` has shape ``(n, height, width, max_depth, 1)``.

        Raises:
            IndexError: if the generator was built from empty arrays.
        """
        if len(self.slices_list) == 0:
            # Pool drained (more batches requested than one pass can serve):
            # start a fresh pass instead of failing on slices_list[0].
            self._reset_pool()
            if len(self.slices_list) == 0:
                raise IndexError('DataGenerator has no samples to draw from')
        anchor_depth = self.slices_list[0]
        within_tolerance = (self.slices_list >= anchor_depth-self.tolerance) & \
                           (self.slices_list <= anchor_depth+self.tolerance)
        candidates = np.where(within_tolerance)[0]
        random_indexes = np.random.choice(candidates, size=min(self.batch_size,len(candidates)),replace=False)
        random_folder = np.take(self.folders_list,random_indexes)
        random_slices = np.take(self.slices_list,random_indexes)
        random_labels = np.take(self.label_list,random_indexes)
        # Drawn patients leave the pool so they are served once per pass.
        self.folders_list = np.delete(self.folders_list,random_indexes)
        self.slices_list = np.delete(self.slices_list,random_indexes)
        self.label_list = np.delete(self.label_list,random_indexes)
        # Depth of this batch; read by the private loaders below.
        self.max_depth = random_slices.max()
        batch_x = self.__data_gen_batch(random_folder)
        return preprocess_input(batch_x),self.one_hot_encoder(random_labels.astype(np.int8))

    def one_hot_encoder(self,y,num_classes=2):
        """Return a ``(len(y), num_classes)`` float array with a 1 in column ``y[i]`` of row ``i``."""
        b = np.zeros((len(y), num_classes))
        b[np.arange(len(y)),y] = 1
        return b

    def get_max_len(self,batch,min_depth=50):
        """Return the largest FLAIR file count among ``batch['folder_id']``, but at least ``min_depth``."""
        longest = max(
            (len(glob.glob(os.path.join(self.base_dir,patient_id,'flair/*'))) for patient_id in batch['folder_id']),
            default=0,
        )
        return max(longest, min_depth)

    def __data_gen_image(self,folder_name):
        """Load one patient's FLAIR images into a ``(height, width, max_depth, 1)`` array.

        Files are ordered by the text between the last ``-`` and the file
        extension, left-padded with zeros to three characters. Slots beyond the
        number of files stay zero.
        """
        flair_path = glob.glob(os.path.join(self.base_dir,folder_name,'flair/*'))
        flair_path = sorted(flair_path,key=lambda x:x.split('-')[-1].split('.')[-2].zfill(3))
        # (depth, height, width, 1): the second spatial axis must be width so
        # that non-square targets match what load_img returns.
        all_images = np.zeros(shape=(self.max_depth,self.height,self.width,1),dtype=np.float64)
        for i,img_path in enumerate(flair_path):
            img = image.load_img(img_path,target_size=(self.height,self.width),color_mode='grayscale')
            all_images[i,] = image.img_to_array(img)
        # Move the depth axis behind the two spatial axes.
        return np.transpose(all_images,(1,2,0,3))

    def __data_gen_batch(self,folder_names):
        """Stack the volumes of ``folder_names`` into ``(n, height, width, max_depth, 1)``."""
        batch_data = np.empty(shape=(len(folder_names),self.height,self.width,self.max_depth,1))
        for i,patient_id in enumerate(folder_names):
            batch_data[i,] = self.__data_gen_image(patient_id)
        return batch_data

    def crop(self,image,crop_length=None):
        """Return the central ``crop_length`` x ``crop_length`` window of ``image``.

        Args:
            image: array whose first two axes are height and width and which
                has at least one further axis.
            crop_length: side of the window; defaults to ``self.crop_length``.

        Returns:
            The cropped view. An axis shorter than ``crop_length`` is returned
            in full.
        """
        if crop_length is None:
            crop_length = self.crop_length
        img_height ,img_width = image.shape[:2]
        start_y = max((img_height - crop_length) // 2, 0)
        start_x = max((img_width - crop_length) // 2, 0)
        # End at start + crop_length so an odd margin cannot add an extra pixel.
        return image[start_y:start_y + crop_length, start_x:start_x + crop_length, :]

In [11]:
# Training generator: up to five 224x224 volumes per batch.
train_datagen = DataGenerator(
    train_slices_list,
    train_folders_list,
    train_label_list,
    width=224,
    height=224,
    batch_size=5,
    shuffle=True,
)
# Held-out generator: one 224x224 volume per batch.
test_datagen = DataGenerator(
    test_slices_list,
    test_folders_list,
    test_label_list,
    width=224,
    height=224,
    batch_size=1,
    shuffle=True,
)

epoch ended
epoch ended


In [12]:
# for _ in range(3):
#     print('new epoch')
#     for i in range(len(test_datagen)-1):
#         x,y = test_datagen[i]
#         print(i,x.shape,len(test_datagen.slices_list))
#     test_datagen.on_epoch_end()

In [13]:
# basemodel = efn.EfficientNetB0(input_shape=(256, 256, None, 1), weights=None)
# x = layers.GlobalAveragePooling3D()(basemodel.output)
# x = layers.Dense(units=128, activation="relu")(x)
# x = layers.Dropout(0.1)(x)
# outputs = layers.Dense(units=2, activation="softmax")(x)
# # Define the model.
# model = keras.Model(basemodel.input, outputs, name="eff3dcnn")
# model.summary()

In [14]:
# Fetch the 3D ResNet-18 constructor together with its input preprocessing
# function; DataGenerator.__getitem__ calls the latter on every batch.
ResNet18, preprocess_input = Classifiers.get('resnet18')
# Untrained network (weights=None) on single-channel 224x224 inputs; the third
# spatial axis is None so its size may differ between batches.
model = ResNet18(
    input_shape=(224, 224, None, 1),
    include_top=True,
    weights=None,
)

In [15]:
#x = layers.Dense(units=128, activation="relu")(model.layers[-3].output)
#x = layers.Dropout(0.1)(x)
# Attach a fresh two-unit softmax head to the third-from-last layer of the
# backbone. NOTE(review): presumably that layer is the global pooling output
# sitting before the original classifier head — confirm with model.summary().
backbone_features = model.layers[-3].output
outputs = layers.Dense(units=2, activation="softmax")(backbone_features)
# Same input as the backbone, new head as output.
new_model = keras.Model(model.input, outputs, name="resnet18_3d")
new_model.summary()

Model: "resnet18_3d"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               [(None, 224, 224, No 0                                            
__________________________________________________________________________________________________
bn_data (BatchNormalization)    (None, 224, 224, Non 3           data[0][0]                       
__________________________________________________________________________________________________
zero_padding3d (ZeroPadding3D)  (None, 230, 230, Non 0           bn_data[0][0]                    
__________________________________________________________________________________________________
conv0 (Conv3D)                  (None, 112, 112, Non 21952       zero_padding3d[0][0]             
________________________________________________________________________________________

In [16]:
# Output folders for checkpoints and TensorBoard logs. exist_ok=True keeps the
# cell re-runnable: without it a second run fails with FileExistsError.
os.makedirs('models', exist_ok=True)
os.makedirs('logs', exist_ok=True)

In [17]:
# Two-class softmax output trained against one-hot labels.
new_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# Write a checkpoint only when validation accuracy reaches a new maximum.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="models/3d_image_classification.hdf5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=0,
)
# TensorBoard logging into ./logs.
summary = tf.keras.callbacks.TensorBoard(
    log_dir="./logs",
    histogram_freq=2,
    update_freq=1,
)

In [18]:
# len(generator) is the number of patients, not batches, so the step counts
# are derived here: patients // 5 for training (its batch size is 5) and
# patients - 2 for validation (batch size 1).
new_model.fit(
    train_datagen,
    epochs=300,
    steps_per_epoch=len(train_datagen)//5,
    validation_data=test_datagen,
    validation_steps=len(test_datagen)-2,
    callbacks=[checkpoint_cb, summary],
    verbose=1,
)

Epoch 1/300
epoch ended
Epoch 2/300
epoch ended
Epoch 3/300
epoch ended
Epoch 4/300
epoch ended
Epoch 5/300
epoch ended
Epoch 6/300
epoch ended
Epoch 7/300
epoch ended
Epoch 8/300
epoch ended
Epoch 9/300
epoch ended
Epoch 10/300
epoch ended
Epoch 11/300
epoch ended
Epoch 12/300
epoch ended
Epoch 13/300
epoch ended
Epoch 14/300
epoch ended
Epoch 15/300
epoch ended
Epoch 16/300
epoch ended
Epoch 17/300
epoch ended
Epoch 18/300
epoch ended
Epoch 19/300
epoch ended
Epoch 20/300
epoch ended
Epoch 21/300
epoch ended
Epoch 22/300
epoch ended
Epoch 23/300
epoch ended
Epoch 24/300
epoch ended
Epoch 25/300
epoch ended
Epoch 26/300
epoch ended
Epoch 27/300
epoch ended
Epoch 28/300
epoch ended
Epoch 29/300
epoch ended
Epoch 30/300
epoch ended
Epoch 31/300
epoch ended
Epoch 32/300
epoch ended
Epoch 33/300
epoch ended
Epoch 34/300
epoch ended
Epoch 35/300
epoch ended
Epoch 36/300
epoch ended
Epoch 37/300
epoch ended
Epoch 38/300
epoch ended
Epoch 39/300
epoch ended
Epoch 40/300
epoch ended
Epoch 41/

<tensorflow.python.keras.callbacks.History at 0x7f7700df2f90>

In [19]:
# Save the model in its current state, i.e. as left by the fit() call above.
# NOTE(review): despite the file name this is not necessarily the best epoch;
# the ModelCheckpoint callback (save_best_only=True) writes that one to
# models/3d_image_classification.hdf5.
new_model.save('best_50.hdf5')

In [20]:
# Fresh training generator (full pool) for the evaluation pass below.
train_datagen = DataGenerator(
    train_slices_list,
    train_folders_list,
    train_label_list,
    width=224,
    height=224,
    batch_size=5,
    shuffle=True,
)

epoch ended


In [21]:
# Count correct predictions over batches drawn from the training generator.
true_cnt = 0
all_cnt = 0
for i in range(len(train_datagen)//5):
    x,y = train_datagen[i]
    y_pred = new_model.predict(x)
    # Compare arg-max classes on the raw probabilities. Rounding to one decimal
    # first (as before) turns near-even predictions such as 0.52/0.48 into a
    # 0.5/0.5 tie, which argmax always resolves to class 0; np.round_ is also
    # no longer available in NumPy 2.x.
    output = np.argmax(y_pred,axis=1)==np.argmax(y,axis=1)
    true_cnt += int(output.sum())
    all_cnt += len(output)