In [20]:
import tensorflow as tf
import scipy
import sklearn
from sklearn.model_selection import train_test_split
from nilearn.image import concat_imgs, mean_img, resample_img

import tensorflow as tflow
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from keras.layers.core import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras import Input

import os

import nibabel as nib
import nilearn

import json 

import numpy as np
import math
from random import sample

from sklearn.utils.class_weight import compute_class_weight

In [9]:
!ls './preprocessed-data/images/'

case_00000.nii.gz case_00075.nii.gz case_00150.nii.gz case_00225.nii.gz
case_00001.nii.gz case_00076.nii.gz case_00151.nii.gz case_00226.nii.gz
case_00002.nii.gz case_00077.nii.gz case_00152.nii.gz case_00227.nii.gz
case_00003.nii.gz case_00078.nii.gz case_00153.nii.gz case_00228.nii.gz
case_00004.nii.gz case_00079.nii.gz case_00154.nii.gz case_00229.nii.gz
case_00005.nii.gz case_00080.nii.gz case_00155.nii.gz case_00230.nii.gz
case_00006.nii.gz case_00081.nii.gz case_00156.nii.gz case_00231.nii.gz
case_00007.nii.gz case_00082.nii.gz case_00157.nii.gz case_00232.nii.gz
case_00008.nii.gz case_00083.nii.gz case_00158.nii.gz case_00233.nii.gz
case_00009.nii.gz case_00084.nii.gz case_00159.nii.gz case_00234.nii.gz
case_00010.nii.gz case_00085.nii.gz case_00160.nii.gz case_00235.nii.gz
case_00011.nii.gz case_00086.nii.gz case_00161.nii.gz case_00236.nii.gz
case_00012.nii.gz case_00087.nii.gz case_00162.nii.gz case_00237.nii.gz
case_00013.nii.gz case_00088.nii.gz case_00163.nii.

In [10]:
rootdir = './preprocessed-data/images'
image_paths_list = []
annotation_paths_list = []

for file in os.listdir(rootdir):
    d = os.path.join(rootdir, file)
    image_paths_list.append(d)
    
    
image_dict = {}

for img_path in image_paths_list[:150]:
    case_id = img_path[27:37]
    ct_nii = nib.load(img_path).get_fdata()
    print(case_id, ct_nii.shape)
    image_dict[case_id] = ct_nii

with open('../kits21/kits21/data/kits.json') as user_file:
    file_contents = user_file.read()

meta_list = json.loads(file_contents)

labels_dict = {}
for case in meta_list:
    c_id = case['case_id']
    labels_dict[c_id] = case['malignant']

for key, value in labels_dict.items():
    print(key, value)
    


case_00262 (63, 224, 224)
case_00201 (96, 224, 224)
case_00270 (93, 224, 224)
case_00213 (636, 224, 224)
case_00246 (598, 224, 224)
case_00258 (107, 224, 224)
case_00092 (98, 224, 224)
case_00225 (103, 224, 224)
case_00192 (133, 224, 224)
case_00254 (250, 224, 224)
case_00229 (266, 224, 224)
case_00180 (137, 224, 224)
case_00237 (175, 224, 224)
case_00080 (88, 224, 224)
case_00157 (548, 224, 224)
case_00057 (80, 224, 224)
case_00149 (97, 224, 224)
case_00049 (670, 224, 224)
case_00134 (61, 224, 224)
case_00283 (337, 224, 224)
case_00034 (110, 224, 224)
case_00045 (62, 224, 224)
case_00145 (86, 224, 224)
case_00038 (32, 224, 224)
case_00138 (76, 224, 224)
case_00026 (302, 224, 224)
case_00291 (131, 224, 224)
case_00126 (107, 224, 224)
case_00073 (145, 224, 224)
case_00173 (97, 224, 224)
case_00010 (50, 224, 224)
case_00110 (34, 224, 224)
case_00161 (59, 224, 224)
case_00061 (29, 224, 224)
case_00102 (316, 224, 224)
case_00002 (261, 224, 224)
case_00059 (738, 224, 224)
case_00159 (719, 2

In [11]:
img_list, id_list, label_list = [], [], []

common_keys = labels_dict.keys() & image_dict.keys()

for k in common_keys:
    id_list.append(k)
    img_list.append(image_dict[k])
    label_list.append(labels_dict[k])
    
print(len(img_list), len(id_list), len(label_list))

150 150 150


In [12]:
img_train, img_test_temp, labels_train, labels_test_temp = train_test_split(
    img_list, label_list, test_size=0.30, shuffle=True, stratify=label_list)

id_val, id_test, labels_val, labels_test = train_test_split(
    img_test_temp, labels_test_temp, test_size=0.50, shuffle=True, random_state=42, stratify=labels_test_temp)

In [13]:
img_train_array = np.concatenate( img_train, axis=0 )
x_train = np.repeat(img_train_array[..., np.newaxis], 3, -1)

In [14]:
x_train.shape

(21538, 224, 224, 3)

In [15]:
y_train_list = []
for i in range(len(img_train)):
    
    repeat = img_train[i].shape[0]
    label = labels_train[i]
    y_train_list.extend([label]*repeat)
    
    
y_train = np.asarray(y_train_list).astype('int').reshape((-1,1))

In [16]:
img_val_array = np.concatenate( id_val, axis=0 )
x_val = np.repeat(img_val_array[..., np.newaxis], 3, -1)

In [17]:
y_val_list = []
for i in range(len(id_val)):
    
    repeat = id_val[i].shape[0]
    label = labels_val[i]
    y_val_list.extend([label]*repeat)
    
    
y_val = np.asarray(y_val_list).astype('int').reshape((-1,1))

## upsampling 

In [None]:
from imblearn.over_sampling import SMOTE

x_train_reshape = x_train.reshape(x_train.shape[0], 224 * 224 * 3)

sm = SMOTE(random_state=42)

x_smote, y_smote = sm.fit_resample(x_train_reshape, y_train)
x_smote = x_smote.reshape(x_smote.shape[0], 224, 224, 3)


In [37]:
x_smote.shape

(14408, 224, 224, 1)

## Developing baseline model

In [None]:
# draft model
#inputs = Input(shape=(224, 224, 3))
resnet = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

for layer in resnet.layers:
    layer.trainable = False


metrics_list = [tf.keras.metrics.AUC(name = 'auc'),
                tf.keras.metrics.BinaryAccuracy(name = 'accuracy')]



#calculate class weights
#class_weights = {0 : 20, 1 : 1}

optimizer_fn = tf.keras.optimizers.experimental.RMSprop(learning_rate=0.00002, jit_compile = False)


model = Sequential()
model.add(resnet)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  

model.compile(optimizer = optimizer_fn, loss='binary_crossentropy', metrics= metrics_list)
model.summary()
model.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=10, batch_size = 200)


Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 flatten_8 (Flatten)         (None, 100352)            0         
                                                                 
 dense_23 (Dense)            (None, 512)               51380736  
                                                                 
 dropout_8 (Dropout)         (None, 512)               0         
                                                                 
 dense_24 (Dense)            (None, 256)               131328    
                                                                 
 dense_25 (Dense)            (None, 128)               32896     
                                                                 
 dense_26 (Dense)            (None, 64)               

2023-03-23 18:48:25.379614: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2023-03-23 18:51:47.322365: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

In [None]:
results = model.evaluate(x_val, y_val)
results

In [None]:
[0.5584449768066406, 0.86044842004776]