In [143]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import imageio
import numpy as np 
from timeit import default_timer as timer
import tensorflow as tf
from tensorflow.keras.preprocessing import image 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import models, layers, optimizers


In [144]:
# Locations inside the C-NMC leukemia dataset, all derived from shared roots.
DATASET_ROOT = "/kaggle/input/leukemia-classification/C-NMC_Leukemia"
TRAINING_ROOT = f"{DATASET_ROOT}/training_data"
VALIDATION_ROOT = f"{DATASET_ROOT}/validation_data"

# "all" folders: one per training fold.
fold0_all = f"{TRAINING_ROOT}/fold_0/all"
fold1_all = f"{TRAINING_ROOT}/fold_1/all"
fold2_all = f"{TRAINING_ROOT}/fold_2/all"

# "hem" folders hold the normal cells, again one per training fold.
fold0_nrml = f"{TRAINING_ROOT}/fold_0/hem"
fold1_nrml = f"{TRAINING_ROOT}/fold_1/hem"
fold2_nrml = f"{TRAINING_ROOT}/fold_2/hem"

# Preliminary-phase test images (trailing slash kept) and their label table.
test_data = f"{VALIDATION_ROOT}/C-NMC_test_prelim_phase_data/"
test_labels = pd.read_csv(f"{VALIDATION_ROOT}/C-NMC_test_prelim_phase_data_labels.csv")

In [145]:
def get_path_image(folder):
    """Return the joined path of every entry found directly inside ``folder``.

    The entries are sorted by name. ``os.listdir`` returns them in arbitrary
    order, and later cells slice these lists by position, so sorting is what
    makes those slices reproducible from run to run.

    Args:
        folder: Directory to list.

    Returns:
        A list of ``os.path.join(folder, name)`` strings, one per directory
        entry, in ascending name order. Empty if the directory is empty.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the folder is missing).
    """
    return [os.path.join(folder, fname) for fname in sorted(os.listdir(folder))]

In [146]:
# Paths of every image in the three "all" (cancer cell) fold folders,
# concatenated in fold order.
cancer_lst = [
    img_path
    for fold_dir in (fold0_all, fold1_all, fold2_all)
    for img_path in get_path_image(fold_dir)
]

print(len(cancer_lst))

In [147]:
# Paths of every image in the three "hem" (normal cell) fold folders,
# concatenated in fold order.
normal_lst = []
for fold_dir in (fold0_nrml, fold1_nrml, fold2_nrml):
    normal_lst.extend(img_path for img_path in get_path_image(fold_dir))

print(len(normal_lst))

In [148]:
# One row per image: "x_col" is the file path, "y_col" the class label.
# The label is a plain string; pandas broadcasts a scalar to every row, so no
# placeholder list is needed.
cancer_dict = {"x_col": cancer_lst, "y_col": "ALL"}
normal_dict = {"x_col": normal_lst, "y_col": "HEM"}

cancer_df = pd.DataFrame(cancer_dict)
normal_df = pd.DataFrame(normal_dict)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
train_df = pd.concat([cancer_df, normal_df], ignore_index=True)

In [149]:
# Hold out 25% of each class for validation so both splits keep the class ratio.
all_len = len(cancer_df)
norm_len = len(normal_df)

nrm_size = int((norm_len / 100) * 25)
all_size = int((all_len / 100) * 25)

# The slices are positional: validation takes the first rows of each class
# frame and training takes the remainder. Original row indices are kept.
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
validation_df = pd.concat([normal_df[0:nrm_size], cancer_df[0:all_size]])

# Replaces any previously built train_df with the rows not used for validation.
train_df = pd.concat([normal_df[nrm_size:norm_len], cancer_df[all_size:all_len]])

In [150]:
# Build the test frame: "x_col" is the image path, "y_col" the class label.
# The labels come from test_labels["labels"], where 0 means normal (HEM) and
# 1 means cancer (ALL).
#
# A directory listing has no guaranteed order, so pairing listed files with
# CSV rows by position can attach labels to the wrong images. When the CSV
# carries the file names, build each path from its own row instead.
# NOTE(review): assumes the CSV's "new_names" column holds the file names
# found inside test_data -- confirm against the dataset.
if "new_names" in test_labels.columns:
    test_list = [os.path.join(test_data, fname) for fname in test_labels["new_names"]]
else:
    # Fallback keeps the original positional pairing; alignment is NOT guaranteed.
    test_list = get_path_image(test_data)

test_dict = {"x_col": test_list,
             "y_col": test_labels["labels"]}

test_df = pd.DataFrame(test_dict)

# Assign the result back rather than using inplace=True on a column selection,
# which may act on a temporary copy and leave test_df unchanged.
test_df["y_col"] = test_df["y_col"].replace(to_replace=[1, 0], value=["ALL", "HEM"])



In [151]:
# Both generators only rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three flows; batch size is left at the library default.
common_flow_args = dict(
    x_col="x_col",
    y_col="y_col",
    target_size=(256, 256),
    color_mode="rgb",
    class_mode="binary",
)

train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **common_flow_args
)

# The validation flow reuses train_datagen and is shuffled as well.
validation_generator = train_datagen.flow_from_dataframe(
    validation_df, shuffle=True, **common_flow_args
)

# The test flow is not shuffled, so batches follow the row order of test_df.
test_generator = test_datagen.flow_from_dataframe(
    test_df, shuffle=False, **common_flow_args
)





In [152]:
# First CNN: four identical feature blocks followed by a small dense head.
# Each block is ZeroPadding2D(2) -> Conv2D(30, 3x3, relu) -> MaxPooling2D(2x2)
# -> Dropout(0.2).
model = models.Sequential()

for block_idx in range(4):
    if block_idx == 0:
        # Only the first layer of a Sequential model needs the input shape;
        # later layers infer theirs, so it is not repeated on them.
        model.add(layers.ZeroPadding2D(padding=(2, 2), input_shape=(256, 256, 3)))
    else:
        model.add(layers.ZeroPadding2D(padding=(2, 2)))
    model.add(layers.Conv2D(30, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))

# Classifier head: one hidden dense layer and a single sigmoid output unit.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1, activation='sigmoid'))

In [153]:
# Binary classification setup: sigmoid output trained with binary cross-entropy.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=1e-6),
              metrics=['accuracy', 'Recall'])

# Train for 8 epochs, reporting metrics on the validation generator each epoch.
# `history` keeps the object returned by fit.
history = model.fit(train_generator,
                    epochs=8,
                    validation_data=validation_generator,
                    workers=7
                    )

In [154]:
# Evaluate the trained model on the unshuffled test generator; `score` holds
# whatever model.evaluate returns for the compiled loss and metrics.
score = model.evaluate(test_generator, verbose=1)

In [155]:
!pip install git+https://github.com/paulgavrikov/visualkeras
import visualkeras
visualkeras.layered_view(model,legend=True,  to_file='Final.png')

In [156]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

#### Final Model Summary - 2x2C1D Dropout (two blocks of two Conv2D layers, one hidden Dense layer, with Dropout)

In [157]:
# Second CNN: two blocks of (Conv2D x2 -> MaxPooling2D -> Dropout 0.5),
# then one hidden dense layer and a single sigmoid output unit.
# Rebinding `model` replaces the previously trained network.
model = models.Sequential([
    # Block 1
    layers.Conv2D(64, 3, activation='relu', input_shape=(256, 256, 3)),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),
    # Block 2
    layers.Conv2D(64, 3, activation='relu'),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),
    # Classifier head
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

# RMSprop is used with its default settings here.
model.compile(
    optimizer=optimizers.RMSprop(),
    loss='binary_crossentropy',
    metrics=['accuracy', 'Recall'],
)

# Train for 8 epochs with per-epoch validation; rebinding `history` replaces
# the record from the earlier training run.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=8,
    workers=7,
)

In [158]:
# Evaluate the second model on the unshuffled test generator; this rebinds
# `score`, replacing the value from the earlier evaluation.
score = model.evaluate(test_generator, verbose=1)

In [159]:
# Render a layered diagram of the current `model` to Final.png.
# Relies on `visualkeras` having been installed and imported in an earlier cell.
visualkeras.layered_view(model,legend=True,  to_file='Final.png')

In [160]:
# Print the layer-by-layer summary of the second model.
model.summary()