In [None]:
import os
import shutil
import warnings

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training folds: every fold directory holds an "all" (leukemia) sub-folder
# and a "hem" (normal) sub-folder.
_train_root = "../input/leukemia-classification/C-NMC_Leukemia/training_data"

fold0_all, fold1_all, fold2_all = (
    f"{_train_root}/fold_{fold_no}/all" for fold_no in range(3)
)

fold0_nrml, fold1_nrml, fold2_nrml = (
    f"{_train_root}/fold_{fold_no}/hem" for fold_no in range(3)
)

# Validation images and the CSV that holds their labels live side by side.
_val_root = "../input/leukemia-classification/C-NMC_Leukemia/validation_data"
val_data = _val_root + "/C-NMC_test_prelim_phase_data"
val_labels = pd.read_csv(_val_root + "/C-NMC_test_prelim_phase_data_labels.csv")

In [None]:
# Preview one blood cell with acute lymphoblastic leukemia (fold 0, "all" folder).
can_image_fnames = os.listdir(fold0_all)
sample_all_path = os.path.join(fold0_all, can_image_fnames[5])
cancer_img = imageio.imread(sample_all_path)
plt.imshow(cancer_img)
plt.show()

In [None]:
# Preview one blood cell without cancer (fold 0, "hem" folder).
nrml_image_fnames = os.listdir(fold0_nrml)
sample_hem_path = os.path.join(fold0_nrml, nrml_image_fnames[5])
cell_img = imageio.imread(sample_hem_path)
plt.imshow(cell_img)
plt.show()

In [None]:
cell_img.shape
#image is 450x450 pixels, in color

In [None]:
def get_path_image(folder):
    """Return the path of every entry found directly inside ``folder``.

    Args:
        folder: Directory to list.

    Returns:
        list[str]: ``folder`` joined with each entry name. Names are sorted
        because ``os.listdir`` returns them in arbitrary order; sorting makes
        the result identical on every run and every machine.
    """
    return [os.path.join(folder, fname) for fname in sorted(os.listdir(folder))]



In [None]:
# Start the path list with the leukemia ("all") images of every training fold.
x_col = []
for all_folder in (fold0_all, fold1_all, fold2_all):
    x_col += get_path_image(all_folder)
print(len(x_col))


In [None]:
# Append the normal ("hem") images of every training fold to the same list.
for hem_folder in (fold0_nrml, fold1_nrml, fold2_nrml):
    x_col += get_path_image(hem_folder)
print(len(x_col))


In [None]:
# Column data for the training frame: image paths plus a placeholder label
# column (all NaN) of the same length.
canxr = {
    "x_col": x_col,
    "y_col": [np.nan] * len(x_col),
}

In [None]:
# One row per training image: its path (x_col) and its label (y_col).
train_df = pd.DataFrame(data=canxr)

In [None]:
train_df

In [None]:
# Label each row from the folder its image sits in ("all" -> ALL, anything
# else -> HEM) rather than from hard-coded row ranges. This stays correct if
# the number or order of files changes, and it assigns the whole column in one
# step, which avoids chained-indexing assignment (unreliable in pandas, and a
# no-op once copy-on-write is enabled).
train_df["y_col"] = [
    "ALL" if os.path.basename(os.path.dirname(img_path)) == "all" else "HEM"
    for img_path in train_df["x_col"]
]

In [None]:
train_df["y_col"][7271:7280]

In [None]:
# Class balance of the training set, shown as whole-number percentages.
class_counts = [
    (train_df["y_col"] == "ALL").sum(),
    (train_df["y_col"] == "HEM").sum(),
]
plt.pie(class_counts, labels=["ALL", "HEM"], autopct="%.0f%%")
plt.title("Training images per class")
plt.show()

In [None]:
# NOTE(review): val_x follows the directory-listing order produced by
# get_path_image, which is unrelated to the row order of the labels CSV.
# Confirm the two line up before pairing paths with labels.
val_x = get_path_image(val_data)

In [None]:
len(val_x)

In [None]:
val_labels["labels"].head(3)

In [None]:
# Pair every label with its own image. The directory listing in val_x has no
# guaranteed order, so matching it positionally against the CSV rows can
# attach labels to the wrong images. Build each path from the file name stored
# in the same CSV row instead.
# NOTE(review): assumes the CSV's "new_names" column holds the file names used
# inside val_data; confirm against the dataset.
if "new_names" in val_labels.columns:
    val_paths = [os.path.join(val_data, fname) for fname in val_labels["new_names"]]
else:
    warnings.warn(
        "labels CSV has no 'new_names' column; falling back to directory order, "
        "so validation labels may not match their images"
    )
    val_paths = val_x

val_dict = {"x_col": val_paths,
            "y_col": val_labels["labels"]}

In [None]:
val_df = pd.DataFrame(val_dict)
# Turn the numeric labels into the class names used in train_df
# (1 -> "ALL", 0 -> "HEM"). The result is assigned back to the column: calling
# replace(..., inplace=True) on val_df["y_col"] acts on a temporary Series and
# is not guaranteed to update val_df. Any label other than 0 or 1 becomes NaN.
val_df["y_col"] = val_df["y_col"].map({1: "ALL", 0: "HEM"})

In [None]:
val_df.info()

In [None]:
train_df.info()

In [None]:
# Training and validation each flow from their own generator, so any
# preprocessing or augmentation later added to train_gen does not silently
# apply to the validation images as well.
# NOTE(review): neither generator is given a rescale factor, so images reach
# the model with their raw pixel values. Confirm that is intended.
train_gen = ImageDataGenerator()
val_gen = ImageDataGenerator()

train_generator = train_gen.flow_from_dataframe(
    train_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(450, 450),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

val_generator = val_gen.flow_from_dataframe(
    val_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(450, 450),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

In [None]:
def modelFitter(train_generator,epchs,val_generator,batch_size,lr):
    """Train a small CNN at one learning rate and plot its accuracy curves.

    Useful for checking which learning rate suits the task: select a subplot,
    call this once per candidate rate, and each call draws its train and
    validation accuracy on the current axes.

    The network has two 5x5 convolution + max-pooling stages (6 and 16
    filters), dense layers of 120 and 84 units with L2 regularisation and
    batch normalisation, and one sigmoid output for the binary label.

    Args:
        train_generator: Keras batch iterator that yields the training batches.
        epchs: Maximum number of epochs to train for.
        val_generator: Keras batch iterator that yields the validation batches.
        batch_size: Accepted for backward compatibility but not used. The
            generators already produce batches, and the Keras documentation
            says not to pass ``batch_size`` to ``fit`` for generator input.
        lr: Learning rate handed to the Nadam optimizer.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Use the Keras bundled with TensorFlow, the same one the image generators
    # come from, rather than mixing in the standalone `keras` package.
    ks = tf.keras

    # Match the model input to the images the generator actually produces.
    # Fall back to the original 450x450 RGB size if the iterator does not
    # expose its image shape.
    input_shape = getattr(train_generator, "image_shape", (450, 450, 3))

    # NOTE(review): `decay` is a legacy optimizer argument; confirm the
    # installed Keras version still accepts it.
    opt = ks.optimizers.Nadam(learning_rate=lr,decay=1e-4)
    callback = ks.callbacks.EarlyStopping(monitor="val_loss",
                                          patience=10,
                                          verbose=2)

    # Build the architecture.
    model = ks.models.Sequential()
    model.add(ks.layers.Conv2D(6,(5,5),activation="elu",
                               name="layer1",
                               input_shape=input_shape))
    model.add(ks.layers.MaxPooling2D((2,2),strides=2,name="layer2"))
    model.add(ks.layers.Conv2D(16,(5,5),activation="elu",name="layer3"))
    model.add(ks.layers.MaxPooling2D((2,2),strides=2,name="layer4"))
    model.add(ks.layers.Flatten())
    model.add(ks.layers.Dense(120,activation="elu",
                              kernel_initializer="he_normal",
                              kernel_regularizer=ks.regularizers.l2(0.01)))
    model.add(ks.layers.BatchNormalization())
    model.add(ks.layers.Dense(84,activation="elu",
                              kernel_regularizer=ks.regularizers.l2(0.01)))
    model.add(ks.layers.BatchNormalization())
    # One sigmoid unit: probability of the positive class.
    model.add(ks.layers.Dense(1,activation="sigmoid",name="output"))
    model.summary()

    # Set the optimiser, the cost function and the metric to report.
    model.compile(loss="binary_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # No batch_size here: the generators decide how large each batch is.
    history = model.fit(train_generator,
                        steps_per_epoch=100,
                        epochs=epchs,
                        callbacks=[callback],
                        validation_data=val_generator,
                        validation_steps=50)

    # Visualise the learning curves on the current axes.
    plt.plot(history.history['accuracy'], label='train_acc')
    plt.plot(history.history['val_accuracy'], label='valid_acc')
    plt.title('lrate='+str(lr), pad=-50)
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.grid(True)

    return history
    

In [None]:
#Check output after fitting model on different lr.
learning_rates = [1E-0, 1E-1, 1E-2, 1E-3]
# Create the figure once, outside the loop, so the four learning-rate panels
# share one 2x2 grid instead of each landing in a separate figure.
plt.figure(figsize=(10,10))
for i, rate in enumerate(learning_rates):
    # modelFitter draws on the current axes, so select this panel first
    plt.subplot(2, 2, i + 1)
    # fit model and plot learning curves for a learning rate
    modelFitter(train_generator,10,val_generator,batch_size=32,lr=rate)
plt.show()

In [None]:
# Rebuild both generators at 227x227, the input size the alexnet model uses.
# Training and validation each flow from their own generator, so any
# preprocessing later added to train_gen does not silently apply to the
# validation images as well.
train_gen = ImageDataGenerator()
val_gen = ImageDataGenerator()

train_generator = train_gen.flow_from_dataframe(
    train_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(227, 227),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

val_generator = val_gen.flow_from_dataframe(
    val_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(227, 227),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

In [None]:
def alexnet(train_generator,epchs,val_generator,batch_size,lr):
    """Train an AlexNet-like CNN at one learning rate and plot its accuracy.

    The network stacks an 11x11 stride-4 convolution (96 filters), a 5x5
    convolution (256 filters) and three 3x3 convolutions (384, 384 and 256
    filters) with 3x3 max-pooling in between. These are followed by two dense
    layers of 1024 units with L2 regularisation and batch normalisation, and
    one sigmoid output for the binary label. The train and validation accuracy
    curves are drawn on the current matplotlib axes.

    Args:
        train_generator: Keras batch iterator that yields the training batches.
        epchs: Maximum number of epochs to train for.
        val_generator: Keras batch iterator that yields the validation batches.
        batch_size: Accepted for backward compatibility but not used. The
            generators already produce batches, and the Keras documentation
            says not to pass ``batch_size`` to ``fit`` for generator input.
        lr: Learning rate handed to the Nadam optimizer.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Use the Keras bundled with TensorFlow, the same one the image generators
    # come from, rather than mixing in the standalone `keras` package.
    ks = tf.keras

    # Match the model input to the images the generator actually produces.
    # Fall back to the original 227x227 RGB size if the iterator does not
    # expose its image shape.
    input_shape = getattr(train_generator, "image_shape", (227, 227, 3))

    # NOTE(review): `decay` is a legacy optimizer argument; confirm the
    # installed Keras version still accepts it.
    opt = ks.optimizers.Nadam(learning_rate=lr,decay=1e-4)
    callback = ks.callbacks.EarlyStopping(monitor="val_loss",
                                          patience=10,
                                          verbose=2)

    # Build the architecture.
    model = ks.models.Sequential()
    model.add(ks.layers.Conv2D(96,(11,11),
                               strides=4,
                               activation="elu",
                               name="layer1",
                               input_shape=input_shape))
    model.add(ks.layers.MaxPooling2D((3,3),strides=2,name="layer2"))
    model.add(ks.layers.Conv2D(256,(5,5),padding="valid",activation="elu",name="layer3"))
    model.add(ks.layers.MaxPooling2D((3,3),strides=2,name="layer4"))
    model.add(ks.layers.Conv2D(384,(3,3),padding="valid",activation="elu",name="layer5"))
    model.add(ks.layers.Conv2D(384,(3,3),padding="valid",activation="elu",name="layer6"))
    model.add(ks.layers.Conv2D(256,(3,3),padding="valid",activation="elu",name="layer7"))
    model.add(ks.layers.MaxPooling2D((3,3),strides=2,name="layer8"))
    model.add(ks.layers.Flatten())
    model.add(ks.layers.Dense(1024,activation="elu",
                              kernel_initializer="he_normal",
                              kernel_regularizer=ks.regularizers.l2(0.01)))
    model.add(ks.layers.BatchNormalization())
    model.add(ks.layers.Dense(1024,activation="elu",
                              kernel_regularizer=ks.regularizers.l2(0.01)))
    model.add(ks.layers.BatchNormalization())
    # One sigmoid unit: probability of the positive class.
    model.add(ks.layers.Dense(1,activation="sigmoid",
                              kernel_initializer="glorot_uniform",
                              name="output"))
    model.summary()

    # Set the optimiser, the cost function and the metric to report.
    model.compile(loss="binary_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # No batch_size here: the generators decide how large each batch is.
    history = model.fit(train_generator,
                        steps_per_epoch=100,
                        epochs=epchs,
                        callbacks=[callback],
                        validation_data=val_generator,
                        validation_steps=50)

    # Visualise the learning curves on the current axes.
    plt.plot(history.history['accuracy'], label='train_acc')
    plt.plot(history.history['val_accuracy'], label='valid_acc')
    plt.title('lrate='+str(lr), pad=-50)
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.grid(True)

    return history
    

In [None]:
#Check output after fitting model on different lr.
learning_rates = [1E-0, 1E-1, 1E-2, 1E-3]
# Create the figure once, outside the loop, so the four learning-rate panels
# share one 2x2 grid instead of each landing in a separate figure.
plt.figure(figsize=(10,10))
for i, rate in enumerate(learning_rates):
    # alexnet draws on the current axes, so select this panel first
    plt.subplot(2, 2, i + 1)
    # fit model and plot learning curves for a learning rate
    alexnet(train_generator,10,val_generator,batch_size=32,lr=rate)
plt.show()

In [None]:
# modelFitter was tuned on 450x450 images (its learning-rate sweep used the
# 450x450 generators), but by this point train_generator and val_generator
# have been rebuilt at 227x227 for alexnet. Feed this run from dedicated
# 450x450 generators so a top-to-bottom run trains on the intended input size.
cnn450_flow_args = dict(
    x_col="x_col",
    y_col="y_col",
    target_size=(450, 450),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')
cnn450_train_generator = ImageDataGenerator().flow_from_dataframe(train_df, **cnn450_flow_args)
cnn450_val_generator = ImageDataGenerator().flow_from_dataframe(val_df, **cnn450_flow_args)

# The generators above decide the actual batch size (32).
history1 = modelFitter(cnn450_train_generator,100,cnn450_val_generator,batch_size=16,lr=1.0)

In [None]:
# 227x227 generators for the final alexnet run. Training and validation each
# flow from their own generator, so any preprocessing later added to train_gen
# does not silently apply to the validation images as well.
train_gen = ImageDataGenerator()
val_gen = ImageDataGenerator()

train_generator = train_gen.flow_from_dataframe(
    train_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(227, 227),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

val_generator = val_gen.flow_from_dataframe(
    val_df,
    x_col="x_col",
    y_col="y_col",
    target_size=(227, 227),
    batch_size=32,
    color_mode="rgb",
    shuffle=True,
    class_mode='binary')

# NOTE(review): lr=1E+1 (10.0) is larger than every rate tried in the
# learning-rate sweep (1E-0 down to 1E-3). Confirm it is not a typo for a
# smaller value.
history2 = alexnet(train_generator,100,val_generator,batch_size=16,lr=1E+1)