### IMPORTING LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
import os
import tqdm
import skimage.io
import glob

from tqdm import tqdm

from skimage.io import imread, imshow
from skimage.transform import resize

from sklearn.utils import shuffle

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Conv2D, BatchNormalization, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

%matplotlib inline

### IMPORTING / VIEWING / PREPROCESSING DATASET

In [None]:
# Reading Data

# Training images are spread over three folds (fold_0..fold_2), each with an
# `all` and a `hem` sub-folder of .bmp files.
# glob.glob returns matches in arbitrary, filesystem-dependent order, so every
# listing is sorted: the sample order (and therefore the later seeded shuffle)
# is then the same on every machine and every run.
train_dir = '../input/leukemia-classification/C-NMC_Leukemia/training_data'

train_dataset_0_all = sorted(glob.glob(train_dir + '/fold_0/all/*.bmp'))
train_dataset_0_hem = sorted(glob.glob(train_dir + '/fold_0/hem/*.bmp'))
train_dataset_1_all = sorted(glob.glob(train_dir + '/fold_1/all/*.bmp'))
train_dataset_1_hem = sorted(glob.glob(train_dir + '/fold_1/hem/*.bmp'))
train_dataset_2_all = sorted(glob.glob(train_dir + '/fold_2/all/*.bmp'))
train_dataset_2_hem = sorted(glob.glob(train_dir + '/fold_2/hem/*.bmp'))

# Currently unused listings of the raw test / validation image folders.
#test_dataset  = glob.glob('../input/leukemia-classification/C-NMC_Leukemia/testing_data/C-NMC_test_final_phase_data/*.bmp')
#valid_dataset = glob.glob('../input/leukemia-classification/C-NMC_Leukemia/validation_data/C-NMC_test_prelim_phase_data/*.bmp')

# Validation labels table; later cells read its `new_names` (image file name)
# and `labels` (target) columns.
valid_data    = pd.read_csv('../input/leukemia-classification/C-NMC_Leukemia/validation_data/C-NMC_test_prelim_phase_data_labels.csv')

In [None]:
# Number of `all` images in each fold (a, b, c) and across all folds (d).
a, b, c = (len(paths) for paths in (train_dataset_0_all, train_dataset_1_all, train_dataset_2_all))
d = sum((a, b, c))
print("Total observation:", d)

In [None]:
# Number of `hem` images in each fold (a, b, c) and across all folds (d).
a, b, c = map(len, (train_dataset_0_hem, train_dataset_1_hem, train_dataset_2_hem))
d = sum((a, b, c))
print("Total observation:", d)

In [None]:
# Row count of the validation labels table.
valid_data.shape[0]

In [None]:
# Peek at the first five rows of the validation labels table.
valid_data.head(n=5)

> `TRAIN DATA PREPROCESSING`

In [None]:
# Merge the three folds into one array of file paths per class:
# A gathers the `all` folders, H gathers the `hem` folders (fold order 0, 1, 2).
A = np.array([*train_dataset_0_all, *train_dataset_1_all, *train_dataset_2_all])
H = np.array([*train_dataset_0_hem, *train_dataset_1_hem, *train_dataset_2_hem])

len(A), len(H)

In [None]:
# Viewing Diseased Cell - A

fig, ax = plt.subplots(nrows = 1, ncols = 5, figsize = (20,20))

# One randomly picked image from A per subplot, resized to 128x128, axes hidden.
for i in tqdm(range(5)):
    rand  = np.random.randint(len(A))
    img   = resize(imread(A[rand]), (128,128))
    panel = ax[i]
    panel.imshow(img)
    panel.axis('off')

In [None]:
# Viewing Non-Diseased Cell - H

fig, ax = plt.subplots(nrows = 1, ncols = 5, figsize = (20,20))

# One randomly picked image from H per subplot, resized to 128x128, axes hidden.
for i in tqdm(range(5)):
    rand  = np.random.randint(len(H))
    img   = resize(imread(H[rand]), (128,128))
    panel = ax[i]
    panel.imshow(img)
    panel.axis('off')

In [None]:
# Load every training image into memory, resized to 128x128, together with its
# class label: 1 for the paths in A, 0 for the paths in H (A first, then H).
#
# skimage's resize hands back floating-point images (float64 for the integer
# images imread typically produces). Holding the whole training set in float64
# doubles its memory footprint, and Keras works in float32 by default, so each
# image is stored as float32 as soon as it is resized.
Image = []
Label = []

for paths, label in ((A, 1), (H, 0)):
    for path in tqdm(paths):
        img = resize(imread(path), (128,128)).astype(np.float32)
        Image.append(img)
        Label.append(label)

Image = np.array(Image)
Label = np.array(Label)

Image.shape, Label.shape

In [None]:
# The path arrays are not needed once the images are loaded.
del A, H

In [None]:
# Shuffle images and labels in unison: the loading step appended all class-1
# samples before any class-0 sample. The fixed seed keeps the permutation repeatable.

Image, Label = shuffle(
    Image,
    Label,
    random_state = 42,
)

In [None]:
# Viewing Image - After Shuffle

fig, ax = plt.subplots(nrows = 1, ncols = 5, figsize = (20,20))

# Five random samples; the title comes from the label (1 -> Diseased).
for i in tqdm(range(5)):
    rand  = np.random.randint(len(Image))
    panel = ax[i]
    panel.imshow(Image[rand])
    panel.axis('off')
    a = Label[rand]
    panel.set_title('Diseased' if a == 1 else 'Non_Diseased')

In [None]:
# Bind the shuffled images and labels to the conventional X / y names
# (plain aliases, no copy is made).

X, y = Image, Label

In [None]:
# Drop the old names; the arrays stay reachable through X and y.
del Image, Label

> `VALID DATA PREPROCESSING`

In [None]:
# First five rows of the validation labels table (head's default row count).
valid_data.head(n=5)

In [None]:
# Checking target column distribution

valid_data['labels'].value_counts()

In [None]:
# Load the validation images, in the row order of valid_data, into one numpy array.
#
# skimage's resize hands back floating-point images (float64 for the integer
# images imread typically produces); they are stored as float32 instead, which
# halves the array's memory and matches Keras' default float type.

valid_dir = '../input/leukemia-classification/C-NMC_Leukemia/validation_data/C-NMC_test_prelim_phase_data/'

X_val = []

for image_name in valid_data.new_names:
    # Load the file named in the `new_names` column
    img = imread(valid_dir + image_name)
    # Resize to the 128x128 size used for the training images
    img = resize(img, (128,128)).astype(np.float32)
    X_val.append(img)

# Converting into array
X_val = np.array(X_val)


# Targets come from the `labels` column, aligned row-for-row with X_val
y_val = valid_data.labels.values

### DATA AUGMENTATION

In [None]:
# Training

# On-the-fly augmentation for training batches: random horizontal and vertical
# flips plus a random zoom with zoom_range = 0.2.
train_datagen  = ImageDataGenerator(horizontal_flip=True,
                                    vertical_flip=True,
                                    zoom_range = 0.2)

# NOTE: train_datagen.fit(X) is deliberately not called. fit() only computes
# dataset statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening; none of those is enabled here, so the call would just copy the
# whole training array and have no effect on the generator.

### MODEL BUILDING

> `MODEL INITIALIZATION`

In [None]:
# Start from an empty Sequential model; layers are appended to it one by one afterwards.
model = Sequential()

> `BUILDING LAYERS`

In [None]:
# Input layer: 128x128 images with 3 channels
model.add(InputLayer(input_shape = (128,128,3)))


# Convolutional stages: Conv -> BatchNorm -> 2x2 MaxPool -> Dropout,
# repeated three times with only the filter count changing.
for n_filters in (32, 64, 128):
    model.add(Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'valid', activation = 'relu'))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size = (2, 2), padding = 'valid'))
    model.add(Dropout(0.3))


# Flatten the feature maps into one vector per image
model.add(Flatten())


# Fully connected stages: Dense -> BatchNorm -> Dropout, twice
for n_units in (512, 256):
    model.add(Dense(units = n_units, activation = 'relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))


# Output layer: a single sigmoid unit for the binary target
model.add(Dense(units = 1, activation = 'sigmoid'))

In [None]:
# Summary: print the layer-by-layer overview of the model built above

model.summary()

In [None]:
# Compile Model: Adam with a 1e-5 learning rate, binary cross-entropy loss,
# accuracy reported as the metric.

model.compile(
    loss = 'binary_crossentropy',
    optimizer = tensorflow.keras.optimizers.Adam(1e-5),
    metrics = ['accuracy'],
)

In [None]:
# Defining callbacks

filepath = './best_weights.hdf5'

# Both callbacks track validation accuracy, where a higher value is better.

# Write the model to `filepath` only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath,
    monitor = 'val_accuracy',
    verbose = 1,
    save_best_only = True,
    mode = 'max',
)

# Stop training after 15 epochs without an improvement.
earlystopping = EarlyStopping(
    monitor = 'val_accuracy',
    patience = 15,
    mode = 'max',
)

callback_list = [earlystopping, checkpoint]

> `MODEL FIT`

In [None]:
# Number of training and validation samples.
X.shape[0], X_val.shape[0]

In [None]:
# Train on augmented batches of 512 images for at most 500 epochs, validating
# on the un-augmented validation arrays; the callbacks handle early stopping
# and checkpointing.
model.fit(
    x = train_datagen.flow(X, y, batch_size = 512),
    epochs = 500,
    validation_data = (X_val, y_val),
    callbacks = callback_list,
    verbose = 1,
)