# Malaria Detection In Human Cells

## 1. Download Dataset

#### Use the Kaggle API token (`kaggle.json`) to download the Kaggle dataset directly into the Google Colab notebook, without first downloading it to a local computer

In [None]:
# List kaggle.json in the working directory to confirm the API token file is present.
!ls -lha kaggle.json

In [None]:
# Copy the API token into ~/.kaggle (created if missing).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Make the token readable and writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the malaria cell-images dataset archive with the Kaggle CLI.
!kaggle datasets download -d iarunava/cell-images-for-detecting-malaria

#### Uncompressing zip file

In [None]:
from zipfile import ZipFile
print("uncompressing zip file")

filename = "cell-images-for-detecting-malaria.zip"

# Extract every member of the archive into the current working directory.
# The handle is deliberately not named `zip`: in a notebook that name would
# stay bound to the closed archive and hide the builtin zip() in later cells.
with ZipFile(filename, 'r') as archive:
    archive.extractall()

print("done")

## 2. Data Preprocessing

In [None]:
import os
import matplotlib.pyplot as plt
import random
import numpy as np

# Geometry of the images fed to the network; every image is resized to
# IMAGE_SIZE by the directory generators.
IMAGE_WIDTH = 112
IMAGE_HEIGHT = 112
IMAGE_CHANNELS = 3  # RGB color
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# Number of images per batch.
batch_size = 32

#### Dividing the data into training and validation subsets

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Root folder of the extracted dataset; flow_from_directory treats each
# sub-folder as one class.
train_dir = 'cell_images'

# Training pipeline: rescale pixel values to [0, 1] and apply random
# shear / zoom / horizontal-flip augmentation. 20% of the files are
# reserved for validation.
augs = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2,
)

# Validation pipeline: rescaling only. Augmenting the validation images
# would make val_loss / val_acc noisy and not comparable between epochs.
# The same validation_split is used so that both generators agree on which
# files belong to which subset.
# NOTE(review): this relies on the split being taken from the directory
# listing rather than drawn at random - confirm for the installed Keras.
validation_augs = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

# Batches of (image, binary label) drawn from the training subset.
train_gen = augs.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    class_mode='binary',
    subset='training',
)

# Batches of (image, binary label) drawn from the held-out validation subset.
validation_gen = validation_augs.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    class_mode='binary',
    subset='validation',
)




## 3. Create a model

#### The Keras library is used to create and train the image classifier

In [None]:
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense ,BatchNormalization
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint

In [None]:
# Files written by this notebook.

# Best weights seen during training (written by the ModelCheckpoint callback).
Checkpoint_file = "checkpoint_best_weights_malaria_detection.hdf5"

# Final saved model.
MODEL_FILE = "malaria_detection_model.h5"

# Plain-text dump of model.summary().
MODEL_SUMMARY_FILE = "malaria_detection_model_summary.txt"

# Per-epoch metrics written by the CSVLogger callback.
TRAINING_LOGS_FILE = "malaria_detection_training_logs.csv"

In [None]:
# Model: four convolutional stages followed by two dense layers and a
# single sigmoid unit for the binary classification.
#
# Conv2D is called with the Keras 2 signature: kernel_size=(3, 3) and
# padding='same'. The Keras 1 form `Conv2D(n, 3, 3, border_mode='same')`
# is read by the Keras 2 signature as kernel_size=3, strides=3, and
# `border_mode` is not an argument there.
model = Sequential()

# Stage 1: 32 filters. This layer also declares the input shape.
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS),
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# Stage 2: 64 filters.
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Stage 3: two stacked 128-filter convolutions before pooling.
model.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# Stage 4: 256 filters.
model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Classifier head.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# One output unit squashed to (0, 1) by the sigmoid.
model.add(Dense(1))
model.add(Activation('sigmoid'))

# The metric is spelled 'acc' so that it is logged as acc / val_acc, the
# name the val_acc-monitoring callbacks in this notebook look for. Keras
# 2.3+ logs a metric under the exact string given here; older releases
# report 'acc' for either spelling.
# NOTE(review): newer Keras releases name the optimizer argument
# `learning_rate` instead of `lr` - confirm against the installed version.
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.0001),
              metrics=['acc'])

#### Model architecture


In [None]:
from keras.utils.vis_utils import plot_model

# Render the layer graph to a PNG, annotated with layer names and shapes.
plot_model(
    model,
    to_file='malaria_detection_architecture.png',
    show_shapes=True,
    show_layer_names=True,
)

#### Creating summary file for our model

In [None]:
# Write model.summary() to MODEL_SUMMARY_FILE, one summary line per file line.
with open(MODEL_SUMMARY_FILE, "w") as summary_file:

    def _write_summary_line(line):
        # model.summary() hands over each line without a trailing newline.
        summary_file.write(line + "\n")

    model.summary(print_fn=_write_summary_line)


#### EarlyStopping - stops training early to prevent overfitting
#### ReduceLROnPlateau - reduces the learning rate when the monitored metric stops improving

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Guard against overfitting: stop training once val_loss (the EarlyStopping
# default monitor) has failed to improve for 5 consecutive epochs.
earlystop = EarlyStopping(patience=5)

# Halve the learning rate whenever val_acc has not improved for 2 epochs,
# never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)

# Log per-epoch metrics to a ';'-separated CSV file, overwriting any old log.
csv_logger = CSVLogger(TRAINING_LOGS_FILE, separator=";", append=False)

# Save the weights to Checkpoint_file only when val_acc reaches a new maximum.
best_checkpoint = ModelCheckpoint(
    Checkpoint_file,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

callbacks = [earlystop, learning_rate_reduction, csv_logger, best_checkpoint]



## 4. Training Model

In [None]:
# Whole batches per pass; integer division drops any final partial batch.
train_steps = train_gen.samples // batch_size
val_steps = validation_gen.samples // batch_size

# Train for at most 10 epochs and validate after each one. The callbacks
# handle early stopping, learning-rate reduction, CSV logging and
# checkpointing.
history = model.fit_generator(
    train_gen,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=validation_gen,
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Restore the best weights written by ModelCheckpoint. Using Checkpoint_file
# keeps this path in sync with the one the callback writes to.
model.load_weights(Checkpoint_file)

## 5. Saving trained model

In [None]:
from keras.models import load_model
# Save the trained model to the MODEL_FILE path declared with the other
# output-file constants, instead of repeating the literal file name.
model.save(MODEL_FILE)