In [1]:
import os
import numpy as np
import random
from shutil import copyfile
import matplotlib

from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.convolutional import ZeroPadding2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.layers import add
from keras.regularizers import l2
from keras import backend as K

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.optimizers import SGD
from sklearn.metrics import classification_report
#from imutils import paths
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# The data set can be downloaded from ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
# Set the dir path in 'top_path' variable
top_path = 'E:\\Malaria dataset'
cell_img = 'cell_images'
base_dir = os.path.join(top_path, cell_img)
uninfect = 'Uninfected'
infect = 'Parasitized'

# Collect the full path of every file in both class folders.
# Paths are built with os.path.join so they always use os.path.sep, which is
# the separator the later cells split on to recover the class label.
file_list = []
for class_name in (uninfect, infect):
    class_dir = os.path.join(base_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting first makes the
    # seeded shuffle below produce the same permutation on every run.
    for each in sorted(os.listdir(class_dir)):
        file_list.append(os.path.join(class_dir, each))

# A single shuffle already yields a uniformly random order. A dedicated,
# seeded generator keeps the train/val/test split reproducible across kernel
# restarts without touching the global `random` state.
random.Random(42).shuffle(file_list)

In [3]:
# Fractions of the shuffled file list assigned to each subset.
train_split = 0.8
val_split = 0.1
test_split = 0.1

# Compare with a tolerance: exact float equality rejects valid splits whose
# binary representation does not sum to exactly 1 (e.g. 0.7 + 0.2 + 0.1).
if abs(train_split + val_split + test_split - 1) > 1e-9:
    raise Exception('Data splits do not equal to 1')

# Slice boundaries inside file_list: [0, train_end) is train,
# [train_end, test_end) is test, and everything after test_end is validation.
train_end = int(len(file_list) * train_split)
test_end = int(len(file_list) * (train_split + test_split))

train_files = file_list[:train_end]
test_files = file_list[train_end:test_end]
val_files = file_list[test_end:]

# Folder names (relative to top_path) that receive the copied files.
train_dir = 'training_data'
test_dir = 'testing_data'
val_dir = 'val_data'

In [4]:
# Number of files assigned to each subset; later cells derive step counts from these.
trainLength, testLength, valLength = map(len, (train_files, test_files, val_files))

In [60]:
# Copy every training file to <top_path>/<train_dir>/<label>/<name>, where
# <label> is the name of the folder the file currently lives in.
for fil in train_files:
    label_dir, name = os.path.split(fil)
    label = os.path.basename(label_dir)
    dest_dir = os.path.join(top_path, train_dir, label)
    # copyfile does not create missing folders, so make sure the class
    # folder exists before writing into it.
    os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, name)
    copyfile(fil, dest)

In [61]:
# Copy every test file to <top_path>/<test_dir>/<label>/<name>, where
# <label> is the name of the folder the file currently lives in.
for fil in test_files:
    label_dir, name = os.path.split(fil)
    label = os.path.basename(label_dir)
    dest_dir = os.path.join(top_path, test_dir, label)
    # copyfile does not create missing folders, so make sure the class
    # folder exists before writing into it.
    os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, name)
    copyfile(fil, dest)

In [62]:
# Copy every validation file to <top_path>/<val_dir>/<label>/<name>, where
# <label> is the name of the folder the file currently lives in.
for fil in val_files:
    label_dir, name = os.path.split(fil)
    label = os.path.basename(label_dir)
    dest_dir = os.path.join(top_path, val_dir, label)
    # copyfile does not create missing folders, so make sure the class
    # folder exists before writing into it.
    os.makedirs(dest_dir, exist_ok=True)
    dest = os.path.join(dest_dir, name)
    copyfile(fil, dest)

In [6]:
class ResNet:
    """Builder for a ResNet made of pre-activation bottleneck modules.

    Both methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def residual_module(data, K, stride, chanDim, red=False, reg=0.0001,
                        bnEps=2e-5, bnMom=0.9):
        """Append one bottleneck residual module to `data` and return its output.

        The main branch applies BN -> ReLU -> Conv three times, using a 1x1
        convolution with K/4 filters, a 3x3 convolution with K/4 filters (this
        one carries `stride`), and a 1x1 convolution with K filters. Its result
        is added to the shortcut branch.

        Args:
            data: input tensor of the module.
            K: number of filters produced by the final convolution. Note that
                inside this method the name shadows the module-level Keras
                backend alias `K`.
            stride: stride of the 3x3 convolution (and of the shortcut
                convolution when `red` is True).
            chanDim: axis passed to every BatchNormalization layer.
            red: when True the shortcut is a strided 1x1 convolution with K
                filters applied to the first activation; otherwise the
                shortcut is `data` unchanged.
            reg: L2 factor applied to every convolution kernel.
            bnEps: epsilon for the BatchNormalization layers.
            bnMom: momentum for the BatchNormalization layers.

        Returns:
            The tensor produced by adding the main branch to the shortcut.
        """
        shortcut = data

        # First block: 1x1 convolution that reduces to K/4 filters.
        bn1 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(data)
        act1 = Activation("relu")(bn1)
        conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias=False,
                       kernel_regularizer=l2(reg))(act1)

        # Second block: 3x3 convolution; this is where `stride` is applied.
        bn2 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(conv1)
        act2 = Activation("relu")(bn2)
        conv2 = Conv2D(int(K * 0.25), (3, 3), strides=stride, padding="same",
                       use_bias=False, kernel_regularizer=l2(reg))(act2)

        # Third block: 1x1 convolution that expands back to K filters.
        bn3 = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(conv2)
        act3 = Activation("relu")(bn3)
        conv3 = Conv2D(K, (1, 1), use_bias=False, kernel_regularizer=l2(reg))(act3)

        if red:
            # Project the shortcut so it matches the main branch's filter
            # count and stride before the addition.
            shortcut = Conv2D(K, (1, 1), strides=stride, use_bias=False,
                              kernel_regularizer=l2(reg))(act1)
        x = add([conv3, shortcut])

        return x

    @staticmethod
    def build(width, height, depth, classes, stages, filters,
              reg=0.0001, bnEps=2e-5, bnMom=0.9):
        """Assemble the full network and return it as a Keras `Model`.

        Args:
            width: input image width.
            height: input image height.
            depth: number of input channels.
            classes: number of units in the final softmax layer.
            stages: sequence giving the number of residual modules per stage.
            filters: sequence of filter counts; `filters[0]` is used by the
                initial convolution and `filters[i + 1]` by stage `i`, so it
                needs at least `len(stages) + 1` entries.
            reg: L2 factor applied to every convolution and dense kernel,
                including those inside the residual modules.
            bnEps: epsilon for the BatchNormalization layers.
            bnMom: momentum for the BatchNormalization layers.

        Returns:
            A `Model` named "resnet" mapping the input to class probabilities.

        Raises:
            ValueError: if `filters` has fewer than `len(stages) + 1` entries.
        """
        if len(filters) < len(stages) + 1:
            raise ValueError(
                "filters needs len(stages) + 1 entries, got %d for %d stages"
                % (len(filters), len(stages)))

        inputShape = (height, width, depth)
        chanDim = -1

        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1

        inputs = Input(shape=inputShape)
        x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(inputs)

        # Stem: 5x5 convolution, BN, ReLU, then a padded 3x3/stride-2 max pool.
        x = Conv2D(filters[0], (5, 5), use_bias=False, padding="same",
                   kernel_regularizer=l2(reg))(x)
        x = BatchNormalization(axis=chanDim, epsilon=bnEps, momentum=bnMom)(x)
        x = Activation("relu")(x)
        x = ZeroPadding2D((1, 1))(x)
        x = MaxPooling2D((3, 3), strides=(2, 2))(x)

        for i in range(0, len(stages)):
            # Only stages after the first reduce the spatial size.
            stride = (1, 1) if i == 0 else (2, 2)
            # `reg` is forwarded explicitly; otherwise the residual modules
            # would fall back to their own default and ignore the value
            # requested by the caller.
            x = ResNet.residual_module(x, filters[i + 1], stride,
                                       chanDim, red=True, reg=reg,
                                       bnEps=bnEps, bnMom=bnMom)
            for j in range(0, stages[i] - 1):
                x = ResNet.residual_module(x, filters[i + 1],
                                           (1, 1), chanDim, reg=reg,
                                           bnEps=bnEps, bnMom=bnMom)

        x = BatchNormalization(axis=chanDim, epsilon=bnEps,
                               momentum=bnMom)(x)
        x = Activation("relu")(x)
        # NOTE(review): the pooling window is fixed at 8x8, which matches the
        # feature-map size for a 64x64 input with three stages; other
        # configurations may need a different window.
        x = AveragePooling2D((8, 8))(x)

        x = Flatten()(x)
        x = Dense(classes, kernel_regularizer=l2(reg))(x)
        x = Activation("softmax")(x)

        model = Model(inputs, x, name="resnet")

        return model

In [7]:
# Training hyper-parameters: epoch count, starting learning rate, batch size.
NUM_EPOCHS = 50
INIT_LR = 1e-1
BS = 32


def poly_decay(epoch):
    """Return the learning rate for `epoch` under polynomial decay.

    The rate starts at INIT_LR for epoch 0 and reaches 0 at epoch NUM_EPOCHS.
    With the exponent fixed at 1.0 the decay is linear.
    """
    exponent = 1.0
    remaining = 1 - (epoch / float(NUM_EPOCHS))
    return INIT_LR * remaining ** exponent

In [8]:
# Validation and test images are only rescaled by 1/255; no augmentation.
valAug = ImageDataGenerator(rescale=1/255.0)

# Training images get the same rescaling plus light random augmentation
# (rotation, zoom, shifts, shear and horizontal flips).
trainAug = ImageDataGenerator(
    rescale=1/255.0,
    fill_mode="nearest",
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.05,
    shear_range=0.05,
    width_shift_range=0.05,
    height_shift_range=0.05)

In [9]:
# Options shared by all three directory iterators: one-hot labels,
# 64x64 RGB images, batches of BS.
_flow_options = dict(
    class_mode="categorical",
    target_size=(64,64),
    color_mode="rgb",
    batch_size=BS)

# Only the training iterator shuffles; validation and test keep a fixed order.
trainGen = trainAug.flow_from_directory(
    os.path.sep.join([top_path,train_dir]), shuffle=True, **_flow_options)

valGen = valAug.flow_from_directory(
    os.path.sep.join([top_path,val_dir]), shuffle=False, **_flow_options)

testGen = valAug.flow_from_directory(
    os.path.sep.join([top_path,test_dir]), shuffle=False, **_flow_options)

Found 22047 images belonging to 2 classes.
Found 2756 images belonging to 2 classes.
Found 2755 images belonging to 2 classes.


In [10]:
# 64x64 RGB input, 2 output classes, three stages of 3/4/6 residual modules.
model = ResNet.build(64, 64, 3, 2, (3, 4, 6),
                     (64, 128, 256, 512), reg=0.0005)
opt = SGD(lr=INIT_LR, momentum=0.9)
# The network ends in a softmax over one unit per class and the generators
# yield one-hot ("categorical") labels, so categorical cross-entropy is the
# matching loss; it also makes "accuracy" resolve to categorical accuracy.
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])

Instructions for updating:
Colocations handled automatically by placer.


In [53]:
#rough


In [11]:
# Train with the learning rate re-computed by poly_decay at the start of
# every epoch; H keeps the returned training history.
callback = [LearningRateScheduler(poly_decay)]
H = model.fit_generator(
    trainGen,
    epochs=NUM_EPOCHS,
    callbacks=callback,
    steps_per_epoch=trainLength//BS,
    validation_data=valGen,
    validation_steps=valLength//BS)

In [52]:
# Start from the first batch so the predictions follow testGen's file order
# (the iterator was created with shuffle=False).
testGen.reset()
# Round the step count up: flooring it would skip the final partial batch and
# leave fewer predictions than there are test images.
test_steps = (testGen.samples + BS - 1) // BS
otp = model.predict_generator(testGen,
                              steps=test_steps)