In [1]:
from PIL import Image
import numpy as np
import cv2
import glob
import os
import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score

# Data Preparation
- Read the images into NumPy arrays
- Save the NumPy arrays to `.npy` files
- Map the targets to integer labels
- Split the data into train/test datasets

In [2]:
def load_image(image_file):
    """Read an image file and return it as a 50x50 NumPy array.

    The file is decoded with OpenCV, resized to 50x50 pixels with PIL's
    default resampling filter, and converted back to a NumPy array.

    Args:
        image_file: Path of the image file to load.

    Returns:
        numpy.ndarray holding the resized pixel data. OpenCV decodes colour
        images in BGR channel order and no channel reordering is done here.

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_file``.
    """
    image = cv2.imread(image_file)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside PIL.
    if image is None:
        raise ValueError(f"Could not read image file: {image_file!r}")
    image = Image.fromarray(image)
    image = image.resize((50, 50))
    return np.array(image)


# Reuse the cached arrays when both files exist; otherwise build them from the
# raw PNG files and cache the result for the next run.
if os.path.exists("Cells.npy") and os.path.exists("Labels.npy"):
    cells = np.load("Cells.npy")
    labels = np.load("Labels.npy")
else:
    # glob returns paths in arbitrary (filesystem-dependent) order. Sorting
    # fixes the sample order so the seeded train/test split is reproducible.
    parasitized_paths = sorted(glob.glob("./cell_images/cell_images/Parasitized/*.png"))
    uninfected_paths = sorted(glob.glob("./cell_images/cell_images/Uninfected/*.png"))
    if not parasitized_paths or not uninfected_paths:
        raise FileNotFoundError(
            "No PNG images found under ./cell_images/cell_images/Parasitized "
            "and/or ./cell_images/cell_images/Uninfected"
        )

    Parasitized = [load_image(p) for p in parasitized_paths]
    Uninfected = [load_image(p) for p in uninfected_paths]
    cells = np.array(Parasitized + Uninfected)
    # Label mapping: 0 = Parasitized, 1 = Uninfected.
    labels = np.array([0] * len(Parasitized) + [1] * len(Uninfected))

    # Explicit ".npy" names match the files checked for above.
    np.save("Cells.npy", cells)
    np.save("Labels.npy", labels)

In [3]:
# Scale pixel values to [0, 1] (assumes 8-bit image data, i.e. values in 0..255).
# The integer-dtype guard makes this cell safe to re-run: once `cells` holds
# floats it is not divided by 255 a second time. float32 halves the memory of
# the default float64 result.
if np.issubdtype(cells.dtype, np.integer):
    cells = cells.astype(np.float32) / 255

In [4]:
# Hold out 10% of the samples as the test set; the fixed random_state makes
# the shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cells,
    labels,
    test_size=0.1,
    random_state=42,
)

In [5]:
# One-hot encode the integer labels.
# num_classes is pinned to 2 so both splits always get two columns; without
# it the width is inferred from the largest label present in each array.
y_train = keras.utils.to_categorical(y_train, num_classes=2)
y_test = keras.utils.to_categorical(y_test, num_classes=2)

# Define the CNN model
- 3 convolutional layers (each followed by max pooling) and 2 dense layers
- Dropout layers

In [6]:
# Small CNN built layer by layer: three conv + max-pool stages (16, 32, 64
# filters), then dropout, a 500-unit dense layer, dropout again and a
# 2-way softmax output.
model = Sequential()

# Convolutional feature extractor
model.add(Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.25))

# Dense classifier head
model.add(Flatten())
model.add(Dense(500, activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(2, activation="softmax"))

In [7]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 50, 50, 16)        208       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 25, 25, 32)        2080      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        8256      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
dropout (Dropout)            (None, 6, 6, 64)          0

In [8]:
# One-hot targets with a softmax output -> categorical cross-entropy loss;
# accuracy is tracked as the reported metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [9]:
# Train for 10 epochs. validation_split=0.125 holds out 12.5% (1/8) of X_train
# for validation, which is about 11% of the full dataset given the 10% test split.
# The History object is kept so the per-epoch loss/accuracy can be inspected
# afterwards via `history.history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    verbose=1,
    validation_split=0.125,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f967361be50>

# Model Performance
- Accuracy
- Confusion matrix
- F1 Score

In [10]:
# Evaluate on the held-out test set; returns [loss, accuracy], matching the
# loss and metrics passed to model.compile.
model.evaluate(X_test, y_test, verbose=1)



[0.11773661524057388, 0.9579100012779236]

In [11]:
# Turn the softmax outputs and the one-hot targets back into integer class
# ids, then tabulate them (rows: true class, columns: predicted class).
y_pred_prob = model.predict(X_test)
y_pred = y_pred_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)
confusion_matrix(y_true, y_pred)

array([[1319,   88],
       [  28, 1321]])

In [12]:
# Binary F1 using scikit-learn's default pos_label=1, so the score is reported
# for class 1 ("Uninfected" when the label array is built in this notebook).
f1_score(y_true, y_pred)

0.9579405366207396

In [13]:
# Persist the trained CNN to "cells.h5" before `model` is rebound to the
# VGG16-based network later in the notebook.
model.save("cells.h5")

# Transfer Learning with VGG16

In [14]:
from keras.applications.vgg16 import VGG16
from keras.models import Model

In [15]:
# Load the VGG16 convolutional backbone without its classifier layers.
# It gets its own name so `model` is not bound to two different networks
# within the same cell.
base_model = VGG16(include_top=False, input_shape=(50, 50, 3))

# NOTE: the backbone layers are left trainable, so the whole network is
# fine-tuned rather than used as a frozen feature extractor. To freeze it, set
# `layer.trainable = False` on every layer in `base_model.layers` before compiling.

# Add a new classifier head on top of the last backbone layer.
flat1 = Flatten()(base_model.layers[-1].output)
class1 = Dense(512, activation='relu')(flat1)
output = Dense(2, activation='softmax')(class1)

# Define the full model: backbone input -> new 2-class softmax output.
model = Model(inputs=base_model.inputs, outputs=output)

In [16]:
# model.summary()

In [None]:
# Compile and train the VGG16-based model with the same loss, optimizer,
# batch size, epoch count and validation split as the first CNN in this notebook.
# validation_split=0.125 holds out 12.5% of X_train for validation.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=64, epochs=10, verbose=1, validation_split=0.125)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

In [26]:
# Evaluate the VGG16-based model on the held-out test set; returns
# [loss, accuracy], matching the loss and metrics passed to model.compile.
model.evaluate(X_test, y_test)



[0.13218945264816284, 0.956095814704895]

In [27]:
# Turn the softmax outputs and the one-hot targets back into integer class
# ids, then tabulate them (rows: true class, columns: predicted class).
y_pred_prob = model.predict(X_test)
y_pred = y_pred_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)
confusion_matrix(y_true, y_pred)

array([[1316,   91],
       [  30, 1319]])

In [28]:
# Binary F1 using scikit-learn's default pos_label=1, so the score is reported
# for class 1 ("Uninfected" when the label array is built in this notebook).
f1_score(y_true, y_pred)

0.9561435302645885