In [1]:
import numpy as np
from keras.models import Sequential , Model
from keras import applications
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D, Lambda
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils , to_categorical
from keras.preprocessing.image import ImageDataGenerator

from keras.datasets import cifar10
from keras.optimizers import SGD
from keras.applications.vgg19 import VGG19

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
import random
import csv
import cv2
import scipy
import os
%matplotlib inline
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
num_classes = 2
epochs = 20
# BASE_PATH = '/home/ec2-user/cell_classifier/'
BASE_DIR = '../'
batch_size = 32
dropout = 0.7
learning_rate = 0.001 

In [3]:
def get_data(folder):
    """
    Load the data and labels from the given folder.
    """
    X = []
    y = []

    for wbc_type in os.listdir(folder):
        if not wbc_type.startswith('.'):
            if wbc_type in ['NEUTROPHIL', 'EOSINOPHIL']:
                label = 'POLYNUCLEAR'
            else:
                label = 'MONONUCLEAR'
            for image_filename in os.listdir(folder + wbc_type):
                img_file = cv2.imread(folder + wbc_type + '/' + image_filename)
                if img_file is not None:
                    # Downsample the image to 120, 160, 3
                    img_file = scipy.misc.imresize(arr=img_file, size=(120, 160, 3))
                    img_arr = np.asarray(img_file)
                    X.append(img_arr)
                    y.append(label)
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [4]:
X_train, y_train = get_data(BASE_DIR + 'images/TRAIN/')
print(y_train)
X_test, y_test = get_data(BASE_DIR + 'images/TEST/')

encoder = LabelEncoder()
encoder.fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)
print(y_train)

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


['POLYNUCLEAR' 'POLYNUCLEAR' 'POLYNUCLEAR' ... 'MONONUCLEAR' 'MONONUCLEAR'
 'MONONUCLEAR']
[1 1 1 ... 0 0 0]


In [5]:
def model(x_train, y_train, base_model):

    # get layers and add average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    # add fully-connected layer
    x = Dense(512, activation='relu')(x)

    # add output layer
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # freeze pre-trained model area's layer
    for layer in base_model.layers:
        layer.trainable = False

    # update the weight that are added
    model.compile(optimizer='rmsprop', loss='binary_crossentropy')
    model.fit(x_train, y_train)

    # choose the layers which are updated by training
    layer_num = len(model.layers)
    for layer in model.layers[:int(layer_num * 0.9)]:
        layer.trainable = False

    for layer in model.layers[int(layer_num * 0.9):]:
        layer.trainable = True

    # update the weights
    model.compile(optimizer=SGD(lr=learning_rate, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=epochs)
    return history




In [6]:
vgg_19_model = VGG19(weights='imagenet', include_top=False)

## VGG19

In [7]:
history = model(X_train, y_train, vgg_19_model)

Epoch 1/1
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')
y_pred = np.rint(history.model.predict(X_test))

print(accuracy_score(y_test, y_pred))

Predicting on test data
0.5014073180538802


In [9]:
history.model.save_weights('biclass_vgg19_model_epoch'+str(epochs)+'dropout'+str(dropout)+'lr'+str(learning_rate)+'.h5')

In [10]:
history.model.load_weights('biclass_vgg19_model_epoch'+str(epochs)+'dropout'+str(dropout)+'lr'+str(learning_rate)+'.h5')

In [11]:
history.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [12]:
from sklearn.metrics import confusion_matrix


print(confusion_matrix(y_test, y_pred))


[[   0 1240]
 [   0 1247]]
