<a href="https://colab.research.google.com/github/shu-nya/Image-Classification-of-White-Blood-Cells/blob/master/WBC_Classification_model_only_with_vgg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from keras.models import Sequential
from keras import applications
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Input, Conv2D, MaxPooling2D, Lambda
from keras.layers import Dense
from keras.models import Model
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
import csv
import cv2
import scipy
import os
from PIL import Image
#import splitfolders
import shutil

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
BASE_DIR = '/content/drive/MyDrive/taskMedPrime'

In [4]:
def get_data(folder):
    X = []
    y = []

    for wbc_type in os.listdir(folder):
        if not wbc_type.startswith('.'):

            for image_filename in os.listdir(folder + '/' + wbc_type):
                img_file = cv2.imread(folder + '/' + wbc_type + '/' + image_filename)

                if img_file is not None:
                    img_arr = np.asarray(img_file)
                    X.append(img_arr)
                    y.append(wbc_type)

    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [5]:
X_train, y_train = get_data(BASE_DIR + '/train')
X_test, y_test = get_data(BASE_DIR + '/test')



In [6]:
X_train = X_train * 1./255.
X_test = X_test * 1./255.

In [7]:
#Convert labels to one hot encoding

encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train = encoder.transform(y_train)
encoded_y_test = encoder.transform(y_test)

y_train = np_utils.to_categorical(encoded_y_train)
y_test = np_utils.to_categorical(encoded_y_test)

In [8]:
def get_model():
    model = Sequential()
    model.add(Lambda(lambda x: x/127.5 - 1., input_shape=(256, 256, 3), output_shape=(256, 256, 3)))

    # layer 1
    model.add(Conv2D(32, kernel_size=(3, 3), padding='same', input_shape=(256, 356, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.25))

    # layer 2
    model.add(Conv2D(32, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.40))

    # layer 3
    model.add(Conv2D(64, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.40))


    # Fully connected network layer
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.40))

    # Output
    model.add(Dense(9))
    model.add(Activation('softmax'))

    # Compile
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    return model

In [9]:
epochs = 20

batch_size = 32

In [10]:
model = get_model()

# fits the model on batches
estimator = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=150,
    shuffle=True,
    batch_size=32,
    verbose=2)

model.save_weights('balanced_model_2.h5')

Epoch 1/150
75/75 - 21s - loss: 2.9069 - accuracy: 0.1442 - val_loss: 2.2371 - val_accuracy: 0.0000e+00 - 21s/epoch - 283ms/step
Epoch 2/150
75/75 - 7s - loss: 2.1912 - accuracy: 0.1675 - val_loss: 2.2879 - val_accuracy: 0.0000e+00 - 7s/epoch - 89ms/step
Epoch 3/150
75/75 - 7s - loss: 2.1873 - accuracy: 0.1675 - val_loss: 2.3332 - val_accuracy: 0.0000e+00 - 7s/epoch - 89ms/step
Epoch 4/150
75/75 - 7s - loss: 2.1842 - accuracy: 0.1675 - val_loss: 2.3736 - val_accuracy: 0.0000e+00 - 7s/epoch - 90ms/step
Epoch 5/150
75/75 - 7s - loss: 2.1815 - accuracy: 0.1675 - val_loss: 2.4137 - val_accuracy: 0.0000e+00 - 7s/epoch - 91ms/step
Epoch 6/150
75/75 - 7s - loss: 2.1795 - accuracy: 0.1675 - val_loss: 2.4494 - val_accuracy: 0.0000e+00 - 7s/epoch - 90ms/step
Epoch 7/150
75/75 - 7s - loss: 2.1779 - accuracy: 0.1675 - val_loss: 2.4820 - val_accuracy: 0.0000e+00 - 7s/epoch - 89ms/step
Epoch 8/150
75/75 - 7s - loss: 2.1766 - accuracy: 0.1675 - val_loss: 2.5084 - val_accuracy: 0.0000e+00 - 7s/epoch -

In [11]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')

y_pred = np.rint(model.predict(X_test))

print(accuracy_score(y_test, y_pred))

Predicting on test data
0.0


In [12]:
from sklearn.metrics import confusion_matrix

y_pred_unencoded = np.argmax(y_pred, axis=1)

y_test_unencoded = np.argmax(y_test, axis=1)

print(confusion_matrix(y_test_unencoded, y_pred_unencoded))

[[12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [ 2  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [76  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [84  0  0  0  0  0  0  0  0]]


In [16]:
num_classes = 9
epochs = 20
batch_size = 32

In [13]:
from keras.models import Model
from keras.layers import Input
from keras import optimizers


base_model = applications.VGG16(include_top=False, weights='imagenet')
input = Input(shape=(256, 256, 3), name = 'image_input')
vgg_output = base_model(input)

top_model = Flatten()(vgg_output)
top_model = Dense(64, activation='relu')(top_model)
predictions = Dense(num_classes, activation='softmax', name='prediction_layer')(top_model)

model = Model(inputs=input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
layers = base_model.layers[:-2]
for layer in layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 256, 256, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, None, None, 512)   14714688  
                                                                 
 flatten_3 (Flatten)         (None, 32768)             0         
                                                                 
 dense_3 (Dense)             (None, 64)                2097216   
                                                                 
 prediction_layer (Dense)    (None, 9)                 585       
                                                                 
Total params: 16,812,489
Trainable params: 4,457,609
Non-trainable params: 12,354,880
_________________________________________________________________


In [17]:
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, shuffle=True, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f181230af50>

In [18]:
model.save_weights('vgg_top.h5')

In [19]:
model.load_weights('vgg_top.h5')

In [20]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')
y_pred = np.rint(model.predict(X_test))

print(accuracy_score(y_test, y_pred))

Predicting on test data
0.46261682242990654


In [21]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 256, 256, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, None, None, 512)   14714688  
                                                                 
 flatten_3 (Flatten)         (None, 32768)             0         
                                                                 
 dense_3 (Dense)             (None, 64)                2097216   
                                                                 
 prediction_layer (Dense)    (None, 9)                 585       
                                                                 
Total params: 16,812,489
Trainable params: 4,457,609
Non-trainable params: 12,354,880
_________________________________________________________________


In [22]:
print(base_model.layers[-2].name)

block5_conv3
