<a href="https://colab.research.google.com/github/shu-nya/Image-Classification-of-White-Blood-Cells/blob/master/WBC_Classification_model_only_with_basemodel_vgg_resnet50_152.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from keras.models import Sequential
from keras import applications
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Input, Conv2D, MaxPooling2D, Lambda
from keras.layers import Dense
from keras.models import Model
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
import csv
import cv2
import scipy
import os
from PIL import Image
#import splitfolders
import shutil

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
BASE_DIR = '/content/drive/MyDrive/taskMedPrime'

In [4]:
def get_data(folder):
    X = []
    y = []

    for wbc_type in os.listdir(folder):
        if not wbc_type.startswith('.'):

            for image_filename in os.listdir(folder + '/' + wbc_type):
                img_file = cv2.imread(folder + '/' + wbc_type + '/' + image_filename)

                if img_file is not None:
                    img_arr = np.asarray(img_file)
                    X.append(img_arr)
                    y.append(wbc_type)

    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [5]:
X_train, y_train = get_data(BASE_DIR + '/train')
X_test, y_test = get_data(BASE_DIR + '/test')



In [6]:
X_train = X_train * 1./255.
X_test = X_test * 1./255.

In [7]:
#Convert labels to one hot encoding

encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train = encoder.transform(y_train)
encoded_y_test = encoder.transform(y_test)

y_train = np_utils.to_categorical(encoded_y_train)
y_test = np_utils.to_categorical(encoded_y_test)

In [8]:
def get_model():
    model = Sequential()
    model.add(Lambda(lambda x: x/127.5 - 1., input_shape=(256, 256, 3), output_shape=(256, 256, 3)))

    # layer 1
    model.add(Conv2D(32, kernel_size=(3, 3), padding='same', input_shape=(256, 356, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.25))

    # layer 2
    model.add(Conv2D(32, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.40))

    # layer 3
    model.add(Conv2D(64, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.40))


    # Fully connected network layer
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.40))

    # Output
    model.add(Dense(9))
    model.add(Activation('softmax'))

    # Compile
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    return model

In [9]:
epochs = 20

batch_size = 32

In [10]:
model = get_model()

# fits the model on batches
estimator = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=epochs,
    shuffle=True,
    batch_size=batch_size,
    verbose=2)

model.save_weights('balanced_model_2.h5')

Epoch 1/150
76/76 - 20s - loss: 2.4926 - accuracy: 0.1606 - val_loss: 2.2432 - val_accuracy: 0.0000e+00 - 20s/epoch - 267ms/step
Epoch 2/150
76/76 - 7s - loss: 2.1908 - accuracy: 0.1673 - val_loss: 2.2881 - val_accuracy: 0.0000e+00 - 7s/epoch - 88ms/step
Epoch 3/150
76/76 - 7s - loss: 2.1871 - accuracy: 0.1673 - val_loss: 2.3326 - val_accuracy: 0.0000e+00 - 7s/epoch - 88ms/step
Epoch 4/150
76/76 - 7s - loss: 2.1840 - accuracy: 0.1673 - val_loss: 2.3737 - val_accuracy: 0.0000e+00 - 7s/epoch - 89ms/step
Epoch 5/150
76/76 - 8s - loss: 2.1815 - accuracy: 0.1673 - val_loss: 2.4107 - val_accuracy: 0.0000e+00 - 8s/epoch - 99ms/step
Epoch 6/150
76/76 - 8s - loss: 2.1797 - accuracy: 0.1673 - val_loss: 2.4395 - val_accuracy: 0.0000e+00 - 8s/epoch - 101ms/step
Epoch 7/150
76/76 - 7s - loss: 2.1782 - accuracy: 0.1673 - val_loss: 2.4679 - val_accuracy: 0.0000e+00 - 7s/epoch - 91ms/step
Epoch 8/150
76/76 - 7s - loss: 2.1768 - accuracy: 0.1673 - val_loss: 2.4963 - val_accuracy: 0.0000e+00 - 7s/epoch 

KeyboardInterrupt: ignored

In [11]:
model.save_weights('balanced_model_2.h5')

In [18]:
model.load_weights('balanced_model_2.h5')

In [13]:
model.evaluate(X_test, y_test)



[2.271425724029541, 0.35514017939567566]

In [14]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')

y_pred = np.rint(model.predict(X_test))

print(accuracy_score(y_test, y_pred))

Predicting on test data
0.0


In [None]:
from sklearn.metrics import confusion_matrix

y_pred_unencoded = np.argmax(y_pred, axis=1)

y_test_unencoded = np.argmax(y_test, axis=1)

print(confusion_matrix(y_test_unencoded, y_pred_unencoded))

[[12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [ 2  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [76  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [84  0  0  0  0  0  0  0  0]]


In [8]:
num_classes = 9
epochs = 10
batch_size = 32

In [9]:
from keras.models import Model
from keras.layers import Input
from keras import optimizers


base_model = applications.VGG16(include_top=False, weights='imagenet')
input = Input(shape=(256, 256, 3), name = 'image_input')
vgg_output = base_model(input)

top_model = Flatten()(vgg_output)
top_model = Dense(64, activation='relu')(top_model)
predictions = Dense(num_classes, activation='softmax', name='prediction_layer')(top_model)

model = Model(inputs=input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
layers = base_model.layers[:-2]
for layer in layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 256, 256, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, None, None, 512)   14714688  
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 dense (Dense)               (None, 64)                2097216   
                                                                 
 prediction_layer (Dense)    (None, 9)                 585       
                                                                 
Total params: 16,812,489
Trainable params: 4,457,609
Non-trainable params: 12,354,880
_________________________________________________________________


In [11]:
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, shuffle=True, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f44e8f79630>

In [12]:
model.save_weights('vgg_top.h5')

In [13]:
model.load_weights('vgg_top.h5')

In [14]:
print('Predicting on test data')
#y_pred = np.rint(model.predict(X_test))

y_pred = model.predict(X_test)

print(y_pred)


Predicting on test data
[[9.99942064e-01 8.66416661e-12 4.64329517e-08 ... 8.03195803e-08
  3.98407719e-05 2.32866222e-12]
 [5.91485403e-08 1.16494277e-06 2.21993268e-08 ... 9.99187171e-01
  1.48981245e-08 1.43309648e-12]
 [3.81489400e-07 3.01149133e-10 3.11269514e-06 ... 3.39216058e-07
  1.02117617e-07 3.43778456e-12]
 ...
 [2.76096377e-12 1.24947151e-14 4.06296802e-11 ... 7.01134809e-07
  2.76558779e-07 2.56537633e-19]
 [1.75310243e-02 1.13070475e-09 6.95066760e-08 ... 4.68123006e-03
  3.04723962e-05 3.98920957e-12]
 [9.89983797e-01 6.55703447e-09 1.16199071e-05 ... 3.06496280e-04
  3.23442509e-04 1.29190558e-09]]


In [15]:
y_pred = np.rint(y_pred)

print(y_pred)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [16]:
from sklearn.metrics import accuracy_score



In [17]:
print(accuracy_score(y_test, y_pred))

0.48598130841121495


In [18]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 256, 256, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, None, None, 512)   14714688  
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 dense (Dense)               (None, 64)                2097216   
                                                                 
 prediction_layer (Dense)    (None, 9)                 585       
                                                                 
Total params: 16,812,489
Trainable params: 4,457,609
Non-trainable params: 12,354,880
_________________________________________________________________


In [19]:
print(base_model.layers[-2].name)

block5_conv3


In [20]:
from sklearn.metrics import confusion_matrix

y_pred_unencoded = np.argmax(y_pred, axis=1)

y_test_unencoded = np.argmax(y_test, axis=1)

print(confusion_matrix(y_test_unencoded, y_pred_unencoded))

[[ 8  0  0  0  2  0  2  0  0]
 [ 0  2  0  0  2  0  0  0  0]
 [ 0  0  0  0  2  0  0  0  0]
 [ 4  0  0  6  0  0  0  0  0]
 [ 0  0  0  0 76  0  0  0  0]
 [ 2  0  0  0  3  4  0  1  0]
 [ 2  0  0  0  0  2  8  0  0]
 [ 0  0  0  0  2  0  2  0  0]
 [63  0  1  4  5  8  3  0  0]]


In [10]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.applications import ResNet50, ResNet101, ResNet152


In [11]:
EPOCHS = 20
BATCH_SIZE = 32
IMG_SIZE = 256
num_classes = 9

In [12]:
def build_model(ResNet, img_size, n):
    inp = Input(shape=(img_size,img_size, n))
    resnet = ResNet(input_shape=(img_size,img_size,n),
                    weights='imagenet',
                    include_top=False)

    # freeze ResNet
    resnet.trainable = False
    x = resnet(inp)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs=inp, outputs=x)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model



In [11]:
resnet50 = build_model(ResNet50, IMG_SIZE, 3)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
resnet50.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 8, 8, 2048)        23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 9)                 18441     
                                                                 
Total params: 23,606,153
Trainable params: 18,441
Non-trainable params: 23,587,712
____________________________________________

In [13]:
checkpoint = tf.keras.callbacks.ModelCheckpoint('resnet50.h5',
                                                monitor='loss',
                                                save_best_only=True,
                                                save_weights_only=True)


In [14]:
resnet50.fit(X_train, y_train, batch_size=BATCH_SIZE,
          epochs=EPOCHS, verbose=0, callbacks=[checkpoint])


<keras.callbacks.History at 0x7f05e01a6650>

In [15]:
resnet50.load_weights('resnet50.h5')


In [20]:
resnet50.evaluate(X_test, y_test)




[1.5572044849395752, 0.42990654706954956]

In [19]:
from sklearn.metrics import accuracy_score

print('Predicting on test data')
y_pred = np.rint(resnet50.predict(X_test))

print(accuracy_score(y_test, y_pred))

Predicting on test data
0.0


In [21]:
from sklearn.metrics import confusion_matrix

y_pred_unencoded = np.argmax(y_pred, axis=1)

y_test_unencoded = np.argmax(y_test, axis=1)

print(confusion_matrix(y_test_unencoded, y_pred_unencoded))

[[12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [ 2  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [76  0  0  0  0  0  0  0  0]
 [10  0  0  0  0  0  0  0  0]
 [12  0  0  0  0  0  0  0  0]
 [ 4  0  0  0  0  0  0  0  0]
 [84  0  0  0  0  0  0  0  0]]


In [13]:
resnet152 = build_model(ResNet152, IMG_SIZE, 3)


In [14]:
resnet152.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 resnet152 (Functional)      (None, 8, 8, 2048)        58370944  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 9)                 18441     
                                                                 
Total params: 58,389,385
Trainable params: 18,441
Non-trainable params: 58,370,944
____________________________________________

In [15]:
checkpoint = tf.keras.callbacks.ModelCheckpoint('resnet152.h5',
                                                monitor='loss',
                                                save_best_only=True,
                                                save_weights_only=True)


In [16]:
resnet152.fit(X_train, y_train, batch_size=BATCH_SIZE,
          epochs=EPOCHS, verbose=0, callbacks=[checkpoint])


<keras.callbacks.History at 0x7f0da7a5a950>

In [17]:
resnet152.load_weights('resnet152.h5')

In [29]:
y_pred = resnet152.predict(X_test)



In [30]:
print(y_pred)

[[0.0913308  0.05359313 0.03665922 ... 0.05498597 0.08981112 0.27745995]
 [0.08076365 0.06690872 0.03369468 ... 0.05626995 0.08851091 0.29218143]
 [0.07404404 0.06626953 0.03198276 ... 0.05420496 0.08124533 0.3144185 ]
 ...
 [0.08042213 0.05068485 0.03292871 ... 0.05550446 0.08530418 0.30937624]
 [0.07265574 0.07011493 0.03080931 ... 0.05346893 0.08688118 0.3143069 ]
 [0.07590387 0.05748452 0.03157727 ... 0.05379097 0.08348969 0.32320946]]


In [33]:
from sklearn.metrics import accuracy_score

print(accuracy_score(y_test, y_pred))

InvalidParameterError: ignored

In [23]:
print(y_pred)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [18]:
resnet152.evaluate(X_test, y_test)




[1.6268986463546753, 0.3925233781337738]