In [None]:
# LIBRARIES
# for Compatibility (__future__ imports must precede every other statement)
from __future__ import print_function, division
from builtins import range, input

# import glob to return all file paths that match a specific pattern
import itertools
import os
from glob import glob

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt # Visualization
import seaborn as sb # Visualization
import PIL #Image viewer

#Importing confusion matrix to describe the performance of the model
from sklearn.metrics import confusion_matrix

# Include tensorflow keras layers, models, and preprocessing

from tensorflow import keras 
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Local helper used to draw the confusion matrices
from utils import plot_confusion_matrix

In [None]:
# Dataset locations, relative to the notebook's working directory.
# Each is read below with a '/<class folder>/<image>' layout.
train_path = '../Dataset/blood_cell_images/TRAIN'
valid_path = '../Dataset/blood_cell_images/TEST'

In [None]:
# Collect the image paths one level below each class folder
# (the '.jp*g' pattern covers e.g. .jpg and .jpeg).
image_files = glob(train_path + '/*/*.jp*g')
valid_image_files = glob(valid_path + '/*/*.jp*g')

# Only sub-directories are classes. A stray file sitting next to the class
# folders must not be counted, because len(folders) sizes the softmax layer.
folders = [entry for entry in glob(train_path + '/*') if os.path.isdir(entry)]

In [None]:
# Number of training images matched by the glob pattern
n_train_images = len(image_files)
print(n_train_images)

In [None]:
# Number of validation images matched by the glob pattern
n_valid_images = len(valid_image_files)
print(n_valid_images)

In [None]:
# Number of entries directly under the training directory (one per class)
n_classes_found = len(folders)
print(n_classes_found)

In [None]:
# Preview one randomly chosen training image
sample_path = np.random.choice(image_files)
sample_pixels = image.img_to_array(image.load_img(sample_path))
# Cast to uint8 so imshow interprets the values as 0-255 intensities
plt.imshow(sample_pixels.astype('uint8'))
plt.show()

In [None]:
# Target size every image is resized to by the generators; also fixes the
# model's input shape (IMAGE_SIZE + [3]). Adjust to suit the dataset.
IMAGE_SIZE = [224, 224]

In [None]:
# Training hyper-parameters
epochs = 3        # small on purpose; raise it or add callbacks for real runs
batch_size = 128  # images per batch for the train/validation generators

In [None]:
# ResNet50 convolutional base with ImageNet weights; include_top=False drops
# the original classifier so a new head can be attached for this dataset.
res = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=IMAGE_SIZE + [3],
)

In [None]:
# Freeze the pretrained backbone so only the new head is trained.
# Set trainable to True instead to fine-tune the ResNet50 weights.
for backbone_layer in res.layers:
    backbone_layer.trainable = False

In [None]:
# Classification head: flatten the backbone's feature maps and map them to one
# softmax unit per class (softmax because this is multiclass classification).
# Extra hidden layers could be inserted between the two.
flat_features = Flatten()(res.output)
output_layer = Dense(len(folders), activation='softmax')
prediction = output_layer(flat_features)

In [None]:
# Assemble the full network: backbone input -> softmax predictions
model = Model(
    inputs=res.input,
    outputs=prediction,
)

In [None]:
# Print the layer-by-layer structure and parameter counts
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (matches the softmax output), and accuracy as the tracked metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

How does ImageDataGenerator work?

In [None]:
# Random augmentations applied to training images only
augmentation_options = dict(
  rotation_range=20,
  width_shift_range=0.1,
  height_shift_range=0.1,
  shear_range=0.1,
  zoom_range=0.2,
  horizontal_flip=True,
  vertical_flip=True,
)

# Training generator: augmentation plus ResNet50's preprocess_input
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation_options)

# Validation generator: ResNet50 preprocessing only, no augmentation
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Iterator over the validation directory (library defaults apart from the
# target size); used here to inspect the class-name -> index mapping.
test_gen = val_gen.flow_from_directory(
    valid_path,
    target_size=IMAGE_SIZE,
)
print(test_gen.class_indices)

In [None]:
# Invert class_indices (name -> index) into a list where labels[index] == name
labels = [
    class_name
    for class_name, _ in sorted(test_gen.class_indices.items(), key=lambda pair: pair[1])
]

In [None]:
# Show the class names in index order
labels

In [None]:
# Look at the first image of one preprocessed batch. It should look strangely
# coloured: ResNet50's preprocess_input converts RGB to BGR and subtracts the
# ImageNet channel means, so values are no longer plain 0-255 RGB.
batch_images, batch_onehot = next(iter(test_gen))
first_image = batch_images[0]
print("min:", first_image.min(), "max:", first_image.max())
plt.title(labels[np.argmax(batch_onehot[0])])
plt.imshow(first_image)
plt.show()

In [None]:
# Batch iterators over the class-per-folder directories. Both splits share the
# same target size, batch size and shuffling; only the source generator
# (augmenting vs. plain) and the directory differ.
flow_options = dict(
  target_size=IMAGE_SIZE,
  shuffle=True,
  batch_size=batch_size,
)

train_generator = train_gen.flow_from_directory(train_path, **flow_options)
valid_generator = val_gen.flow_from_directory(valid_path, **flow_options)

In [None]:
# Fit the model.
# Step counts are taken from the generators themselves: len(generator) is the
# number of batches needed to cover every image once, including the final
# partial batch. Deriving them from `len(files) // batch_size` would skip that
# last batch and yield 0 steps for a split smaller than one batch.
r = model.fit(
  train_generator,
  validation_data=valid_generator,
  epochs=epochs,
  steps_per_epoch=len(train_generator),
  validation_steps=len(valid_generator),
)

# TODO: compare RMSprop vs Adam as the optimizer


In [None]:
# Training vs. validation loss per epoch
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    plt.plot(r.history[history_key], label=curve_label)
plt.legend();

In [None]:
# Training vs. validation accuracy per epoch
for history_key, curve_label in (('accuracy', 'train acc'), ('val_accuracy', 'val acc')):
    plt.plot(r.history[history_key], label=curve_label)
plt.legend();

In [None]:
def get_confusion_matrix(data_path, N):
    """Compute the confusion matrix of `model` over the images in `data_path`.

    Images are streamed in directory order (shuffle=False) through `val_gen`,
    so they receive the validation preprocessing and no augmentation. Relies on
    the notebook globals `val_gen`, `model`, `IMAGE_SIZE` and `batch_size`.

    Args:
        data_path: Directory containing one sub-folder per class.
        N: Number of samples to cover. Iteration stops once at least N samples
            have been predicted, or after one full pass over the directory,
            whichever comes first.

    Returns:
        The array produced by sklearn's `confusion_matrix`, always of shape
        (num_classes, num_classes) with rows/columns in class-index order,
        even when some class never occurs among the targets or predictions.
    """
    print("Generating confusion matrix", N)
    generator = val_gen.flow_from_directory(
        data_path,
        target_size=IMAGE_SIZE,
        shuffle=False,
        batch_size=batch_size * 2,
    )
    n_classes = len(generator.class_indices)

    predictions = []
    targets = []
    n_seen = 0
    # The directory iterator keeps yielding batches past the end of the data,
    # so cap the walk at len(generator) batches (one full pass). Otherwise an
    # N larger than the number of images found would count samples twice.
    for i, (x, y) in zip(range(1, len(generator) + 1), generator):
        if i % 50 == 0:
            print(i)
        # verbose=0: avoid one progress bar per batch in the cell output
        p = model.predict(x, verbose=0)
        predictions.append(np.argmax(p, axis=1))
        targets.append(np.argmax(y, axis=1))  # one-hot -> class index
        n_seen += len(x)
        if n_seen >= N:
            break

    if not targets:
        # No images found: return an empty-count matrix instead of failing
        return np.zeros((n_classes, n_classes), dtype=int)

    # Concatenate once at the end (keeps integer labels, avoids re-copying the
    # arrays every batch) and pin the label set so the matrix lines up with
    # the class names used for plotting.
    cm = confusion_matrix(
        np.concatenate(targets),
        np.concatenate(predictions),
        labels=np.arange(n_classes),
    )
    return cm

In [None]:
# Confusion matrix over the whole training set
n_train_samples = len(image_files)
cm = get_confusion_matrix(train_path, n_train_samples)
print(cm)

In [None]:
# Confusion matrix over the whole validation set
n_valid_samples = len(valid_image_files)
valid_cm = get_confusion_matrix(valid_path, n_valid_samples)
print(valid_cm)

In [None]:
# Plot the training confusion matrix
# (plot_confusion_matrix is imported with the other imports in the first cell)
plot_confusion_matrix(cm, labels, title='Train confusion matrix')

In [None]:
# Plot the validation confusion matrix with the same class labels
plot_confusion_matrix(
    valid_cm,
    labels,
    title='Validation confusion matrix',
)

In [None]:
# Metrics recorded per epoch during training
r.history.keys()

In [None]:
# Evaluate the trained model on the validation generator; with the compiled
# metrics the result is [loss, accuracy].
score = r.model.evaluate(valid_generator)

In [None]:
# Show the raw evaluation result
score

In [None]:
# score[1] is the accuracy as a fraction; the '.2%' format spec scales it by
# 100, rounds to two decimals and appends the percent sign.
print(f"Model Accuracy: {score[1]:.2%}")

In [None]:
# r.model.save("./")