In [19]:
from __future__ import print_function

import numpy as np
import tensorflow as tf
import keras
import keras.utils
from keras import utils as np_utils

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.layers.core import Dropout
from keras import backend as K
from keras.optimizers import Adam

from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd

from six.moves import cPickle as pickle
#from six.moves import range
#from scipy import ndimage
import os

import PIL.Image
from cStringIO import StringIO
import IPython.display

loading libraries: numpy for mathematical operation,pandas for dataset operation, PIL to load images, keras provide 
    environment for processing ( here tensorflow is in backend)

Functions to load images from files, resize and convert to a matrix using keras.preprocessing.image.

In [20]:
def load_image_filenames(folders):
    image_files = []
    
    for folder_tuple in folders:
        folder = folder_tuple[0]
        label_index = folder_tuple[1] - 1

        image_filepaths = [os.path.join(folder, image_filename) for image_filename in os.listdir(folder)]
        image_files.extend([(image_filepath, label_index) for image_filepath in image_filepaths])
        
    return image_files

def load_image(filename, target_size):
    try:
        img = image.load_img(filename, target_size=target_size)
    except IOError as e:
        print('Could not read:', filename, ':', e, ', skipping.')
        return None

    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
    
def load_images(image_files, target_size):
    x_list = []
    y_list = []
    
    for image_file in image_files:
        image_filepath = image_file[0]
        label_index = image_file[1]

        x = load_image(image_filepath, target_size)
        #if x == None:
         #   continue
            
        x_list.append(x)

        y = np.zeros((1, 3))
        y[0, label_index] = 1
        y_list.append(y)
        
    X = np.vstack(x_list)
    y = np.vstack(y_list)
    
    return X, y

Functions to convert image pixels from -1:1 range to 0:255 range and to display an image. normalization is done because 
        at last some weights are too large to store and takes more time

In [21]:
def denormalize_input(x):
    """
    Converts image pixels from -1:1 range to 0:255 range.
    """
    x /= 2.
    x += 0.5
    x *= 255.
    return x

In [22]:
def show_array(a, fmt='jpg'):
    """
    Displays an image inside of Jupyter notebook.
    """
    a = np.uint8(a)
    f = StringIO()
    PIL.Image.fromarray(a).save(f, fmt)
    IPython.display.display(IPython.display.Image(data=f.getvalue()))

A function to display a confusion matrix and a classification report.

In [23]:
def show_report(model, X, y):
    """
    Displays a confusion matrix and a classification report.
    """
    y_predicted = np.argmax(model.predict(X), axis=1)
    y_true = np.argmax(y, axis=1)

    print("Confusion matrix (rows: true, columns: predicted)")
    print(confusion_matrix(y_true, y_predicted))
    print("")

    print("Classification report")
    print(classification_report(y_true, y_predicted))

Data loading and pre-processing
Load images from 'train' and 'additional' folders, shuffle and split into train and dev sets in 80/20 proportion.

In [24]:
train_folders = [('train/Type_1/', 1), ('train/Type_2/', 2), ('train/Type_3/', 3)] 

In [25]:
for folder_tuple in train_folders:
    print(folder_tuple[0], len(os.listdir(folder_tuple[0])))

train/Type_1/ 248
train/Type_2/ 782
train/Type_3/ 451


In [26]:
target_size=np.array([224,224])
train_fraction = 0.8

loading images from folder

In [11]:
image_files = load_image_filenames(train_folders)
np.random.seed(42)
np.random.shuffle(image_files)
print('files', len(image_files))

train_count = int(len(image_files) * train_fraction)

train_files = image_files[0:train_count]
dev_files = image_files[train_count:]

print('train_files', len(train_files))
print('dev_files', len(dev_files))

X_train, y_train = load_images(train_files, target_size=target_size)
X_dev, y_dev = load_images(dev_files, target_size=target_size)

files 1481
train_files 1184
dev_files 297
Could not read: train/Type_1/.DS_Store : cannot identify image file 'train/Type_1/.DS_Store' , skipping.
Could not read: train/Type_3/.DS_Store : cannot identify image file 'train/Type_3/.DS_Store' , skipping.
Could not read: train/Type_2/.DS_Store : cannot identify image file 'train/Type_2/.DS_Store' , skipping.


ValueError: all the input arrays must have same number of dimensions

Note able to load images in my laptop due to RAM issue. This part is done in another pc and saved in s.npz for training part and 
d.npz for validation

In [27]:
data= np.load('s.npz')

In [28]:
X_train= data['a']
y_train = data['b']
X_train.shape



(1184, 224, 224, 3)

In [29]:
data1= np.load('d.npz')

In [30]:
X_dev= data1['a']
y_dev= data1['b']
X_dev.shape

(296, 224, 224, 3)

In [31]:
#Data_augmentation
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(X_train)

data augmentation is done to make the data far better by increasing the amount of data which results in less overfitting.

In [32]:
dropout_probability = 0.3
dense_layer_size = 512
batch_size = 64
epoch_count = 20

dropout used to decrease overfitting by selecting random neurons.

In [33]:
np.random.seed(42)

base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# add a fully-connected layer
x = Dense(dense_layer_size, activation='relu')(x)

# add a dropout layer for regularization
dropout = Dropout(dropout_probability)(x)

# and a logistic layer
predictions = Dense(3, activation='softmax')(dropout)

model = Model(input=base_model.input, output=predictions)
for layer in model.layers[:200]:
   layer.trainable = False
for layer in model.layers[200:]:
   layer.trainable = True




transfer learning is used with the help of pretrained data imagenet. we have used it after 200th layer. We have not that much dataset to train so we have used pretrained data. after this i have used pooling layer  then dense layer then finally otput function softmax 

keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto')
reduce_lr= keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0.0001)

In [44]:
model.compile(optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=10**-8, decay=0.0, amsgrad=False), loss='categorical_crossentropy')
model.fit_generator(datagen.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) / 32.
                    , epochs=epoch_count,validation_data=(X_dev, y_dev))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f5414e3a750>

model.compile(optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=10**-8, decay=0.0, amsgrad=False), loss='categorical_crossentropy')

model.fit(x=X_train, y=y_train, batch_size=32, epochs=epoch_count, verbose=2, validation_data=(X_dev, y_dev), callbacks=[reduce_lr])

data i scompiled here. validation loss is inceases means overfitting occured. It can be from different ways like complex function is trained as training error is decreasing but val error increasing. we are trying to make more simple function by changing layers.

In [None]:
model_json = model.to_json()
with open("models.json","w") as json_file:
    json_file.write(model_json)
model.save_weights("models.h5")

model  and weights are saved 

In [45]:
show_report(model, X_dev, y_dev)

Confusion matrix (rows: true, columns: predicted)
[[  3  36   8]
 [  2 114  30]
 [  0  48  55]]

Classification report
             precision    recall  f1-score   support

          0       0.60      0.06      0.12        47
          1       0.58      0.78      0.66       146
          2       0.59      0.53      0.56       103

avg / total       0.59      0.58      0.54       296



In [None]:
to show report

In [1]:
from keras.models import model_from_json
json_file = open('models.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


this is the model whose weight and model is presaved. this model can be used to classify into type1, type2 and type3 cervix cancer.