# Image Classification and LIME

### Import the libraries

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn import preprocessing, cross_validation, grid_search, metrics, ensemble
import scipy
import os
import lime
from lime import lime_image
from skimage.segmentation import mark_boundaries
from keras.applications import vgg16, inception_v3, resnet50
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.models import Sequential
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import load_model
import cv2

### Loading in the images and splitting into train/test

In [2]:
path_to_data = "data/food"

# Keep only real class directories: this skips stray files such as macOS's
# '.DS_Store' without assuming they exist (list.remove() raises ValueError
# when the entry is missing). os.listdir() returns entries in arbitrary
# order, so sort before slicing to always pick the same three classes.
folders = sorted(
    entry for entry in os.listdir(path_to_data)
    if os.path.isdir(os.path.join(path_to_data, entry))
)[0:3]

print(folders)

['apple_pie', 'baby_back_ribs', 'baklava']


In [3]:
# Load every image of the selected classes, resized to 224x224, together
# with the name of the folder (class) it came from.
data = []
labels = []

IMAGE_EXTENSIONS = ('.jpeg', '.jpg', '.png', '.tiff')

for folder in folders:
    folder_path = os.path.join(path_to_data, folder)
    # Sorted so that the sample order is the same on every run/machine.
    for food in sorted(os.listdir(folder_path)):
        if not food.lower().endswith(IMAGE_EXTENSIONS):
            continue
        image_path = os.path.join(folder_path, food)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; cv2.resize would then fail on it.
            print('Skipping unreadable image:', image_path)
            continue
        # OpenCV decodes to BGR; convert to RGB so that plt.imshow (and the
        # LIME visualisation) later show the true colours.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        data.append(cv2.resize(img, (224, 224)))
        labels.append(folder)

data = np.array(data)

### Creating a dictionary for encoding labels into integers for models and later decoding them

In [4]:

# Class name -> integer id, and the reverse mapping used to decode predictions.
# Built from `folders` so it works for any number of classes.
foodpedia = {name: idx for idx, name in enumerate(folders)}
inverse_foodpedia = {idx: name for name, idx in foodpedia.items()}

# Encode the string labels as integers. `.get(food, food)` leaves labels that
# are already integers untouched, so re-running this cell does not raise a
# KeyError.
labels = [foodpedia.get(food, food) for food in labels]

In [5]:
# Shape of the first 2000 loaded images.
data[:2000].shape

(2000, 224, 224, 3)

### Splitting the data into train and test

In [6]:
SEED = 42          # fixed seed so the subset and the split are reproducible
N_SAMPLES = 2000   # cap on the number of images used (keeps memory manageable)

# `data` was filled folder by folder, so taking the first N_SAMPLES rows would
# over-represent the first classes (and may drop the last class entirely,
# depending on how many images each folder holds). Draw a random subset instead.
subset_idx = np.random.RandomState(SEED).permutation(len(data))[:N_SAMPLES]
subset_labels = [labels[i] for i in subset_idx]

# Stratify so train and test keep the same class proportions.
X_train, X_test, Y_train, Y_test = train_test_split(
    data[subset_idx], subset_labels,
    test_size=0.3, random_state=SEED, stratify=subset_labels)

### If you have a previously saved model, load it

In [7]:

### Load a previously saved model (uncomment the lines below)

#filepath = "model_initial.h5"  # fill the filepath in here
#model = load_model(filepath)

In [8]:
img_width, img_height = 224, 224

num_classes = len(foodpedia)

# Scale pixel values from [0, 255] to [0, 1]. This must happen exactly once:
# dividing twice squeezes the inputs into [0, 1/255].
# NOTE: this cell overwrites X_*/Y_* in place, so re-run the split cell before
# re-running this one.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the integer class ids for categorical_crossentropy.
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)


x_train shape: (1400, 224, 224, 3)
1400 train samples
600 test samples


### Defining a Convolutional Neural Network Architecture

In [9]:

# VGG16 convolutional base pre-trained on ImageNet, without its classifier head.
model = vgg16.VGG16(weights="imagenet", include_top=False, input_shape=(img_width, img_height, 3))

# Freeze the layers we do not want to train. The slice covers the first five
# entries of model.layers, which includes the input layer (i.e. up to and
# including block2_conv1); every later layer is fine-tuned.
for layer in model.layers[:5]:
    layer.trainable = False

# Custom classification head on top of the convolutional features.
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation="softmax")(x)

# Final model = VGG16 base + custom head. `inputs`/`outputs` are the Keras 2
# keyword names; the singular `input`/`output` forms are legacy Keras 1 names.
model_final = Model(inputs=model.input, outputs=predictions)

# summary() prints the table itself and returns None, so do not wrap it in print().
model_final.summary()

# Compile the model.
model_final.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Save the best model (by validation accuracy) after each epoch, and stop early
# if validation accuracy has not improved for 10 epochs.
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

### Training the CNN on training data and validating on test

In [None]:

BATCH_SIZE = 32

# Augmentation only (rotations, shifts, flips). Featurewise centring/std
# normalisation is deliberately NOT used: it would be applied to the training
# batches only, while validation, evaluation, prediction and the LIME
# explanation all feed the network the plain [0, 1]-scaled images.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# Fit the model. steps_per_epoch must be an integer; round up so the last
# partial batch of each epoch is still used.
history = model_final.fit_generator(
    datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=int(np.ceil(len(X_train) / BATCH_SIZE)),
    validation_data=(X_test, Y_test),
    epochs=10,
    callbacks=[checkpoint, early])

# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
score = model_final.evaluate(X_test, Y_test, verbose=0)
print("Accuracy on test set: ", score[1])



Epoch 1/10


### Plotting training/validation loss and accuracy 

In [None]:


def plot_training_curves(train_values, val_values, metric_name):
    """Plot one training metric against its validation counterpart.

    train_values / val_values: per-epoch values taken from history.history.
    metric_name: capitalised label used for the axis and legend, e.g. 'Loss'.
    """
    plt.figure(figsize=(8, 3))
    plt.plot(train_values, 'g', linewidth=2.0)
    plt.plot(val_values, 'r', linewidth=2.0)
    plt.title('Plots of training vs validation ' + metric_name.lower(), fontsize=15)
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend(['Training ' + metric_name, 'Validation ' + metric_name], fontsize=10)


# Older Keras releases log accuracy as 'acc'/'val_acc'; newer ones use
# 'accuracy'/'val_accuracy'. Accept whichever this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

plot_training_curves(history.history['loss'], history.history['val_loss'], 'Loss')
plot_training_curves(history.history[acc_key], history.history['val_' + acc_key], 'Accuracy')

plt.show()



### Testing out the predictions on a random image

In [None]:
obs =500  ### change this depending on which observation you want to predict

# expand_dims will add an extra dimension to the data at a particular axis
# We want the input matrix to the network to be of the form (batchsize, height, width, channels)
# Thus we add the extra dimension to the axis 0.

test = np.expand_dims(X_test[obs], axis=0)

prediction = inverse_floropedia[int(model.predict_classes(test))]
true = inverse_floropedia[Y_test[obs]]

print('')
start =  '\033[1m'
end = '\033[0m'
print(start + 'I predict a {}. The actual label is {}.' .format(prediction, true)  + end)
print('')
%matplotlib inline
plt.figure(figsize= (10,10))
plt.imshow(X_test[obs])

### Now let's explain the results using LIME

In [None]:

explainer = lime_image.LimeImageExplainer()

# hide_color is the colour used for a superpixel that is turned OFF.
# Alternatively, if it is None, the superpixel is replaced by the average of its pixels.
# The classifier function must be the fine-tuned model (model_final): the
# headless VGG16 base (model) outputs feature maps, not class probabilities.
# The image is cast to double as in the LIME image tutorial
# (presumably required by the segmentation step - confirm for your skimage version).
explanation = explainer.explain_instance(X_test[obs].astype('double'), model_final.predict,
                                         top_labels=1, hide_color=0, num_samples=1000)


In [None]:

# Only the labels in explanation.top_labels were explained (top_labels=1 above),
# so ask for the mask of the top predicted class rather than a hard-coded id.
top_label = explanation.top_labels[0]
temp, mask = explanation.get_image_and_mask(top_label, positive_only=False, num_features=5, hide_rest=False)

# Outline the superpixels that most influenced the prediction.
plt.imshow(mark_boundaries(temp, mask))