In [None]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
import os

import numpy as np

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, classification_report
import itertools

%matplotlib inline


In [None]:
# Only three CIFAR-10 categories are kept in this notebook.
num_classes = 3
# Mini-batch size used during training (32 is also the Keras default).
batch_size = 32

In [None]:
# Fetch CIFAR-10 through Keras; load_data() returns the train and test splits,
# each as an (images, labels) pair.
train_data, test_data = cifar10.load_data()
x_train, y_train = train_data
x_test, y_test = test_data

# Report array shapes and the number of samples per split.
print('x_train shape:', np.shape(x_train))
print('y_train shape:', np.shape(y_train))
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')


**Make a new dataset containing only the automobile, dog, and horse classes**

In [None]:
# Keep only the training samples labelled 1, 5 or 7
# (automobile, dog and horse in CIFAR-10).
train_ids = y_train.ravel()
keep_train = (train_ids == 1) | (train_ids == 5) | (train_ids == 7)
x_train = x_train[keep_train]
y_train = y_train[keep_train]

In [None]:
# Sanity check: shapes of the filtered training arrays.
print('x_train shape:', np.shape(x_train))
print('y_train shape:', np.shape(y_train))

In [None]:
# Apply the same class filter (ids 1, 5 and 7) to the test split.
test_ids = y_test.ravel()
keep_test = (test_ids == 1) | (test_ids == 5) | (test_ids == 7)
x_test = x_test[keep_test]
y_test = y_test[keep_test]

In [None]:
# Sanity check: shapes of the filtered test arrays.
print('x_test shape:', np.shape(x_test))
print('y_test shape:', np.shape(y_test))

**Re-labeling data**

In [None]:
# Remap the original class ids in place: 1 -> 0, 5 -> 1, anything else -> 2.
# Both masks are computed before the first write so freshly written values are
# never mistaken for original ids.
train_was_1 = y_train == 1
train_was_5 = y_train == 5
y_train[~(train_was_1 | train_was_5)] = 2
y_train[train_was_1] = 0
y_train[train_was_5] = 1

In [None]:
# Same remapping for the test labels: 1 -> 0, 5 -> 1, anything else -> 2.
# Masks are taken up front, before any value is overwritten.
test_was_1 = y_test == 1
test_was_5 = y_test == 5
y_test[~(test_was_1 | test_was_5)] = 2
y_test[test_was_1] = 0
y_test[test_was_5] = 1

In [None]:
# Compare the class balance of the training and test splits side by side.
fig, axs = plt.subplots(1, 2, figsize=(15, 5))

# The label vector is passed as `x=`: seaborn 0.11 deprecated positional data
# vectors, and from 0.12 on the only positional argument is `data`.
# Count plot for the training set
sns.countplot(x=y_train.ravel(), ax=axs[0])
axs[0].set_title('Distribution of training data')
axs[0].set_xlabel('Classes')

# Count plot for the test set
sns.countplot(x=y_test.ravel(), ax=axs[1])
axs[1].set_title('Distribution of Testing data')
axs[1].set_xlabel('Classes')
plt.show()

In [None]:
# Cast the images to float32 and divide by 255 so the pixel values are scaled
# down before training.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the integer class labels into binary class matrices with
# `num_classes` columns.
y_train, y_test = (
    keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)

In [None]:
# Define the convolutional network.
model = Sequential()

# Three stages of identical structure and growing width (32, 64, 128 filters):
# CONV2D => CONV2D => BATCHNORMALIZATION => POOL => DROPOUT
for stage_index, n_filters in enumerate((32, 64, 128)):
    # Only the very first layer is given the input image shape.
    first_layer_args = {'input_shape': x_train.shape[1:]} if stage_index == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), padding='same', activation='relu',
                     **first_layer_args))
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# FLATTEN => DENSE (ReLU) => DROPOUT
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))

# Softmax classifier with one output unit per class.
model.add(Dense(num_classes, activation='softmax'))

model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
history = None  # Will hold the History object returned by training.
data_augmentation = True  # Toggle real-time data augmentation.
# Number of training epochs, shared by both branches. Previously the plain
# branch referenced an undefined `epochs` (NameError) while the augmented
# branch hard-coded 40.
epochs = 40

if not data_augmentation:
    print('Not using data augmentation.')
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and real-time data augmentation:
    # small horizontal/vertical shifts plus random horizontal flips.
    datagen = ImageDataGenerator(
        rotation_range=0,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.,
        zoom_range=0.,
        fill_mode='nearest',
        horizontal_flip=True,
        vertical_flip=False,
        rescale=None,
        validation_split=0.0)

    # NOTE(review): fit() computes dataset statistics that only the
    # feature-wise normalisation/whitening options use; none are enabled here,
    # so this call looks redundant. Kept to leave behaviour untouched.
    datagen.fit(x_train)
    # NOTE(review): fit_generator is deprecated in TF2-era Keras in favour of
    # fit(); kept because the installed Keras version is not known here.
    history = model.fit_generator(datagen.flow(x_train, y_train,
                                               batch_size=batch_size),
                                  epochs=epochs,
                                  validation_data=(x_test, y_test),
                                  workers=4)

In [None]:
def plotmodelhistory(history):
    """Plot training and validation curves for accuracy and loss side by side.

    `history` must expose a `.history` mapping containing the keys
    'accuracy', 'val_accuracy', 'loss' and 'val_loss'; each is drawn
    against the epoch index.
    """
    _, axs = plt.subplots(1, 2, figsize=(15, 5))

    # (train key, validation key, panel title, y-axis label) for each panel.
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    )
    for ax, (train_key, val_key, title, y_label) in zip(axs, panels):
        ax.plot(history.history[train_key])
        ax.plot(history.history[val_key])
        ax.set_title(title)
        ax.set_ylabel(y_label)
        ax.set_xlabel('Epoch')
        ax.legend(['train', 'validate'], loc='upper left')
    plt.show()

# Show which metrics were recorded during training, then draw the curves.
recorded_metrics = history.history
print(recorded_metrics.keys())

plotmodelhistory(history)


In [None]:
# Evaluate the trained model on the test split; `scores` holds the loss
# followed by the accuracy metric.
scores = model.evaluate(x_test, y_test, verbose=1)
for caption, value in zip(('Test loss:', 'Test accuracy:'), scores):
    print(caption, value)

# Run the model on every test image to obtain its per-class outputs.
pred = model.predict(x_test)

In [None]:
# Display names for class ids 0, 1 and 2.
labels = ['Automobile', 'Dog', 'Horse']

# Collapse the one-hot ground truth back to integer class ids.
Y_true = y_test.argmax(axis=1)
# Take the highest-scoring class of each prediction as the predicted class id.
Y_pred_classes = pred.argmax(axis=1)

In [None]:
# Per-class precision, recall and F1 on the test split.
report = classification_report(Y_true, Y_pred_classes)
print(report)

In [None]:
# Show the first R*C test images in a grid, titled with true and predicted class.
R = 5
C = 5
fig, axes = plt.subplots(R, C, figsize=(12, 12))
axes = axes.ravel()
# Spacing is a property of the whole figure: set it once, not once per image.
plt.subplots_adjust(wspace=1)

for i, ax in enumerate(axes):
    ax.axis('off')
    # Leave the cell blank if there are fewer test images than grid cells.
    if i >= len(x_test):
        continue
    ax.imshow(x_test[i])
    ax.set_title("True: %s \nPredict: %s" % (labels[Y_true[i]], labels[Y_pred_classes[i]]))


In [None]:
def show_similar(number, dict_key, max_samples=300):
    """Plot test images whose true label equals ``dict_key``.

    Only the first ``max_samples`` test images are scanned. Each match is drawn
    in its own small figure, titled with the model's predicted class and the
    true class.

    Args:
        number: Index of the reference image. Not used by this function; kept
            so existing callers keep working.
        dict_key: Class id to match against ``Y_true``.
        max_samples: Number of leading test images to scan (default 300, the
            value that used to be hard-coded).
    """
    # Clamp to the data actually available so a small test set cannot overrun.
    limit = min(max_samples, len(x_test))
    matches = [i for i in range(limit) if Y_true[i] == dict_key]
    if not matches:
        return

    # One batched forward pass over the matching images replaces the former
    # per-image predict_classes() call, which was slow and no longer exists in
    # newer Keras releases. argmax over the softmax output gives the same ids.
    predicted = np.argmax(model.predict(x_test[matches]), axis=-1)

    for i, predicted_class in zip(matches, predicted):
        plt.figure(figsize=(3, 3))
        plt.imshow(x_test[i])
        plt.title("Similar images \nPredicted: {} \nTrue Label: {}".format(labels[predicted_class],labels[Y_true[i]]))
    plt.show()

In [None]:
def show_test(number):
    """Show one test image with its predicted and true class, then its look-alikes.

    After drawing the image at index ``number`` of ``x_test``, this calls
    ``show_similar`` with that image's true label.

    Args:
        number: Index into ``x_test`` / ``Y_true`` of the image to display.
    """
    plt.figure(figsize=(3, 3))
    # The model expects a batch, so wrap the single image in a batch of one.
    test_image = np.expand_dims(x_test[number], axis=0)
    # predict_classes() no longer exists in newer Keras releases; taking the
    # argmax of the softmax output yields the same class id.
    predicted_class = np.argmax(model.predict(test_image), axis=-1)[0]
    plt.imshow(x_test[number])
    plt.title("Orignal images \nPredicted: {} \nTrue Label: {}".format(labels[predicted_class],labels[Y_true[number]]))
    show_similar(number, Y_true[number])

In [None]:
# Index of the test image to inspect. The gallery that follows is selected by
# this image's TRUE label, not its predicted one.
# Example indices from the original notes: 25 = dog, 1000 = horse, 2000 = automobile.
reference_index = 1000
show_test(reference_index)