### Create a CNN model for image classification using Keras

In [1]:
# import modules

import os
from skimage.io import imread
import skimage
import numpy as np
import matplotlib.pyplot as plt
from warnings import filterwarnings
filterwarnings('ignore')
import cv2


from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten
from keras import backend as K
from keras import optimizers

Using TensorFlow backend.


In [2]:
# calculate image file paths

ROOT_PATH = 'D:/Study/DataScience/Data/DeepLearningData/vehicles'

training_path = os.path.join(ROOT_PATH, 'train')
testing_path = os.path.join(ROOT_PATH, 'test')

fname = []
for root,d_names,f_names in os.walk(training_path):
    for f in f_names:
        fname.append(os.path.join(root, f))


# prepare images

nrows = 150
ncolumns = 150
channels = 3

def read_and_process_image(list_of_images):
    X = [] # images
    y = [] # labels

    for image in list_of_images:
        X.append(cv2.resize(cv2.imread(image, cv2.IMREAD_COLOR),(nrows, ncolumns), interpolation = cv2.INTER_CUBIC))
        if 'cars' in image:
            y.append(1)
        elif 'planes' in image:
            y.append(0)
    return X,y


In [3]:
# Feature and target variable

X, y = read_and_process_image(fname)

# Convert list into numpy arroys

X = np.array(X)
y = np.array(y)

In [4]:
print(X.shape)
print(y.shape)

(400, 150, 150, 3)
(400,)


In [151]:
# get the count of unique values of classes
# total 8359 valus : 0 - 200
#                    1 - 200

unique_elements, counts_elements = np.unique(y, return_counts=True)
print("Frequency of unique values of the said array:")
print(np.asarray((unique_elements, counts_elements)))

Frequency of unique values of the said array:
[[  0   1]
 [200 200]]


In [152]:
# split the data into training and testing datasets

from keras.utils import to_categorical
#image_labels = to_categorical(labels)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)


In [153]:
print(X_train.shape)
print(y_train.shape)
#print(X_valtest.shape)
#print(y_valtest.shape)
print('\n')
print(X_test.shape)
print(y_test.shape)


(280, 150, 150, 3)
(280,)


(120, 150, 150, 3)
(120,)


In [154]:
# dimensions of our images.
img_width, img_height = 150, 150

epochs = 50
batch_size = 32

input_shape = (img_width, img_height, 3)

# create a function for classification
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
  


In [155]:
#X_train = X_train.reshape(-1,150, 150, 3)   #Reshape for CNN -  should work!!
#X_val = X_val.reshape(-1,150, 150, 3)
#X_test = X_test.reshape(-1,150, 150, 3)

model.compile(optimizer= optimizers.RMSprop(lr = 1e-4), loss='binary_crossentropy', metrics=['acc'])

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
                   validation_data=(X_test, y_test))
  
#model.evaluate(X_test, y_test)


Train on 280 samples, validate on 120 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50

KeyboardInterrupt: 

In [109]:
# Loss Curves
plt.figure(figsize=[8,6])
plt.plot(history.history['loss'],'r',linewidth=3.0)
plt.plot(history.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training loss', 'Validation Loss'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Loss',fontsize=16)
plt.title('Loss Curves',fontsize=16)
  
# Accuracy Curves
plt.figure(figsize=[8,6])
plt.plot(history.history['acc'],'r',linewidth=3.0)
plt.plot(history.history['val_acc'],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy Curves',fontsize=16)

NameError: name 'history' is not defined

<Figure size 576x432 with 0 Axes>

In [14]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, model.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, model.predict_proba(X_test))
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()

ValueError: multilabel-indicator format is not supported