In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
import pickle
import random

In [2]:

# 1. Preprocessing and creating training data

# Root folder of the PetImages dataset; it holds one sub-folder per category.
# Set the PET_IMAGES_DIR environment variable to point at another location
# without editing the notebook. The original path is kept as the default.
dir_path = os.environ.get("PET_IMAGES_DIR", "/home/parag/PetImages")

# Sub-folder names. A category's position in this list is its numeric label
# (0 = Dog, 1 = Cat).
CATEGORIES = ["Dog", "Cat"]

# Filled by create_training_data() with [image_array, label] pairs.
training_data = []

# Side length, in pixels, that every image is resized to.
img_size_setting = 50

def create_training_data():
    """Load every image under ``dir_path`` into the global ``training_data``.

    For each category in ``CATEGORIES`` the files in ``dir_path/<category>``
    are read as grayscale, resized to ``img_size_setting`` x
    ``img_size_setting`` and appended to ``training_data`` as
    ``[image_array, class_num]``, where ``class_num`` is the category's index
    in ``CATEGORIES`` (0 = Dog, 1 = Cat).

    Loading is best effort: files that cannot be decoded are skipped rather
    than aborting the run, but the number of skipped files is reported instead
    of being silently discarded.
    """
    skipped = 0

    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(dir_path, category)

        for img in tqdm(os.listdir(path)):  # iterate over each file of this category
            try:
                img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
                # cv2.imread does not raise for unreadable or non-image files;
                # it returns None, so check for that explicitly.
                if img_array is None:
                    skipped += 1
                    continue
                # Resize so that every sample has the same shape.
                resize_array = cv2.resize(img_array, (img_size_setting, img_size_setting))
            except cv2.error:
                # Corrupt image data that OpenCV could not process.
                skipped += 1
                continue
            training_data.append([resize_array, class_num])

    if skipped:
        print("Skipped {} unreadable file(s)".format(skipped))
            
create_training_data()

# Shuffle so the two classes are interleaved. The samples are appended one
# category after the other, and Keras' validation_split takes the *last*
# fraction of the data, so without shuffling the validation set would contain
# a single class. A dedicated, seeded RNG makes the shuffle (and therefore the
# train/validation split) reproducible without touching the global random state.
random.Random(42).shuffle(training_data)

print(len(training_data))
print(training_data[0])     # first [image_array, label] pair of training_data


100%|██████████| 12501/12501 [00:07<00:00, 1661.85it/s]
100%|██████████| 12501/12501 [00:06<00:00, 1794.23it/s]

24946
[array([[ 55,  58,  64, ..., 160, 160, 158],
       [ 49,  53,  60, ..., 159, 158, 158],
       [ 52,  52,  48, ..., 158, 160, 159],
       ...,
       [103, 101, 107, ..., 111, 105, 100],
       [ 90,  91, 102, ...,  97, 108, 113],
       [ 94, 100, 102, ..., 109, 118, 108]], dtype=uint8), 1]





In [3]:
# 2. Feature and label extraction from training_data
X = []
y = []

# Split the [image_array, label] pairs into parallel feature / label lists.
for features, label in training_data:
    X.append(features)
    y.append(label)

# Stack the images into one array of shape (n_images, height, width, 1);
# the trailing axis is the single grayscale channel expected by Conv2D.
X = np.array(X).reshape(-1, img_size_setting, img_size_setting, 1)

X = X / 255.0   # Normalize pixel values to the range 0..1

# Store the labels as an ndarray as well: some TensorFlow 2.x releases refuse
# a plain Python list of ints as the target of model.fit.
y = np.array(y)

print(X.shape)  # first axis is the number of images
print(X[0])

# Persist the preprocessed data so that training runs with different
# parameters do not have to repeat the preprocessing.
# `with` guarantees the files are flushed and closed even if dump() fails.
with open("X.pickle", "wb") as pickle_out:
    pickle.dump(X, pickle_out)

with open("y.pickle", "wb") as pickle_out:
    pickle.dump(y, pickle_out)




(24946, 50, 50, 1)
[[[0.21568627]
  [0.22745098]
  [0.25098039]
  ...
  [0.62745098]
  [0.62745098]
  [0.61960784]]

 [[0.19215686]
  [0.20784314]
  [0.23529412]
  ...
  [0.62352941]
  [0.61960784]
  [0.61960784]]

 [[0.20392157]
  [0.20392157]
  [0.18823529]
  ...
  [0.61960784]
  [0.62745098]
  [0.62352941]]

 ...

 [[0.40392157]
  [0.39607843]
  [0.41960784]
  ...
  [0.43529412]
  [0.41176471]
  [0.39215686]]

 [[0.35294118]
  [0.35686275]
  [0.4       ]
  ...
  [0.38039216]
  [0.42352941]
  [0.44313725]]

 [[0.36862745]
  [0.39215686]
  [0.4       ]
  ...
  [0.42745098]
  [0.4627451 ]
  [0.42352941]]]


In [4]:
# 3. ML Model formulation

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import time
from keras.callbacks import TensorBoard
import pickle



# Imported here (not only in the first cell) so this cell also runs on a fresh
# kernel that skipped the preprocessing cells.
import numpy as np
from keras import backend as K

# Loading pickle files
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook wrote itself.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    # asarray: accepts labels stored either as a list or as an ndarray.
    y = np.asarray(pickle.load(pickle_in))


# Grid search over 3 architecture parameters to compare their effect on the losses

dense_layers = [0,1,2]             # No. of dense layers after the CNN layers
layer_sizes = [64,128,256]         # No. of nodes (filters / units) in each layer
conv_layers = [2,3,4]              # No. of convolutional layers

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            # Unique run name (parameters + timestamp); also used as the TensorBoard log folder.
            NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(NAME)   # To remember the parameters of the model in each iteration

            # 27 models are built in this loop; drop the previous model's
            # backend state so memory does not keep growing between runs.
            K.clear_session()

            model = Sequential()

            # First convolutional block: the only one that declares the input shape.
            model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:]))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            # Remaining convolutional blocks (one was already added above).
            for _ in range(conv_layer-1):
                model.add(Conv2D(layer_size, (3, 3)))
                model.add(Activation('relu'))
                model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())

            for _ in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation('relu'))
                model.add(Dropout(0.2))

            # Output layer: always present, so it is not part of the loop above.
            # A single sigmoid unit gives a value in 0..1 for the binary Dog/Cat label.
            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            tensorboard = TensorBoard(log_dir="logs/{}".format(NAME)) # Keras callback to visualize losses and accuracy during training

            model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])  # Other loss / optimizer choices are possible

            model.fit(X, y,batch_size=32, epochs=15,validation_split=0.3,callbacks=[tensorboard])


Using TensorFlow backend.


2-conv-64-nodes-0-dense-1578295683
Train on 17462 samples, validate on 7484 samples
Epoch 1/15
 2176/17462 [==>...........................] - ETA: 10s - loss: 0.6939 - accuracy: 0.5060



Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3-conv-64-nodes-0-dense-1578295732
Train on 17462 samples, validate on 7484 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
4-conv-64-nodes-0-dense-1578295787
Train on 17462 samples, validate on 7484 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
2-conv-128-nodes-0-dense-1578295860
Train on 17462 samples, validate on 7484 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
3-conv-128-nodes-0-dense-1578295925
Train on 17462 samples, validate on 74

In [5]:
# 4.  Choose a model that gives minimum validation loss from the above models

# I have chosen the '1-dense-128 nodes-3CNN layers' model.
# Keep in mind that a minimum training loss isn't a reliable indicator of a good model (the model may simply have memorized the training data).
# The number of epochs can be adjusted based on the TensorBoard curves.
# I have used 10 epochs.


from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import time
from keras.callbacks import TensorBoard
import pickle



# Imported here (not only in the first cell) so this cell also runs on a fresh
# kernel that skipped the preprocessing cells.
import numpy as np

# Loading pickle files
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook wrote itself.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    # asarray: accepts labels stored either as a list or as an ndarray.
    y = np.asarray(pickle.load(pickle_in))

# Single configuration selected from the sweep; kept as one-element lists so
# the same loop structure (and run naming) as the sweep is reused.
dense_layers = [1]
layer_sizes = [128]
conv_layers = [3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            # Run name (parameters + timestamp); also used as the TensorBoard log folder.
            NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(NAME)

            model = Sequential()

            # First convolutional block: the only one that declares the input shape.
            model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:]))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            # Remaining convolutional blocks (one was already added above).
            for _ in range(conv_layer-1):
                model.add(Conv2D(layer_size, (3, 3)))
                model.add(Activation('relu'))
                model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())

            for _ in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation('relu'))
                model.add(Dropout(0.2))

            # Output layer: a single sigmoid unit for the binary Dog/Cat label.
            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

            model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

            model.fit(X, y,batch_size=32, epochs=10,validation_split=0.3,callbacks=[tensorboard])

# `model` is the model trained in the (single) loop iteration above.
model.save ('1-dense-128 nodes-3CNN layers') # save this model


3-conv-128-nodes-1-dense-1578298955
Train on 17462 samples, validate on 7484 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
# 5. To test model on outside images

# preprocessing of outside images
import tensorflow as tf 
import cv2
import numpy as np

# Label lookup table: index 0 -> "Dog", index 1 -> "Cat"
# (same order as the list used to label the training data).
CATEGORIES = [
    "Dog",
    "Cat",
]

# Image file to classify.
filepath = "/home/parag/aa/dog.jpg"

def outside_img(filepath, img_size_setting=50):
    """Load one image file and shape it like a single-sample model input.

    The image is read as grayscale, resized to ``img_size_setting`` x
    ``img_size_setting`` and only then scaled to the range 0..1. This is the
    same order of operations used when the training set was built (resize the
    uint8 image, then divide by 255), so the values fed to ``predict`` are
    produced exactly like the values the model was trained on.

    Args:
        filepath: Path of the image file to load.
        img_size_setting: Side length in pixels of the square model input.
            Must match the size used for training (50 in this notebook).

    Returns:
        Float array of shape ``(1, img_size_setting, img_size_setting, 1)``.
        Dividing by 255.0 also converts the uint8 pixels to float.

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than with an obscure TypeError below.
    if img_array is None:
        raise ValueError("Could not read image file: {!r}".format(filepath))

    resize_array = cv2.resize(img_array, (img_size_setting, img_size_setting))
    normalized_array = resize_array / 255.0

    # Add the batch axis (first) and the grayscale channel axis (last).
    return normalized_array.reshape(-1, img_size_setting, img_size_setting, 1)

model = tf.keras.models.load_model('1-dense-128 nodes-3CNN layers')


# predict works on a batch of samples; outside_img already returns a batch of
# one image with shape (1, 50, 50, 1), so it can be passed in directly.
prediction = model.predict(outside_img(filepath))

print(prediction)                           # nested array [[p]] with p the sigmoid output

# p lies between 0 and 1: below 0.5 means class 0 ("Dog"), otherwise class 1 ("Cat").
# A threshold is required here: int(p) alone truncates, which maps every p < 1.0
# to index 0 and would label almost every image as "Dog".
print(CATEGORIES[int(prediction[0][0] >= 0.5)])    # convert the class index into its name

[[0.07995712]]
Dog
