### Data pre-processing and data augmentation

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
from keras.preprocessing.image import ImageDataGenerator,load_img,img_to_array,array_to_img
from keras.applications.xception import Xception,preprocess_input

Using TensorFlow backend.


In [3]:
# Generator that applies random augmentations to the images it is fed.
# One option per line so each setting can be tuned (or commented on) in isolation.
datagen=ImageDataGenerator(
    rotation_range=40,        # random rotations
    width_shift_range=0.2,    # random horizontal shifts
    height_shift_range=0.2,   # random vertical shifts
    shear_range=0.2,          # random shearing
    zoom_range=0.2,           # random zoom
    horizontal_flip=True,     # random left/right mirroring
    fill_mode='nearest',      # strategy for pixels created by the transforms
)

In [None]:
# Write a handful of augmented variants of one sample image to disk so the
# effect of `datagen` can be inspected visually.
preview_dir='preview'
# Make sure the output folder exists before flow() tries to save into it.
os.makedirs(preview_dir,exist_ok=True)

img=load_img("cat.2.jpg")
img=img_to_array(img)
img=np.expand_dims(img,axis=0) # add a leading batch axis, flow() is fed a batch of one image
# NOTE(review): preprocess_input rescales the pixel values before augmentation, so the
# saved previews may not look like the source photo -- confirm this is intended.
img=preprocess_input(img)

i=0
for batch in datagen.flow(img,batch_size=1,save_to_dir=preview_dir,save_prefix='cat',save_format='jpeg'):
    i+=1
    if i>20: # the loop only ends through this break; 21 batches are drawn in total
        break

In [5]:
def preprocess(im):
    """Turn a loaded image into a single preprocessed array.

    The image is converted with ``img_to_array``, wrapped in a one-element
    batch, passed through the Xception ``preprocess_input`` and the batch
    axis is dropped again, so the result describes one image.
    """
    single_image_batch=np.expand_dims(img_to_array(im),axis=0)
    return preprocess_input(single_image_batch)[0]

### Project starts from here

In [8]:
# Root folder of the dataset; the cells below read train/cats, train/dogs,
# test/cat and test/dog underneath it.
# Set the CAT_DOG_DATA_DIR environment variable to point the notebook at another
# location; when it is not set the original local path is used.
data_dir=os.environ.get("CAT_DOG_DATA_DIR",r"E:\003 IBM CE\3. Artificial Intelligence\Mody\Cat vs Dog Classification DL")

In [9]:
# Paths of the training images for cats.
# The folder is globbed through its full path instead of os.chdir + a relative
# pattern, so the notebook's working directory is left alone; glob.escape keeps
# special characters in the folder name literal, and sorting fixes the file order.
cat_train_img_names=sorted(glob.glob(os.path.join(glob.escape(os.path.join(data_dir,"train",'cats')),"*.jpg")))

In [10]:
# Load each training cat image resized to 224x224, preprocess it and collect
# the results in a list.
# load_img's target_size is (height, width); the channel count is not part of it.
cat_train=[preprocess(load_img(name,target_size=(224,224))) for name in cat_train_img_names]

In [11]:
# Paths of the training images for dogs.
# The folder is globbed through its full path instead of os.chdir + a relative
# pattern, so the notebook's working directory is left alone; glob.escape keeps
# special characters in the folder name literal, and sorting fixes the file order.
dogs_train_img_names=sorted(glob.glob(os.path.join(glob.escape(os.path.join(data_dir,"train",'dogs')),"*.jpg")))

In [12]:
# Load each training dog image resized to 224x224, preprocess it and collect
# the results in a list.
# load_img's target_size is (height, width); the channel count is not part of it.
dog_train=[preprocess(load_img(name,target_size=(224,224))) for name in dogs_train_img_names]

In [13]:
# Paths of the test images for cats.
# The folder is globbed through its full path instead of os.chdir + a relative
# pattern, so the notebook's working directory is left alone; glob.escape keeps
# special characters in the folder name literal, and sorting fixes the file order.
cat_test_img_names=sorted(glob.glob(os.path.join(glob.escape(os.path.join(data_dir,"test",'cat')),"*.jpg")))

In [14]:
# Load each test cat image resized to 224x224, preprocess it and collect the
# results in a list.
# load_img's target_size is (height, width); the channel count is not part of it.
cat_test=[preprocess(load_img(name,target_size=(224,224))) for name in cat_test_img_names]

In [15]:
# Paths of the test images for dogs.
# The folder is globbed through its full path instead of os.chdir + a relative
# pattern, so the notebook's working directory is left alone; glob.escape keeps
# special characters in the folder name literal, and sorting fixes the file order.
dog_test_img_names=sorted(glob.glob(os.path.join(glob.escape(os.path.join(data_dir,"test",'dog')),"*.jpg")))

In [16]:
# Load each test dog image resized to 224x224, preprocess it and collect the
# results in a list.
# load_img's target_size is (height, width); the channel count is not part of it.
dog_test=[preprocess(load_img(name,target_size=(224,224))) for name in dog_test_img_names]

In [17]:
# Stack each list of preprocessed images into one float32 array per class and split.
cat_train=np.asarray(cat_train,dtype=np.float32)
dog_train=np.asarray(dog_train,dtype=np.float32)
cat_test=np.asarray(cat_test,dtype=np.float32)
dog_test=np.asarray(dog_test,dtype=np.float32)

In [18]:
# Shapes of the four image arrays, in the order: cat test, cat train, dog test, dog train.
tuple(images.shape for images in (cat_test,cat_train,dog_test,dog_train))

((400, 224, 224, 3),
 (1000, 224, 224, 3),
 (400, 224, 224, 3),
 (1000, 224, 224, 3))

In [19]:
# Join the per-class arrays along the image axis, cats first and dogs second
# (the labels created further down rely on this order).
train=np.concatenate([cat_train,dog_train])
test=np.concatenate([cat_test,dog_test])

In [20]:
# Sanity check: number of training images followed by the per-image dimensions.
train.shape

(2000, 224, 224, 3)

In [23]:
# Sanity check: number of test images followed by the per-image dimensions.
test.shape

(800, 224, 224, 3)

In [28]:
# Target labels: 0 for every cat image followed by 1 for every dog image,
# matching the cats-then-dogs order of the concatenated image arrays.
y_train=[0]*len(cat_train)+[1]*len(dog_train)
y_test=[0]*len(cat_test)+[1]*len(dog_test)

In [29]:
# Turn the label lists into float32 arrays.
y_train=np.asarray(y_train,dtype=np.float32)
y_test=np.asarray(y_test,dtype=np.float32)

In [31]:
# Xception without its top classification layers; with pooling='avg' the model
# returns one feature vector per image.
base_model=Xception(
    include_top=False,
    pooling='avg',
    input_shape=(224,224,3),
)
# Extract features for the training images with the pretrained model. A separate
# logistic-regression classifier is fitted on these features further down to do
# the binary classification, as there are only two classes (cat and dog).
train_features=base_model.predict(train)

In [35]:
# Show the extracted training features as a table (one row per image, one column per feature).
pd.DataFrame(train_features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,0.150080,0.811858,0.342450,0.006123,0.148101,0.430158,0.007205,1.474634,0.701332,0.000000,...,0.119137,0.061524,0.061700,0.169260,0.120772,0.825829,0.000829,0.000902,0.157766,1.569686
1,0.077801,0.128635,0.053697,0.000000,0.207505,0.068977,0.000000,0.009002,0.002372,0.000000,...,0.000000,0.325826,0.000000,0.000902,0.012005,0.563872,0.000000,0.000000,0.000000,0.153744
2,0.253329,0.014175,0.773287,0.000000,0.013031,0.225522,0.000000,0.338383,0.000000,0.010185,...,0.207356,0.000000,0.159052,0.000000,0.046788,0.000000,0.629110,0.020101,0.033078,1.071828
3,0.441684,0.098145,0.706996,0.514371,0.000649,0.561485,0.007586,0.000000,0.653861,0.330843,...,0.000000,0.144847,1.024179,0.000000,0.717531,0.701420,0.667020,0.000023,0.430907,0.778511
4,0.591277,0.421175,0.571200,0.626708,0.749512,0.927735,0.113868,0.000723,0.012677,0.207897,...,0.486649,0.006397,0.493272,0.000000,0.177995,0.120101,0.186987,0.000000,0.817334,0.973186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.000000,0.130042,0.304204,0.086013,0.000000,0.000000,0.078088,0.770991,0.132231,1.532350,...,0.000000,0.008109,0.000000,0.008081,0.307886,0.000363,0.009148,0.000000,0.018864,0.000000
1996,0.039513,0.000000,0.180184,0.227867,0.016785,0.005885,0.005835,0.033974,0.597758,0.339649,...,0.087334,0.008494,0.000000,0.107313,0.213265,0.000000,0.000000,0.290743,0.318834,0.613553
1997,0.055679,0.000000,0.000000,0.044460,0.344969,0.006694,0.320004,0.000000,0.000000,0.152697,...,0.000000,0.713272,0.231147,0.058665,0.586854,0.172884,0.039588,0.003617,0.084551,0.060280
1998,0.012894,0.473242,0.125561,0.000000,0.038456,0.000000,0.821533,0.225250,1.106480,0.335779,...,0.026384,0.075430,0.000000,0.000000,0.561156,0.002739,0.136715,0.000000,0.000698,0.001302


In [36]:
# Sanity check: (number of training images, number of extracted features).
train_features.shape

(2000, 2048)

In [37]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier that is trained on the extracted features.
# The solver is named explicitly: the recorded repr shows solver='warn', i.e. the
# run relied on a library default that differs between scikit-learn releases.
# 'liblinear' is the default of the releases that print 'warn', so the
# classifier stays the same on newer installations.
clf=LogisticRegression(solver='liblinear')

In [38]:
clf.fit(train_features,y_train) #fitting the extracted training features and the target variable to the model.

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [39]:
# Extract features for the test images with the same base model that was used for training.
test_features=base_model.predict(test)

In [40]:
# Sanity check: (number of test images, number of extracted features).
test_features.shape

(800, 2048)

In [41]:
# Predicted class label for every test image.
preds=clf.predict(test_features)

In [42]:
# Accuracy: fraction of test images whose predicted label equals the true label.
np.mean(preds==y_test) #model accuracy 98.5%

0.985