In [11]:
# downloading data

In [12]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [13]:
# Download the `salader/dogs-vs-cats` dataset archive with the Kaggle CLI
# (an up-to-date local copy is skipped unless --force is passed, as the output below shows).
!kaggle datasets download -d salader/dogs-vs-cats

dogs-vs-cats.zip: Skipping, found more recently modified local copy (use --force to force download)


In [14]:
import zipfile

# Unpack the downloaded archive into /content.
# The context manager closes the archive even if extraction raises part-way through.
with zipfile.ZipFile('/content/dogs-vs-cats.zip') as zip_ref:
    zip_ref.extractall('/content')

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [16]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,BatchNormalization,Dropout

In [17]:
# Dataset loaders: read the images from disk and serve them in batches.
# Both splits use identical settings: labels inferred from the sub-directory
# names, integer-encoded, 32 images per batch, every image resized to 256x256.
loader_options = dict(
    labels='inferred',
    label_mode='int',
    batch_size=32,
    image_size=(256, 256),
)

train_ds = keras.utils.image_dataset_from_directory(
    directory='/content/train', **loader_options)

validation_ds = keras.utils.image_dataset_from_directory(
    directory='/content/test', **loader_options)

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.


In [18]:
# Normalization
def process(image,label):
  """Divide the pixel values by 255 and return them as float32; the label passes through unchanged."""
  scaled = image/255
  return tf.cast(scaled,tf.float32),label

# Apply the rescaling to both datasets.
# NOTE: the dataset names are rebound in place, so re-running this cell rescales a second time.
train_ds, validation_ds = train_ds.map(process), validation_ds.map(process)

In [19]:
# create CNN model
# Three Conv2D -> MaxPooling2D stages (32, 64, 128 filters) feed a dense
# classifier head that ends in a single sigmoid unit for the binary decision.
model = Sequential([
    Conv2D(32, kernel_size=(3,3), padding='valid', activation='relu',
           input_shape=(256,256,3)),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(64, kernel_size=(3,3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(128, kernel_size=(3,3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])


In [20]:
# Print the layer-by-layer output shapes and parameter counts of the model built above.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 127, 127, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 30, 30, 128)       0

In [21]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output unit), and accuracy reported as the metric.
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs, evaluating on the validation set as well;
# the returned History object holds the per-epoch metrics plotted below.
history = model.fit(train_ds,epochs = 10,validation_data = validation_ds)

Epoch 1/10

In [None]:
# Training vs. validation accuracy per epoch for the most recent `model.fit` run.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], color='red', label='train')
ax.plot(history.history['val_accuracy'], color='blue', label='validation')
ax.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch for the most recent `model.fit` run.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], color='red', label='train')
ax.plot(history.history['val_loss'], color='green', label='validation')
ax.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
ax.legend()
plt.show()

### Here the training loss keeps decreasing while the validation loss is increasing.
### This is a sign of OVERFITTING. To reduce it, we can use the following techniques:
1. Add more data
2. Data augmentation
3. L1/L2 regularization
4. Dropout
5. Batch normalization
6. Reduce model complexity

In [None]:
# improving CNN model using BatchNormalization,Dropout
# Same three Conv2D -> MaxPooling2D stages as the baseline, with a
# BatchNormalization layer after every convolution and a Dropout layer after
# each hidden Dense layer.
model = Sequential()
model.add(Conv2D(32,kernel_size =(3,3),
                 padding = 'valid',
                 activation = 'relu',
                 input_shape = (256,256,3) ))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size = (2,2),
                       strides = 2, padding = 'valid'))
model.add(Conv2D(64,kernel_size =(3,3),
                 padding = 'valid',
                 activation = 'relu' ))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size = (2,2),
                       strides = 2, padding = 'valid'))

model.add(Conv2D(128,kernel_size =(3,3),
                 padding = 'valid',
                 activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size = (2,2),
                       strides = 2, padding = 'valid'))

model.add(Flatten())
model.add(Dense(128,activation = 'relu'))
# Dropout rate is 0.1; the previous `Dropout(0,1)` passed rate=0 (plus a
# noise_shape of 1), which disables dropout entirely.
model.add(Dropout(0.1))
model.add(Dense(64,activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(1,activation = 'sigmoid'))

# `model` now refers to a brand-new network, so it has to be compiled again
# before `fit` can be called; same settings as the baseline model.
model.compile(
    optimizer = 'Adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)


In [None]:
# Train the BatchNormalization/Dropout model for 10 epochs with the same data;
# `history` is overwritten, so the plots below describe this run.
history = model.fit(train_ds,epochs = 10,validation_data = validation_ds)

In [None]:
# Training vs. validation accuracy per epoch for the most recent `model.fit` run.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], color='red', label='train')
ax.plot(history.history['val_accuracy'], color='blue', label='validation')
ax.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch for the most recent `model.fit` run.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], color='red', label='train')
ax.plot(history.history['val_loss'], color='green', label='validation')
ax.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
ax.legend()
plt.show()

# Testing the result

In [None]:
import cv2

In [None]:
# cv2.imread does not raise on a missing/unreadable file, it returns None; fail early with a clear message.
test_image = cv2.imread('/content/dog1.jpg')
if test_image is None:
    raise FileNotFoundError("could not read image '/content/dog1.jpg'")
# OpenCV loads colour images in BGR order. Convert to RGB so matplotlib shows
# the true colours and the model receives the channel order of the Keras
# image loader used for training (RGB by default).
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
plt.imshow(test_image)

In [None]:
# Inspect the array shape of the loaded image before it is resized.
test_image.shape

In [None]:
# Resize to 256x256, the spatial size the model's input layer was declared with.
test_image = cv2.resize(test_image,(256,256))

In [None]:
# Add the batch axis and rescale the pixels by 1/255, the same preprocessing
# `process` applies to the training and validation data; feeding raw 0-255
# values would not match what the model was trained on.
test_input = test_image.reshape(1,256,256,3).astype('float32') / 255

In [None]:
# Run the single-image batch through the model; the final layer is a sigmoid unit.
model.predict(test_input)

In [None]:
# The model outputs a probability: values close to 1 mean dog, values close to 0 mean cat

In [None]:
# cv2.imread does not raise on a missing/unreadable file, it returns None; fail early with a clear message.
test_image = cv2.imread('/content/cat.jpg')
if test_image is None:
    raise FileNotFoundError("could not read image '/content/cat.jpg'")
# OpenCV loads colour images in BGR order. Convert to RGB so matplotlib shows
# the true colours and the model receives the channel order of the Keras
# image loader used for training (RGB by default).
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
plt.imshow(test_image)

In [None]:
# Inspect the array shape of the loaded image before it is resized.
test_image.shape

In [None]:
# Resize to 256x256, the spatial size the model's input layer was declared with.
test_image = cv2.resize(test_image,(256,256))

In [None]:
# Add the batch axis and rescale the pixels by 1/255, the same preprocessing
# `process` applies to the training and validation data; feeding raw 0-255
# values would not match what the model was trained on.
test_input = test_image.reshape(1,256,256,3).astype('float32') / 255

In [None]:
# Run the single-image batch through the model; the final layer is a sigmoid unit.
model.predict(test_input)