<font size=6>***This is my code to train a deep learning model to differentiate pictures of Cats and Dogs***.
We have 25000 pictures of dogs and cats for this task.

**In the first part, I will build a model from scratch and see what accuracy I get. Then I will apply transfer learning, using a pre-trained model to achieve a higher accuracy.**

<font size=6>***SECTION 1: MY OWN MODEL FROM SCRATCH***

In [None]:
#import needed libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pyplot as mpimg
%matplotlib inline
from shutil import copyfile
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array

In [None]:
#PREPARE DATA (pictures)
#The source training folder is flat: cat.N.jpg and dog.N.jpg sit side by side.
#The image generators used later need one sub-folder per category, so the pictures
#are copied into train/cat, train/dog, validation/cat and validation/dog.
from shutil import rmtree

source_train_direct='../input/dogs-vs-cats-redux-kernels-edition/train'
source_test_direct='../input/dogs-vs-cats-redux-kernels-edition/test'

images_per_class=12500  #files are numbered 0..12499 for each animal
train_per_class=12250   #the first 12250 of each animal train the model, the remaining 250 validate it

#define paths to the class folders
train_cat_path='train/cat'
train_dog_path='train/dog'
test_cat_path='validation/cat'
test_dog_path='validation/dog'

#remove whatever an earlier run left behind; ignore_errors keeps the very first run
#(when the folders do not exist yet) from reporting a failed delete
rmtree('train', ignore_errors=True)
rmtree('validation', ignore_errors=True)
#makedirs also creates the parent 'train' / 'validation' folder
for folder in (train_cat_path, train_dog_path, test_cat_path, test_dog_path):
    os.makedirs(folder)

#loop over the images and copy each one into the right directory,
#so images are separated into train/validation and into cat/dog folders
total_files=2*images_per_class
for i in range(images_per_class):
    cat='cat.'+str(i)+'.jpg'
    dog='dog.'+str(i)+'.jpg'
    if i<train_per_class:
        cat_folder, dog_folder = train_cat_path, train_dog_path
    else:
        cat_folder, dog_folder = test_cat_path, test_dog_path
    copyfile(os.path.join(source_train_direct,cat), os.path.join(cat_folder,cat))
    copyfile(os.path.join(source_train_direct,dog), os.path.join(dog_folder,dog))
    #end='\r' returns to the start of the line so the counter updates in place
    print('Copied',(i+1)*2,'out of {:,} files'.format(total_files), end='\r')

#now all the folders are sorted out, so the image generators will know that there are two categories


In [None]:
#display one cat picture and one dog picture from the training folders
#(mpimg is the alias the import cell gives to matplotlib.pyplot)
for sample_path in ('train/cat/cat.1.jpg', 'train/dog/dog.10.jpg'):
    img=mpimg.imread(sample_path)
    imgplot=plt.imshow(img)
    plt.show()

In [None]:
#IMAGE PIPELINE
#ImageDataGenerator prepares the pictures to be put in the model.
#Two generators are configured: one that also applies data augmentation
#(random flips, rotations, shifts, shear and zoom) and one that only rescales.

image_size=150

augmentation_settings=dict(
    horizontal_flip=True,
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest', #fill missing values with the nearest value
)
data_generator_with_aug=ImageDataGenerator(**augmentation_settings)
data_generator_no_aug=ImageDataGenerator(rescale=1./255)

#settings shared by every directory reader below
flow_settings=dict(
    target_size=(image_size, image_size),
    batch_size=100,
    class_mode='categorical',
)

#training pictures, augmented
train_generator=data_generator_with_aug.flow_from_directory(directory='train', **flow_settings)
#training pictures, rescaled only
train_generator_no_aug=data_generator_no_aug.flow_from_directory(directory='train', **flow_settings)
#validation pictures, rescaled only
validation_generator=data_generator_no_aug.flow_from_directory(directory='validation', **flow_settings)

#show which integer label was assigned to each class folder
train_generator.class_indices

In [None]:
#CONSTRUCT A MODEL
#now that the image data is prepared, define the network that will be trained on it
image_size=150
num_classes=2

#three 3x3 convolutions (24 filters, stride 2, relu), each followed by 50% dropout,
#then a 128-unit dense layer and a softmax output with one unit per class
model=Sequential([
    Conv2D(24, kernel_size=(3,3), strides=2, activation='relu',
           input_shape=(image_size, image_size, 3)),
    Dropout(0.5),
    Conv2D(24, kernel_size=(3,3), strides=2, activation='relu'),
    Dropout(0.5),
    Conv2D(24, kernel_size=(3,3), strides=2, activation='relu'),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

#compile the model
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
#TRAIN THE MODEL
#train with augmentation
#Step counts are read from the generators (len() is the number of batches in one
#pass over the folder) instead of repeating the split sizes by hand; with the
#24,500 / 500 picture split and batch_size=100 this is 245 and 5 steps.
history=model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=4,
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator))

#older Keras versions record the metric as 'acc', newer ones as 'accuracy'
acc_key='acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])

In [None]:
#now we see an example image and what our model predicted for that image
from IPython.display import Image, display

img_path='validation/dog/dog.12300.jpg'
display(Image(img_path))
#Give the picture the same preparation the generators applied: resize to the model's
#input size, scale pixel values by 1/255 and add a leading batch axis.
#load_img / img_to_array come from the tensorflow keras import at the top of the
#notebook, so the same Keras implementation that built the model is used here.
img=load_img(img_path, target_size=(image_size, image_size))
array=img_to_array(img)
array=np.expand_dims(array, axis=0)
array=array/255
#predict returns one row of softmax outputs per input picture
probabilities=model.predict(array)
print('Cat and after Dog probability:'+ str(probabilities))
#the most probable class; assumes class_indices is {'cat': 0, 'dog': 1} as shown
#by train_generator.class_indices - confirm against that cell's output
predicted_class=int(np.argmax(probabilities, axis=-1)[0])
if predicted_class==1:
    prediction=['Dog']
else:
    prediction=['Cat']
print('What model prediction to be in the picture:'+ str(prediction))
#now we can pull up any picture to see if the model predicted it right by just changing img_path
#now we can pull up any picture to see if the model predicted it right by just changing the directory

<font size=6>**NOW I WILL USE TRANSFER LEARNING AND SEE WHAT ACCURACY I GET**
Remember that our image data is already prepared. All we need to do is choose a pre-trained model and work with it.</font>

<font size=6>**CHAPTER 2: USING A PRE-TRAINED MODEL** DATA PREP WILL BE THE SAME AS BEFORE. WE WILL JUST NEED FEWER PICTURES TO TRAIN THE MODEL.</font>

The code below is for transfer learning. We are not going to execute it here, but a viewer can copy the code and run it.

#data prep, IT IS THE SAME BUT WE JUST NEED FEWER PICTURES FOR A PRE-TRAINED MODEL TO WORK
#all the same as above but with fewer pictures needed as the model was already trained
from shutil import rmtree

source_train_direct='../input/dogs-vs-cats-redux-kernels-edition/train'
source_test_direct='../input/dogs-vs-cats-redux-kernels-edition/test'

images_per_class=175  #fewer pictures needed: files 0..174 of each animal
train_per_class=150   #the first 150 of each animal train the model, the remaining 25 validate it

train_cat_path='train/cat'
train_dog_path='train/dog'
test_cat_path='validation/cat'
test_dog_path='validation/dog'

#clean up from previous work; ignore_errors keeps a run without existing folders
#from reporting a failed delete
rmtree('train', ignore_errors=True)
rmtree('validation', ignore_errors=True)
#makedirs also creates the parent 'train' / 'validation' folder
for folder in (train_cat_path, train_dog_path, test_cat_path, test_dog_path):
    os.makedirs(folder)

total_files=2*images_per_class
for i in range(images_per_class):
    cat='cat.'+str(i)+'.jpg'
    dog='dog.'+str(i)+'.jpg'
    if i<train_per_class:
        cat_folder, dog_folder = train_cat_path, train_dog_path
    else:
        cat_folder, dog_folder = test_cat_path, test_dog_path
    copyfile(os.path.join(source_train_direct,cat), os.path.join(cat_folder,cat))
    copyfile(os.path.join(source_train_direct,dog), os.path.join(dog_folder,dog))
    #report progress against the number of files actually copied in this section
    print('Copied',(i+1)*2,'out of {:,} files'.format(total_files), end='\r')

#display one cat picture and one dog picture from the training folders
#(mpimg is the alias the import cell gives to matplotlib.pyplot)
for sample_path in ('train/cat/cat.1.jpg', 'train/dog/dog.10.jpg'):
    img=mpimg.imread(sample_path)
    imgplot=plt.imshow(img)
    plt.show()
#image pipeline for the smaller data set: same generators as before, batches of 50
image_size=150

augmentation_settings=dict(
    horizontal_flip=True,
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
)
data_generator_with_aug=ImageDataGenerator(**augmentation_settings)
data_generator_no_aug=ImageDataGenerator(rescale=1./255)

#settings shared by both directory readers
flow_settings=dict(
    target_size=(image_size, image_size),
    batch_size=50,
    class_mode='categorical',
)

#training pictures, augmented
train_generator=data_generator_with_aug.flow_from_directory(directory='train', **flow_settings)
#validation pictures, rescaled only
validation_generator=data_generator_no_aug.flow_from_directory(directory='validation', **flow_settings)

#show which integer label was assigned to each class folder
train_generator.class_indices

<font size=6>**Let's finally use a pre-trained model** .</font>

# we will use the ResNet pre-trained model
from tensorflow.python.keras.applications import ResNet50
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D

num_classes=2
resnet_weights_path = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

#ResNet50 without its top layers, average-pooled, with weights loaded from the local file
pretrained_base=ResNet50(include_top=False, pooling='avg', weights=resnet_weights_path)

#only a new softmax layer with one unit per class is stacked on top of the base
new_model=Sequential([
    pretrained_base,
    Dense(num_classes, activation='softmax'),
])
#do not need to train the pre-trained part (pretrained_base is new_model.layers[0])
pretrained_base.trainable=False

#compile
new_model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

#Pre-trained models usually need much less data to train (300 images is already enough).
#Training with many more pictures than that can still report a high accuracy,
#but it may come with overfitting.
#The same kind of pipelines as before are reused, so no extra pipeline code is needed.
#TRAIN THE MODEL
#train with augmentation
#Step counts are read from the generators (len() is the number of batches in one
#pass over the folder), so they match whatever is currently in 'train' and
#'validation' instead of assuming 500 training and 100 validation pictures.
history=new_model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=4,
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator))

#older Keras versions record the metric as 'acc', newer ones as 'accuracy'
acc_key='acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])