# Preparing Final Dataset

In [4]:
from os import makedirs, listdir
from os.path import join
from shutil import copyfile, copy2


In [None]:
# Build a class-labelled directory tree: <dataset_home>/cats and <dataset_home>/dogs
dataset_home = 'finaldata_dogs_vs_cats'
# one subdirectory per class label
subdirs = ['cats', 'dogs']
# NOTE: the loop variable is deliberately not called `dir`; a top-level loop
# variable stays bound in the kernel and would shadow the builtin dir().
for label_dir in subdirs:
    makedirs(join(dataset_home, label_dir), exist_ok=True)

# Copy every training image into the subdirectory matching its filename prefix.
# Files whose name starts with neither 'cat' nor 'dog' are skipped.
src_directory = 'train'
for filename in listdir(src_directory):
    src = join(src_directory, filename)
    if filename.startswith('cat'):
        copy2(src, join(dataset_home, 'cats'))
    elif filename.startswith('dog'):
        copy2(src, join(dataset_home, 'dogs'))


In [3]:
# Total number of files across every label subdirectory of the prepared dataset.
# Iterating over `subdirs` (instead of indexing [0] and [1]) keeps the count
# correct if the list of labels ever changes.
number_of_images = sum(len(listdir(join(dataset_home, label))) for label in subdirs)
number_of_images

25000

In [4]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
# define the cnn model
def define_model():
    """Build and compile a binary classifier on top of a frozen VGG16 base.

    The VGG16 network is instantiated without its top classifier for
    224x224 3-channel inputs, and all of its layers are marked
    non-trainable. A new head (Flatten -> Dense(128, relu) ->
    Dense(1, sigmoid)) is attached to the last base layer's output.

    Returns:
        A compiled Keras ``Model`` using SGD (learning rate 0.001,
        momentum 0.9), binary cross-entropy loss and an accuracy metric.
    """
    # convolutional base only; the original fully-connected top is left out
    base = VGG16(include_top=False, input_shape=(224, 224, 3))
    # freeze the base so that only the newly added head gets trained
    for base_layer in base.layers:
        base_layer.trainable = False

    # new classification head
    features = Flatten()(base.layers[-1].output)
    hidden = Dense(units=128, activation='relu', kernel_initializer='he_uniform')(features)
    prediction = Dense(units=1, activation='sigmoid')(hidden)

    # wrap base input and new output into a single model and compile it
    classifier = Model(inputs=base.input, outputs=prediction)
    classifier.compile(
        optimizer=SGD(learning_rate=0.001, momentum=0.9),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


In [3]:
# Build a fresh model and print its layer-by-layer summary (sanity check of the architecture)
define_model().summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [4]:
def run_test_harness(data_dir='finaldata_dogs_vs_cats', epochs=10, batch_size=64,
                     model_path='final_model.h5'):
    """Train the model from ``define_model`` on a labelled image directory and save it.

    All parameters default to the values that were previously hard-coded,
    so calling ``run_test_harness()`` with no arguments behaves as before.

    Args:
        data_dir: Directory containing one subdirectory per class, read via
            ``flow_from_directory`` with ``class_mode='binary'``.
        epochs: Number of training epochs.
        batch_size: Number of images per batch produced by the iterator.
        model_path: File path the trained model is saved to.

    Returns:
        None. The trained model is written to ``model_path``.
    """
    model = define_model()
    # featurewise_center subtracts `datagen.mean` from each image; the mean is
    # assigned directly rather than computed from the data.
    # NOTE(review): values look like the ImageNet per-channel means - confirm.
    datagen = ImageDataGenerator(featurewise_center=True)
    datagen.mean = [123.68, 116.779, 103.939]
    train_it = datagen.flow_from_directory(
        directory=data_dir, class_mode='binary', target_size=(224, 224), batch_size=batch_size)
    # one epoch = one full pass over the iterator (len(train_it) batches)
    model.fit(train_it, steps_per_epoch=len(train_it), epochs=epochs, verbose=1)
    model.save(model_path)


In [5]:
run_test_harness()

Found 25000 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [10]:
# Directory holding the unlabelled test images, and the previously saved model
test_dataset = 'test1'
model = load_model('final_model.h5')

# Centre the test images with the same fixed per-channel mean
datagen = ImageDataGenerator(featurewise_center=True)
datagen.mean = [123.68, 116.779, 103.939]

# class_mode=None yields images only (no labels); shuffle=False keeps the
# prediction order aligned with test_it.filenames
test_it = datagen.flow_from_directory(
    directory=test_dataset,
    class_mode=None,
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=1,
    shuffle=False,
)
predictions = model.predict(test_it, verbose=1)

Found 12500 images belonging to 1 classes.


In [19]:
from os.path import basename, splitext

import pandas as pd
import numpy as np

# One row per test image: its path (relative to the test directory) and the
# model's first output column rounded to the nearest integer value.
submission = pd.DataFrame({'id': test_it.filenames,'label': np.around(predictions[:, 0])})
# Turn '<subdir><sep><id>.<ext>' into the integer id. basename/splitext use the
# platform's own path separator, so this works on POSIX as well as Windows
# (splitting on a literal backslash raised IndexError on POSIX paths).
submission['id'] = submission['id'].apply(lambda x: int(splitext(basename(x))[0]))
# Order rows by numeric image id; reassign instead of mutating in place
submission = submission.sort_values(by='id')
submission.head()

Unnamed: 0,id,label
0,1,1.0
3612,2,1.0
4723,3,1.0
5834,4,1.0
6945,5,0.0


In [18]:
# Write the submission table to 'submission.csv', omitting the DataFrame index column
submission.to_csv('submission.csv', index=False)