# Data Preprocessing:

In [12]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/sampleSubmission.csv
/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip


In [15]:
import zipfile

# Unpack both competition archives into the Kaggle working directory.
# Absolute paths are used on purpose: the following cells read from
# '/kaggle/working/train', so the extraction target must not depend on
# whatever the kernel's current directory happens to be.
for archive_name in ("train.zip", "test1.zip"):
    archive_path = os.path.join("/kaggle/input/dogs-vs-cats", archive_name)
    with zipfile.ZipFile(archive_path, "r") as z:
        z.extractall("/kaggle/working")

In [13]:
# List the extracted training images. Despite the name, os.listdir returns
# bare file names (e.g. 'cat.4951.jpg'), not full paths, in arbitrary order.
train_paths=os.listdir('/kaggle/working/train')

In [14]:
# Peek at the first ten file names to check the '<label>.<number>.jpg' naming scheme.
train_paths[:10]

['cat.4951.jpg',
 'cat.11829.jpg',
 'cat.9873.jpg',
 'cat.922.jpg',
 'dog.1910.jpg',
 'dog.871.jpg',
 'dog.8385.jpg',
 'dog.51.jpg',
 'dog.8891.jpg',
 'cat.8605.jpg']

In [4]:
# Number of entries found in the extracted train directory.
len(train_paths)

25000

In [23]:
import os
# Create one staging folder per class. exist_ok=True keeps this cell
# re-runnable: plain os.mkdir raises FileExistsError on the second run.
os.makedirs('/kaggle/working/dogs', exist_ok=True)
os.makedirs('/kaggle/working/cats', exist_ok=True)

In [16]:
from shutil import copyfile
import random

# Copy every training image into its per-class staging folder.
# The class label is the part of the file name before the first dot
# ('dog.1910.jpg' -> 'dog'). Labels are matched explicitly so that a stray
# file is reported and skipped instead of being silently filed under cats.
class_dirs = {
    'dog': '/kaggle/working/dogs/',
    'cat': '/kaggle/working/cats/',
}

for i in train_paths:
    label = i.split('.')[0]
    if label not in class_dirs:
        print('skipping file with unknown label: ' + i)
        continue
    source = os.path.join('/kaggle/working/train', i)
    destination = os.path.join(class_dirs[label], i)
    copyfile(source, destination)

In [24]:
# Report how many files ended up in each class folder (dogs first, then cats).
for class_dir in ('/kaggle/working/dogs/', '/kaggle/working/cats/'):
    file_count = len(os.listdir(class_dir))
    print(file_count)

12500
12500


In [18]:
# Build the training/testing folder tree with one sub-folder per class.
# os.makedirs(..., exist_ok=True) creates missing parents and tolerates
# folders that already exist, so a re-run (or a partially created tree)
# still ends with all four leaf folders present. The previous try/except
# stopped at the first existing folder and skipped every remaining mkdir.
for split_name in ('training', 'testing'):
    for class_name in ('cats', 'dogs'):
        os.makedirs(os.path.join('/kaggle/working', split_name, class_name),
                    exist_ok=True)

In [19]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files in SOURCE between TRAINING and TESTING.

    Every non-empty regular file in SOURCE is copied (not moved) into exactly
    one of the two destination folders. Zero-length files are reported and
    left out of both sets.

    Args:
        SOURCE: Folder containing the files to split.
        TRAINING: Existing folder that receives the training share.
        TESTING: Existing folder that receives the remaining files.
        SPLIT_SIZE: Fraction of the usable files that goes to TRAINING,
            expected to be between 0 and 1. The count is truncated with int().

    The shuffle uses the global `random` module state, so call
    `random.seed(...)` beforehand for a reproducible split.
    """
    files = []
    # Sort the listing: os.listdir order is arbitrary, and a stable starting
    # order is required for a seeded shuffle to give the same split each time.
    for filename in sorted(os.listdir(SOURCE)):
        file = os.path.join(SOURCE, filename)
        if not os.path.isfile(file):
            # Sub-folders cannot be copied with copyfile; leave them alone.
            continue
        if os.path.getsize(file) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Slice from the split point rather than with a negative length:
    # shuffled_set[-0:] is the whole list, which used to put every file into
    # the testing set as well whenever the testing share was empty.
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))

    for filename in testing_set:
        copyfile(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))



# Source and destination folders for the per-class train/test split.
CAT_SOURCE_DIR = "/kaggle/working/cats/"
TRAINING_CATS_DIR = "/kaggle/working/training/cats/"
TESTING_CATS_DIR = "/kaggle/working/testing/cats/"
DOG_SOURCE_DIR = "/kaggle/working/dogs/"
TRAINING_DOGS_DIR = "/kaggle/working/training/dogs/"
TESTING_DOGS_DIR = "/kaggle/working/testing/dogs/"

# Fraction of each class that goes to the training folders.
split_size = .8

# Fix the RNG state before splitting. split_data copies files without
# clearing the destinations, so an unseeded re-run would draw a different
# shuffle and could leave the same image in both training and testing.
# With a fixed seed, a re-run draws the same shuffle for the same file listing.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

In [26]:
import tensorflow as tf

TRAINING_DIR = "/kaggle/working/training/"
VALIDATION_DIR = "/kaggle/working/testing/"

# Both generators read images the same way: batches of 64, binary labels
# taken from the sub-folder names, images resized to 224x224.
_flow_options = dict(batch_size=64,
                     class_mode='binary',
                     target_size=(224, 224))

# Pixel values are scaled by 1/255 for both training and validation.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **_flow_options)

validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **_flow_options)

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.


# Model architecture using pretrained VGG16:

In [36]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow import keras
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPool2D
# Import Model from tf.keras like every other layer here; mixing in the
# standalone `keras` package can fail or produce incompatible objects.
from tensorflow.keras.models import Model

# NOTE(review): ImageFlow is not used anywhere in the visible cells; kept in
# case a later cell relies on it.
ImageFlow = tf.keras.preprocessing.image.ImageDataGenerator()

# Pretrained VGG16 convolutional base without its ImageNet classifier head.
# The `classes` argument only applies when include_top=True, so it is omitted.
model = VGG16(weights='imagenet', include_top=False,
              input_tensor=Input(shape=(224, 224, 3)))

# Freeze the whole pretrained base so only the new head below is trained.
for layer in model.layers:
    layer.trainable = False

# New head: one conv + pool stage on top of the VGG16 features, then two
# small dense layers and a single sigmoid unit for the binary output.
x = model.output
x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='valid',
           data_format='channels_last', activation='relu',
           kernel_initializer=tf.keras.initializers.he_normal(seed=0),
           name='Conv1')(x)
x = MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid',
              data_format='channels_last', name='Pool1')(x)
x = Flatten()(x)
x = Dense(units=30, activation='relu',
          kernel_initializer=tf.keras.initializers.glorot_normal(seed=32),
          name='FC1')(x)
x = Dense(units=15, activation='relu',
          kernel_initializer=tf.keras.initializers.glorot_normal(seed=33),
          name='FC2')(x)
x = Dense(units=1, activation='sigmoid',
          kernel_initializer=tf.keras.initializers.glorot_normal(seed=3),
          name='Output')(x)

custom_model = Model(inputs=model.input, outputs=x)

custom_model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)      

# Model training:

In [37]:
# Binary cross-entropy matches the single sigmoid output unit.
custom_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

# Model.fit accepts generators directly; fit_generator is deprecated and
# removed in later TensorFlow/Keras releases. The History object is kept so
# the per-epoch loss and accuracy can be inspected or plotted afterwards.
history = custom_model.fit(train_generator,
                           validation_data=validation_generator,
                           verbose=1,
                           epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f1a40069110>