# Image classification problem.
https://www.kaggle.com/c/dogs-vs-cats
<br>
Given a set of images, classify each one as either a cat or a dog.

## Data Preprocessing

In [1]:
"""Perform random transformation for classifier so that model will never see the exact same picture twice
This data augmentation will prevent the over fitting of the model"""
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Number of images each generator yields per step.
batch_size = 10

# Augment the training data (random shear, zoom and horizontal flip, on top of
# rescaling pixel values by 1/255) to help prevent overfitting.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream training images from disk, resized to 150x150, with binary labels.
train_generator = train_datagen.flow_from_directory(
    'data/train',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='binary',
)

Found 24100 images belonging to 2 classes.


In [3]:
# Validation images are only rescaled by 1/255 -- no random augmentation is
# configured for this generator.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Stream validation images from disk, resized to 150x150, with binary labels.
validation_generator = validation_datagen.flow_from_directory(
    'data/test',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='binary',
)

Found 300 images belonging to 2 classes.


In [4]:
"""Model for classification"""
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# Three Conv2D -> ReLU -> MaxPooling stages followed by a small dense head that
# ends in a single sigmoid unit for the binary decision.
model = Sequential([
    # Stage 1: 32 filters of 3x3 over the 150x150, 3-channel input.
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: another 32 filters of 3x3.
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3: 64 filters of 3x3.
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten, 64-unit dense layer with dropout, then one
    # sigmoid output.
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
# Train for two epochs, evaluating on the validation generator as part of the
# fit. fit_generator returns a History object; keep it so the per-epoch loss
# and accuracy can be inspected afterwards instead of being thrown away.
history = model.fit_generator(
        train_generator,
        epochs=2,
        validation_data=validation_generator)

# Persist the weights reached after these two epochs.
model.save_weights('model_1.h5')

Epoch 1/2
Epoch 2/2


After just two epochs, the accuracy on the validation set is 0.71. More epochs should give better results, but they need more compute to run (this notebook is currently running on a laptop).

## Running two more epochs

In [7]:
# Continue training the same model for two additional epochs. The returned
# History object is captured so its metrics can be inspected and so the cell
# does not end by echoing a bare `<keras.callbacks.History ...>` repr.
history_extra = model.fit_generator(
        train_generator,
        epochs=2,
        validation_data=validation_generator)

# Save the further-trained weights to a separate file so the two-epoch
# checkpoint in model_1.h5 is left intact.
model.save_weights('model_2.h5')

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f19daa537f0>

In [8]:
# Print the layer-by-layer architecture: layer type, output shape and
# parameter count for each layer.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 72, 72, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 64)        18496     
__________