In [1]:
import os, cv2, re, random
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras import layers, models, optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split


In [2]:
TRAIN_DIR = r'C:/Users/Prashant/Desktop/train/train/'
TEST_DIR = r'C:/Users/Prashant/Desktop/test1/test1/'

ROWS = 150
COLS = 150
CHANNELS = 3


In [3]:
train_images_dogs_cats = [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR)]

train_dogs =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'dog' in i]
train_cats =   [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR) if 'cat' in i]

test_images_dogs_cats = [TEST_DIR+i for i in os.listdir(TEST_DIR)]

original_train_images = train_dogs[:12000] + train_cats[:12000]
random.shuffle(original_train_images)

train_images = original_train_images[:18000]
val_images = original_train_images[22000:]

In [4]:
len(val_images)


2000

In [5]:
def prepare_data(images):
    count = len(images)
    X = np.ndarray((count, ROWS, COLS, CHANNELS), dtype=np.float32)
    Y = np.zeros((count,), dtype=np.float32)
    
    for i, image_file in enumerate(images):
        img = load_img(image_file, target_size=(ROWS, COLS))
        X[i] = img_to_array(img)
        if 'dog' in image_file:
            Y[i] = 1.
        else:
            Y[i]= 0
        if i%1000 == 0: print('Processed {} of {}'.format(i, count))
    
    return X, Y



In [6]:
X_train, Y_train = prepare_data(train_images)




Processed 0 of 18000
Processed 1000 of 18000
Processed 2000 of 18000
Processed 3000 of 18000
Processed 4000 of 18000
Processed 5000 of 18000
Processed 6000 of 18000
Processed 7000 of 18000
Processed 8000 of 18000
Processed 9000 of 18000
Processed 10000 of 18000
Processed 11000 of 18000
Processed 12000 of 18000
Processed 13000 of 18000
Processed 14000 of 18000
Processed 15000 of 18000
Processed 16000 of 18000
Processed 17000 of 18000


In [7]:
X_val, Y_val = prepare_data(val_images)


Processed 0 of 2000
Processed 1000 of 2000


In [8]:
train_datagen = ImageDataGenerator(
    rescale=1./255,    
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,)

test_datagen = ImageDataGenerator(rescale=1./255)


In [13]:
train_generator = train_datagen.flow(
    X_train,
    Y_train,
    batch_size=32)

test_generator = test_datagen.flow(
    X_val,
    Y_val,
    batch_size=32)


In [14]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense,Dropout,Flatten, Conv2D, MaxPool2D
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam



In [15]:
img_width, img_height = 150, 150


model = Sequential()

model.add(Conv2D(32, (3,3), input_shape=(ROWS, COLS, CHANNELS), activation='relu'))
model.add(MaxPool2D(pool_size = (2,2)))

model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPool2D(pool_size = (2,2)))
model.add(Dropout(0.4))

model.add(Conv2D(128, (3,3), activation='relu'))
model.add(MaxPool2D(pool_size = (2,2)))
model.add(Dropout(0.4))

model.add(Conv2D(256, (3,3), activation='relu'))
model.add(MaxPool2D(pool_size = (2,2)))
model.add(Dropout(0.4))

model.add(Conv2D(512, (1,1), activation='relu'))
model.add(MaxPool2D(pool_size = (2,2)))

model.add(Flatten())
model.add(Dropout(0.4))

model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [17]:
model.fit_generator(
    train_generator,
    epochs=30,
    validation_data=test_generator,
    callbacks=EarlyStopping(monitor='val_loss', mode='min',patience=2,verbose=1))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 00020: early stopping


<tensorflow.python.keras.callbacks.History at 0x1dcee724908>

In [18]:
evaluation_images = train_dogs[12000:12500] + train_cats[12000:12500]
random.shuffle(evaluation_images)

X_evaluation, y_evaluation = prepare_data(evaluation_images)
X_evaluation /= 255


Processed 0 of 1000


In [19]:
evaluation = model.evaluate(X_evaluation, y_evaluation)




In [20]:
evaluation

[0.2756955623626709, 0.8930000066757202]

In [21]:
print(X_train.shape,X_val.shape)


(18000, 150, 150, 3) (2000, 150, 150, 3)


In [22]:
type(Y_val)


numpy.ndarray

In [23]:
import keras
model.save('dogcat.model')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: dogcat.model\assets


In [24]:
X_test,_ = prepare_data(test_images_dogs_cats)
X_test/= 255.

Processed 0 of 12500
Processed 1000 of 12500
Processed 2000 of 12500
Processed 3000 of 12500
Processed 4000 of 12500
Processed 5000 of 12500
Processed 6000 of 12500
Processed 7000 of 12500
Processed 8000 of 12500
Processed 9000 of 12500
Processed 10000 of 12500
Processed 11000 of 12500
Processed 12000 of 12500


In [111]:
import numpy as np
import pandas as pd
results = model.predict(X_test)
results


array([[0.9465711 ],
       [0.06986928],
       [0.67779046],
       ...,
       [0.8095223 ],
       [0.06008096],
       [0.38172534]], dtype=float32)

In [112]:
results = np.round(results)
results

array([[1.],
       [0.],
       [1.],
       ...,
       [1.],
       [0.],
       [0.]], dtype=float32)

In [115]:
results = pd.DataFrame(results, columns =['label'])
results

Unnamed: 0,label
0,1.0
1,0.0
2,1.0
3,1.0
4,1.0
...,...
12495,0.0
12496,1.0
12497,1.0
12498,0.0


In [116]:
submission = pd.concat([pd.Series(range(1,12501),name = "id"),results],axis = 1)


In [117]:
submission

Unnamed: 0,id,label
0,1,1.0
1,2,0.0
2,3,1.0
3,4,1.0
4,5,1.0
...,...,...
12495,12496,0.0
12496,12497,1.0
12497,12498,1.0
12498,12499,0.0


In [118]:

submission.to_csv("dogs-vs-cats-sub.csv",index=False)


