In [1]:
import numpy as np
import pandas as pd 
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import random
import os
import cv2 as cv

In [3]:
filenames = os.listdir("train")
categories = []
for filename in filenames:
    category = filename.split('.')[0]
    if category == 'dog':
        categories.append(1)
    else:
        categories.append(0)

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

In [4]:
for i in range(100):
    df = df.sample(frac=1).reset_index(drop=True)
df

Unnamed: 0,filename,category
0,cat.11680.jpg,0
1,cat.10869.jpg,0
2,cat.1166.jpg,0
3,cat.9673.jpg,0
4,cat.10678.jpg,0
...,...,...
24995,dog.8894.jpg,1
24996,dog.7906.jpg,1
24997,cat.5172.jpg,0
24998,dog.12384.jpg,1


In [5]:
data_train = []
for i in range(25000):
    label = df['category'][i]
    img = cv.imread("train/"+df['filename'][i],cv.COLOR_BGR2RGB)
    data_train.append([np.array(img),np.array(label)])
#np.save('train_data.npy', data_train)

In [6]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))


model.add(Dense(2, activation='softmax')) # 2 because we have cat and dog classes

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [7]:
X = np.array([i[0] for i in data_train]).reshape(-1,128,128,3)
Y = np.array([i[1] for i in data_train])

In [8]:
X = X.astype('float32')
X = X / 255.0

In [9]:
X_valid = X[0:25000]
Y_valid = Y[0:25000]
X = X[0:25000]
Y = Y[0:25000]

In [10]:
X_valid.shape

(25000, 128, 128, 3)

In [11]:
model.fit(X, Y, epochs=5, batch_size = 128, validation_data=(X_valid,Y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2120bcc6460>

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 32)      896       
_________________________________________________________________
batch_normalization (BatchNo (None, 126, 126, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 61, 61, 64)        256       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 64)        0

In [13]:
model.save('dogVScat.model')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: dogVScat.model\assets


In [17]:
# testing
filenames = os.listdir("test1")
categories = []
for filename in filenames:
    category = filename.split('.')[0]
    if category == 'dog':
        categories.append(1)
    else:
        categories.append(0)

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

for i in range(100):
    df = df.sample(frac=1).reset_index(drop=True)

data_test = []
for i in range(12500):
    label = df['category'][i]
    img = cv.imread("train/"+df['filename'][i],cv.COLOR_BGR2RGB)
    data_test.append([np.array(img),np.array(label)])
    
X = np.array([i[0] for i in data_test]).reshape(-1,128,128,3)
Y = np.array([i[1] for i in data_test])
X = X.astype('float32')
X = X / 255.0
X_valid = X[0:12500]
Y_valid = Y[0:12500]
X = X[0:12500]
Y = Y[0:12500]

ValueError: cannot reshape array of size 12500 into shape (128,128,3)

In [33]:
val_loss, val_acc = model.evaluate(X, Y)
print(val_loss)
print(val_acc)

0.4446032643318176
0.7922000288963318


In [34]:
np.argmax(model.predict(X[0].reshape(-1,128,128,3)))

1

In [35]:
Y[0]

1