In [2]:
## data handling libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## image handling libraries
import cv2

## tensorflow libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Conv2D, MaxPooling2D

## os library
import os

In [3]:
current_dir = "./dogs-vs-cats/"
train_dir = "train/"
test_dir = "test1/"
train_path = os.path.join(current_dir, train_dir)
test_path = os.path.join(current_dir, test_dir)

In [4]:
input_array = []
label_array = []


for image in os.listdir(train_path):

    # cat = 0, dog = 1
    label = image.split(".")[0]
    label_int = 1 if label == "dog" else 0

    # change all pictures to gray scale, resize all images
    img_array = cv2.imread(os.path.join(train_path,image),cv2.IMREAD_GRAYSCALE)
    sized_img_array = cv2.resize(img_array, dsize=(60, 60)) # increase dsize to get clearer pictures


    input_array.append(sized_img_array)
    label_array.append(label_int)


## convert arrays to numpy arrays for efficiency
input_array = np.array(input_array).reshape(-1,60,60,1)
label_array = np.array(label_array)


In [5]:
## normalize the data
## gray scale ranges from 0 to 255
input_array = input_array/255.0

In [6]:
## convolutional neural network 

## create a Sequential model
model = Sequential()

## add first convolutional layer with 32 filters and (3,3) kernel size
## activation is relu 
model.add(Conv2D(32,(3,3), activation = 'relu', input_shape = input_array.shape[1:]))
model.add(MaxPooling2D(pool_size = (2,2)))

## add another convolutional layer with 64 filters and (3,3) kernel size
model.add(Conv2D(64,(3,3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

model.add(Flatten())

## add dense layer with relu 
model.add(Dense(64, activation='relu'))

## add another dense layer with one output result using sigmoid
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer="adam",
              loss='binary_crossentropy',
              metrics=['accuracy'])

## run training data on model
model.fit(input_array, label_array, epochs=10, batch_size=32, validation_split=0.2)


Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x64891b5d0>

In [7]:
input_test_array = []
img_num_array = []

    
for image in os.listdir(test_path):

    # img numbers
    img_num_array.append(image.split(".")[0])

    # change all pictures to gray scale, resize all images
    img_array = cv2.imread(os.path.join(test_path,image),cv2.IMREAD_GRAYSCALE)
    sized_img_array = cv2.resize(img_array, dsize=(60, 60)) # increase dsize to get clearer pictures

    input_test_array.append(sized_img_array)



## convert arrays to numpy arrays
input_test_array = np.array(input_test_array).reshape(-1,60,60,1)
img_num_array = np.array(img_num_array)


In [8]:
## normalize the data
input_test_array = input_test_array/255.0

In [9]:
## use model to predict test data
predictions = model.predict(input_test_array)

In [10]:
## create array with dog(1) or cat(0) predictions
int_pred = [int(round(p[0])) for p in predictions]

In [11]:
## create a dataframe out of predictions
final_dataframe = pd.DataFrame({'img_num':img_num_array, 'label':int_pred})

In [12]:
final_dataframe.to_csv("prediction_results.csv", index=False)

In [13]:
final_dataframe

Unnamed: 0,img_num,label
0,9733,0
1,63,0
2,6400,1
3,823,1
4,4217,0
...,...,...
12495,3561,0
12496,8434,1
12497,7707,0
12498,6419,0
