We unzip the images into a directory

In [None]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Conv2D, MaxPooling2D

In [None]:
# import os
# print(os.listdir("../input"))

import zipfile

# Unpack both competition archives (training and test images) into the
# current working directory.
for archive_path in (
    "../input/dogs-vs-cats/train.zip",
    "../input/dogs-vs-cats/test1.zip",
):
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

In [None]:
import os

main_dir = "/kaggle/working/"
train_dir = "train"
path = os.path.join(main_dir, train_dir)

# Preview one training image to sanity-check the load + resize steps.
# os.listdir() returns entries in arbitrary order, so this is simply "any one
# file"; slicing to at most one entry keeps the cell a no-op on an empty
# directory.
for p in os.listdir(path)[:1]:
    # Read as single-channel grayscale, then shrink to the 80x80 size used
    # for the model input later on.
    img_array = cv2.imread(os.path.join(path, p), cv2.IMREAD_GRAYSCALE)
    new_img_array = cv2.resize(img_array, dsize=(80, 80))
    plt.imshow(new_img_array, cmap="gray")

We declare our training array X and our target array y. Here X is the array of pixels and y is the target.

I am going to create a function that maps "dog" or "cat" into 1 and 0.



In [None]:
X = []
y = []


def convert(category):
    """Return the binary label for a class name: 1 for 'dog', 0 otherwise."""
    return int(category == 'dog')


def create_test_data(path, images, targets, size=(80, 80)):
    """Load every file in *path* as a resized grayscale image with its label.

    Despite the name, this is used to build the training set. The label is
    derived from the part of the file name before the first ".", passed
    through ``convert`` (so anything that is not "dog" becomes 0).

    Args:
        path: Directory whose entries are all read as images.
        images: List that receives one resized grayscale array per file
            (mutated in place).
        targets: List that receives the matching 0/1 label per file
            (mutated in place).
        size: ``(width, height)`` passed to ``cv2.resize``; defaults to the
            80x80 used throughout the notebook.

    Raises:
        ValueError: If a directory entry cannot be decoded as an image.
    """
    for p in os.listdir(path):
        file_path = os.path.join(path, p)
        img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals failure by returning None instead of
            # raising; fail here with the offending path rather than with an
            # opaque assertion inside cv2.resize.
            raise ValueError(f"Could not read image file: {file_path}")
        images.append(cv2.resize(img_array, dsize=size))
        targets.append(convert(p.split(".")[0]))

In [None]:
# Fill the X and y lists from the directory currently referenced by `path`.
create_test_data(path, X, y)

# Turn the list of 80x80 images into one array with an explicit trailing
# channel axis of size 1, and the labels into a flat array.
X = np.reshape(np.array(X), (-1, 80, 80, 1))
y = np.array(y)

# Model

1. We define a sequential model, ie layer-by-layer
2. We add layers to it
3. a. We add a Conv2D layer with 64 filters and a kernel size of 3x3; other values might also work well
3. b. We specify the input shape, which is the shape of a single sample in X. The activation is ReLU, but other activation functions exist
4. After every convolution layer we add max pooling with a size of 2x2. It takes the maximum (largest) value in each patch of each feature map

In [None]:
# Convolutional front end: one Conv2D layer with 64 filters of size 3x3 and
# ReLU activation, taking the per-sample shape of X as input, followed by
# 2x2 max pooling.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=X.shape[1:]),
    MaxPooling2D(pool_size=(2, 2)),
])

6. I skip it: I don't want to repeat the combination just for the sake of it
7. I add a flatten layer now.
8. In the end we add a final Dense layer. Activation can be sigmoid (for probability), else we use softmax. Here I will use sigmoid.

In [None]:
# Classification head: flatten the feature maps, pass them through a
# 64-unit ReLU layer, and finish with a single sigmoid output unit.
head_layers = (
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
)
for head_layer in head_layers:
    model.add(head_layer)

Finally we compile the model. 

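- We minimize the loss, as neural networks are always trained to do. To measure it we can use different formulas like 'categorical_crossentropy' or 'binary_crossentropy'. Here I will use the second, 'binary_crossentropy', because the model has a single sigmoid output for a two-class problem.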

- To optimize the cost function we use different methods like gradient descent. Here we use a default one: adam

- Our metric will be accuracy

In [None]:
# Binary cross-entropy loss, Adam optimizer, and accuracy reported as the
# metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

Now we fit our model with the training data

- We train the model by going through the data 10 times (epochs)
- We pass the data through the model in batches of 32 samples
- We hold out the last 20% of the data as a validation set to check the validation error after each epoch

In [None]:
# Training settings: 10 epochs, batches of 32, 20% of the data held out for
# validation.
fit_options = {"epochs": 10, "batch_size": 32, "validation_split": 0.2}
model.fit(X, y, **fit_options)

Now we preprocess our test data and do the same thing we did with our train data

In [None]:
train_dir = "test1"
path = os.path.join(main_dir, train_dir)

X_test = []
id_line = []

# Load the test images here instead of via create_test_data(): that helper
# turns the file-name stem into a dog/cat label, which would collapse every
# test id that is not literally "dog" to 0. For the submission the stem
# itself (the part before the first ".") is kept as the image id.
for p in os.listdir(path):
    img_array = cv2.imread(os.path.join(path, p), cv2.IMREAD_GRAYSCALE)
    # Same 80x80 grayscale preprocessing as the training images.
    X_test.append(cv2.resize(img_array, dsize=(80, 80)))
    id_line.append(p.split(".")[0])

X_test = np.array(X_test).reshape(-1, 80, 80, 1)

In [None]:
# Run the trained model over the preprocessed test images; each output is
# the value of the final sigmoid unit.
predictions = model.predict(x=X_test)

We need to round our values as we used sigmoid function and we got the probability values in our predicted dataset

Now we can upload our submission

In [None]:
# `predictions` holds one sigmoid output per image (a single-unit final
# layer), so round each value to a hard 0/1 label and flatten to 1-D so it
# can be used as a DataFrame column.
predicted_val = np.round(predictions).astype(int).ravel()

submission_df = pd.DataFrame({'id': id_line, 'label': predicted_val})
submission_df.to_csv("submission.csv", index=False)