# Kaggle Dataset Cat vs Dog Link

https://www.kaggle.com/c/dogs-vs-cats/overview

# Importing Libraries

In [117]:
import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion
from tensorflow import keras
import tensorflow
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression


TRAIN_DIR = './dataset/train' 
TEST_DIR = './dataset/test1'
IMG_SIZE = 50
LR = 1e-3 # 0.001  1 * 10^-3

MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic') # just so we remember which saved model is which, sizes must match

# Splitting Image Name

In [118]:
def label_img(img):
    word_label = img.split('.')[-3]
    # conversion to one-hot array [cat,dog]
    #                            [much cat, no dog]
    if word_label == 'cat': return [1,0]
    #                             [no cat, very doggo]
    elif word_label == 'dog': return [0,1]

# Creating The Training Dataset

In [119]:
def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR,img)  # ./dataset/train  +  dog.9999.jpg
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)  # channels 1 
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img,dtype=object),np.array(label)])
    shuffle(training_data)
    return training_data


# Creating the Test Dataset

In [120]:
def process_test_data():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR,img)
        img_num = img.split('.')[0]
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        testing_data.append([np.array(img,dtype=np.float32), img_num])
    shuffle(testing_data)
    return testing_data

In [121]:
train_data = create_train_data()

100%|███████████████████████████████████████████████████████████████████████████| 25000/25000 [01:11<00:00, 351.81it/s]


In [122]:
train_data 

[[array([[228, 227, 229, ..., 233, 230, 181],
         [228, 228, 231, ..., 233, 230, 182],
         [230, 227, 230, ..., 232, 229, 183],
         ...,
         [71, 62, 58, ..., 227, 226, 167],
         [66, 74, 68, ..., 225, 225, 146],
         [59, 67, 72, ..., 209, 212, 91]], dtype=object),
  array([1, 0])],
 [array([[107, 131, 119, ..., 48, 41, 40],
         [115, 148, 108, ..., 58, 54, 42],
         [154, 197, 211, ..., 87, 76, 47],
         ...,
         [46, 56, 53, ..., 31, 29, 19],
         [130, 114, 63, ..., 113, 21, 26],
         [48, 71, 100, ..., 116, 26, 10]], dtype=object),
  array([0, 1])],
 [array([[159, 161, 146, ..., 40, 32, 29],
         [173, 177, 179, ..., 28, 24, 25],
         [168, 156, 165, ..., 26, 27, 25],
         ...,
         [151, 156, 178, ..., 228, 227, 225],
         [176, 165, 173, ..., 228, 223, 222],
         [178, 173, 170, ..., 228, 222, 222]], dtype=object),
  array([0, 1])],
 [array([[27, 30, 28, ..., 42, 43, 41],
         [34, 32, 41, ..., 37

In [123]:
test_data = process_test_data()

100%|███████████████████████████████████████████████████████████████████████████| 12500/12500 [01:04<00:00, 192.71it/s]


In [124]:
test_data

[[array([[112., 106.,  98., ...,  14.,  13.,  15.],
         [141., 111., 108., ...,  14.,  15.,  15.],
         [142., 123.,  99., ...,  16.,  16.,  15.],
         ...,
         [112., 111., 109., ...,  58.,  53.,  46.],
         [114., 112., 110., ...,  54.,  60.,  63.],
         [112., 112., 112., ...,  60.,  64.,  61.]], dtype=float32),
  '6118'],
 [array([[120., 122., 123., ..., 176., 174., 185.],
         [116., 117., 117., ..., 176., 167., 192.],
         [116., 119., 124., ..., 173., 174., 186.],
         ...,
         [120., 128., 114., ..., 155., 165., 175.],
         [120., 122., 119., ..., 169., 171., 159.],
         [123., 109., 118., ..., 166., 180., 177.]], dtype=float32),
  '10172'],
 [array([[178., 134., 174., ..., 226., 207., 232.],
         [169., 158., 149., ..., 189., 175., 194.],
         [167., 171., 134., ..., 177., 164., 183.],
         ...,
         [193., 185., 168., ..., 188., 180., 204.],
         [194., 163., 170., ..., 197., 194., 204.],
         [185., 1

# Testing the First CNN MODEL (Lower Accuracy)

In [None]:
# Convolutional Neural Network >>> CNN  ConvNet
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print('model loaded!')

In [None]:
train = train_data[:-500]  # 25000 >>>24500
test = train_data[-500:]   #500   
X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]


model.fit({'input': X}, {'targets': Y}, n_epoch=3, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

# Restting The Model (reset the graph instance, since we're doing operations in a continuous environment)

In [98]:
from tensorflow.python.framework import ops
ops.reset_default_graph()

# The Second CNN MODEL (HIGHER ACCURACY)

In [99]:
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)
convnet = conv_2d(convnet, 192, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)
convnet = conv_2d(convnet, 96, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)
convnet = conv_2d(convnet, 48, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)


convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.2)

convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')



if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print('model loaded!')

train = train_data[:-500]
test = train_data[-500:]

X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]

model.fit({'input': X}, {'targets': Y}, n_epoch=20,validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=100, show_metric=True, run_id=MODEL_NAME)

Training Step: 7659  | total loss: [1m[32m0.19242[0m[0m | time: 39.190s
| Adam | epoch: 020 | loss: 0.19242 - acc: 0.9215 -- iter: 24448/24500
Training Step: 7660  | total loss: [1m[32m0.19168[0m[0m | time: 40.298s
| Adam | epoch: 020 | loss: 0.19168 - acc: 0.9231 | val_loss: 0.67838 - val_acc: 0.8320 -- iter: 24500/24500
--


In [34]:
model.save('model.tflearn')

# Testing the Model with New Data 

In [None]:
import matplotlib.pyplot as plt

# if you need to create the data:
#test_data = process_test_data()
# if you already have some saved:
test_data = np.load('test_data.npy',allow_pickle=True)

fig=plt.figure()
fig.set_size_inches(10.5, 8.5)
for num,data in enumerate(test_data[:10]):
    # cat: [1,0]
    # dog: [0,1]
    
    img_num = data[1]
    img_data = data[0]
    
    y = fig.add_subplot(2,5,num+1)
    orig = img_data
    data = img_data.reshape(IMG_SIZE,IMG_SIZE,1)
    #model_out = model.predict([data])[0]
    model_out = model.predict([data])[0]
    
    if np.argmax(model_out) == 1: str_label='Dog'
    else: str_label='Cat'
        
    y.imshow(orig,cmap='gray')
    plt.title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()