In [2]:
import os, glob, pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import cv2

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras.callbacks import EarlyStopping, Callback

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, plot_confusion_matrix, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
# NOTE(review): this call only seeds NumPy; it does not seed TensorFlow/Keras —
# confirm whether weight initialisation also needs a fixed seed for reproducible training.
np.random.seed(9)

### Phase 1 - Problem Definition  

#### 1.1 Broad Goals  

I have multiple small animals underfoot while I'm coding, cats and dogs.  While my dog is canine shaped and behaved, she is a small breed and ruff-ly the same size or smaller than my cat.  

#### 1.2 Data Source  

This project combines two open-source datasets to create a single sample:  

1. the [Stanford Dogs Dataset](http://vision.stanford.edu/aditya86/ImageNetDogs/), which includes a limited number of samples for each of its 120 separate dog breeds.  
2. the [Kaggle Cat Breeds Dataset](https://www.kaggle.com/ma7555/cat-breeds-dataset), which includes a limited number of samples for 67 different cat breeds.

#### 1.3 Problem Statement 

While identifying which pet is at my feet is an easy task for me as a human, it becomes a larger challenge for a computer. This project aims to build a binary image classification model to check whether a pet is a cat or a dog from a photo, with a stretch goal of applying real-time testing to a webcam.

### Phase 2 - Data Gathering  
####    2.1 Loading Files


In [None]:
# Accumulates one record (dict) per accepted image; turned into `df` below.
imgs = []

# Breed suffixes to keep. A suffix is the text after the last '-' in the class
# directory name, so 'coated_retriever' matches every directory whose name
# ends in '-coated_retriever'.
target_breeds = ['coated_retriever', 'Maltese_dog', 'Afghan_hound']

# (width, height) every image is resized to; matches the 160x160x3 CNN input.
IMG_SIZE = (160, 160)

# Sort the paths: glob order is filesystem-dependent, and a stable row order
# keeps any later seeded shuffle/split reproducible.
for file in sorted(glob.glob('./data/Images/*/*.jpg')):
    # retrieve breed name from the class directory name (text after the last '-')
    breed = os.path.basename(os.path.dirname(file)).split('-')[-1]
    # filter by targets
    if breed not in target_breeds:
        continue

    # load file; cv2.imread returns None (instead of raising) when the file
    # is missing, corrupt, or in an unsupported format
    img_arr = cv2.imread(file)
    if img_arr is None:
        print(f'Skipping unreadable image: {file}')
        continue

    # OpenCV loads images as BGR; convert to an RGB color array
    img_rgb = cv2.cvtColor(img_arr, cv2.COLOR_BGR2RGB)
    # resize to a uniform 160x160x3
    new_arr = cv2.resize(img_rgb, IMG_SIZE)
    # append to list as a 3-item dictionary (label, resized image, original shape)
    imgs.append({'species': breed, 'image': new_arr, 'orig_shape': img_arr.shape})

# convert the list of records to a dataframe; the explicit column list keeps the
# schema intact even when no image matched
df = pd.DataFrame(imgs, columns=['species', 'image', 'orig_shape'])

### Phase 3 - Exploratory Data Analysis  

#### 3.1 Dataset Shape

#### 3.2 Sample Images

#### 3.3 Principal Component Analysis

### Phase 4 - Modeling  

#### 4.1 Train/Test Split

#### 4.2 Convolutional Neural Net

In [None]:
# Convolutional classifier: four Conv2D + MaxPooling2D stages with a shrinking
# filter count (512 -> 256 -> 128 -> 64), followed by a dense head.
cnn = Sequential()

# first convolutional layer; declares the 160x160 RGB input shape
cnn.add(Conv2D(filters=512,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=(160, 160, 3)))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# second convolutional layer
cnn.add(Conv2D(filters=256,
               kernel_size=(3, 3),
               activation='relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# third convolutional layer
cnn.add(Conv2D(filters=128,
               kernel_size=(3, 3),
               activation='relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# fourth convolutional layer
cnn.add(Conv2D(filters=64,
               kernel_size=(3, 3),
               activation='relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# flatten the feature maps into a vector to feed the Dense layers
cnn.add(Flatten())
cnn.add(Dense(512, activation='relu'))
cnn.add(Dropout(0.3))
cnn.add(Dense(256, activation='relu'))
cnn.add(Dropout(0.3))
cnn.add(Dense(128, activation='relu'))

# 3-way softmax output
# NOTE(review): presumably one unit per entry in target_breeds, with one-hot
# encoded labels to suit categorical_crossentropy — confirm against the label prep.
cnn.add(Dense(3, activation='softmax'))

cnn.compile(loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'])

# Stop once the monitored quantity (Keras' default, val_loss per the Keras docs)
# has not improved for 5 epochs, then roll back to the best weights seen.
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

In [None]:
# print the model's layer-by-layer summary (Keras lists each layer with its
# output shape and parameter count)
cnn.summary()

In [None]:
# Train for at most 100 epochs in mini-batches of 64; the early_stop callback
# may end training sooner. `res` holds the object returned by fit().
# NOTE(review): the test split is also used as the validation set that drives
# early stopping — confirm this is intended, as it means the final test score
# is not from fully held-out data.
res = cnn.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
)