This study tries to predict the animal class from face images using 4 different algorithms:
1. Logistic regression classification
1. SVM (Support Vector Machine) classification
1. Random forest classification
1. Decision tree classification


* The dataset contains 3 animal classes: cat, dog, and wild
* Source dataset: https://www.kaggle.com/andrewmvd/animal-faces

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("seaborn-whitegrid")
import matplotlib.image as implt
from PIL import Image
import seaborn as sns
import cv2 as cs2
import os
import shutil
import random
import os.path

import warnings
warnings.filterwarnings('ignore')

In [2]:
## import Keras and its module for image processing and model building
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization

# Processing Dataset 

The dataset came with only two divisions: a training dataset and a test dataset, in different folders.
However, we also need a third division: a validation dataset against which we evaluate the model during training, before measuring the final accuracy on the test dataset. This lets us avoid overfitting to the samples that we use for the final test.

We also need to shuffle the wild dataset into the new validation folder, because it seems that the images are not randomly distributed there (there seems to be some rough division according to each wild species, such as foxes, tigers, lions, and so on).



In [4]:
# Folder holding the original, unsplit images (one sub-folder per class).
original_dataset_dir = r'C:\Users\Pedro\Documents\Python Scripts\cv_animalfaces\input\animal-faces\all'

# Root folder under which the train / validation / test splits are created.
base_dir = r'C:\Users\Pedro\Documents\Python Scripts\cv_animalfaces\input'

# One folder per split.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# One sub-folder per class inside each split. Every folder gets its own
# variable: the 'wild' folders previously reused `train_dogs_dir`, which left
# that name pointing at the test/wild folder once the cell had run.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
train_wild_dir = os.path.join(train_dir, 'wild')

validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
validation_wild_dir = os.path.join(validation_dir, 'wild')

test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')
test_wild_dir = os.path.join(test_dir, 'wild')

# os.makedirs creates any missing parent folders, and exist_ok=True makes the
# call a no-op for folders that already exist, so this cell can be re-run
# without raising FileExistsError.
for _split_class_dir in (
    train_cats_dir, train_dogs_dir, train_wild_dir,
    validation_cats_dir, validation_dogs_dir, validation_wild_dir,
    test_cats_dir, test_dogs_dir, test_wild_dir,
):
    os.makedirs(_split_class_dir, exist_ok=True)

In [5]:
# Source folder of each class inside the original dataset.
src_cat, src_dog, src_wild = (
    os.path.join(original_dataset_dir, class_name)
    for class_name in ('cat', 'dog', 'wild')
)

# Entries found in each class folder (os.listdir returns bare names, not
# full paths, so they must be joined with the folder before use).
src_cat_imgs, src_dog_imgs, src_wild_imgs = (
    os.listdir(class_dir) for class_dir in (src_cat, src_dog, src_wild)
)

def valid_path(dir_path, filename):
    """Return True if ``filename`` inside ``dir_path`` is an existing regular file.

    Args:
        dir_path: Folder expected to contain the file.
        filename: Name of the entry, relative to ``dir_path``.

    Returns:
        bool: False when the entry is missing or is not a regular file
        (for example a sub-folder), True otherwise.
    """
    return os.path.isfile(os.path.join(dir_path, filename))

In [6]:
# Report how many entries were listed for each class, one line per class.
print(
    f'Cat images:{len(src_cat_imgs)}',
    f'Dog images:{len(src_dog_imgs)}',
    f'Wild images:{len(src_wild_imgs)}',
    sep='\n',
)

Cat images:5653
Dog images:5239
Wild images:5238


In [11]:
# 70% of the listed cat entries, truncated to a whole number.
int(0.7 * len(src_cat_imgs))

3957

We will split the dataset in the following ratio:
1. Training dataset 70%
2. Validation dataset 15%
3. Test dataset 15%

In [14]:
# Test: move a random 15% of each class into the test folders.
#
# The three classes are handled by one loop instead of three copies of the
# same code, and the destination is derived from `test_dir` rather than
# repeating the absolute path.
for class_src, class_imgs, class_name in (
    (src_cat, src_cat_imgs, 'cats'),
    (src_dog, src_dog_imgs, 'dogs'),
    (src_wild, src_wild_imgs, 'wild'),
):
    # Only files still present in the source folder are candidates; files
    # already moved by another split cell fail valid_path and are skipped.
    remaining = [os.path.join(class_src, f) for f in class_imgs if valid_path(class_src, f)]

    # The share is taken from the size of the original listing, not from
    # what is left in the folder.
    wanted = int(len(class_imgs) * 0.15)

    # random.sample raises ValueError when asked for more items than the
    # population holds (e.g. when this cell is re-run), so cap the request.
    picked = random.sample(remaining, min(wanted, len(remaining)))

    target_dir = os.path.join(test_dir, class_name)
    for file_path in picked:
        shutil.move(file_path, target_dir)

In [13]:
# Validation: move a random 15% of each class into the validation folders.
#
# The three classes are handled by one loop instead of three copies of the
# same code, and the destination is derived from `validation_dir` rather than
# repeating the absolute path (the old `target_*_test` names were misleading
# here, since they held validation folders).
for class_src, class_imgs, class_name in (
    (src_cat, src_cat_imgs, 'cats'),
    (src_dog, src_dog_imgs, 'dogs'),
    (src_wild, src_wild_imgs, 'wild'),
):
    # Only files still present in the source folder are candidates; files
    # already moved by another split cell fail valid_path and are skipped.
    remaining = [os.path.join(class_src, f) for f in class_imgs if valid_path(class_src, f)]

    # The share is taken from the size of the original listing, not from
    # what is left in the folder.
    wanted = int(len(class_imgs) * 0.15)

    # random.sample raises ValueError when asked for more items than the
    # population holds (e.g. when this cell is re-run), so cap the request.
    picked = random.sample(remaining, min(wanted, len(remaining)))

    target_dir = os.path.join(validation_dir, class_name)
    for file_path in picked:
        shutil.move(file_path, target_dir)

In [15]:
# Training: move a random 70% of each class into the training folders.
#
# The three classes are handled by one loop instead of three copies of the
# same code, and the destination is derived from `train_dir` rather than
# repeating the absolute path (the old `target_*_test` names were misleading
# here, since they held training folders).
for class_src, class_imgs, class_name in (
    (src_cat, src_cat_imgs, 'cats'),
    (src_dog, src_dog_imgs, 'dogs'),
    (src_wild, src_wild_imgs, 'wild'),
):
    # Only files still present in the source folder are candidates; files
    # already moved by another split cell fail valid_path and are skipped.
    remaining = [os.path.join(class_src, f) for f in class_imgs if valid_path(class_src, f)]

    # The share is taken from the size of the original listing, not from
    # what is left in the folder. Because each share is truncated to a whole
    # number, a few files may stay behind in the source folder.
    wanted = int(len(class_imgs) * 0.70)

    # random.sample raises ValueError when asked for more items than the
    # population holds (e.g. when this cell is re-run), so cap the request.
    picked = random.sample(remaining, min(wanted, len(remaining)))

    target_dir = os.path.join(train_dir, class_name)
    for file_path in picked:
        shutil.move(file_path, target_dir)