In [18]:
import tensorflow as tf
import os
import numpy as np
import cv2
from tensorflow.keras.applications.inception_v3 import preprocess_input
from keras_preprocessing import image
import pandas as pd
from tqdm import tqdm
from threading import Thread
import seaborn as sns
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

# All locations are resolved relative to the parent of the current working
# directory.
current_path = os.path.join(os.getcwd(), '..')
base_path = f'{current_path}/data/raw/cats_and_dogs_filtered'
train_path = os.path.join(f'{base_path}/train/')
validation_path = os.path.join(f'{base_path}/validation/')

# Ensure the output directory exists. `exist_ok=True` keeps the cell safe to
# re-run, missing parent directories are created, and genuine failures (for
# example a permission error) surface here instead of being silently ignored.
os.makedirs(f'{current_path}/data/processed', exist_ok=True)

In [8]:
# Each tuple is (train directory, validation directory) for one class.
dogs_dataset = tuple(os.path.join(split_dir, 'dogs')
                     for split_dir in (train_path, validation_path))
cats_dataset = tuple(os.path.join(split_dir, 'cats')
                     for split_dir in (train_path, validation_path))



In [11]:
# Build the base model from Xception with ImageNet weights. The original
# classification head is dropped (include_top=False) and global average
# pooling is applied to the output of the last convolutional block.
xception = tf.keras.applications.Xception(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)


In [14]:
# Classification head stacked on the pre-trained Xception base.
# The targets are one-hot encoded over two classes (cat = 0, dog = 1), so the
# softmax output must have exactly two units; a wider output layer does not
# match the target shape expected by categorical cross-entropy.
NUM_CLASSES = 2

model = tf.keras.Sequential([
    xception,
    layers.Flatten(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.5),  # regularise the freshly initialised dense layer
    layers.Dense(NUM_CLASSES, activation='softmax'),
])


In [122]:
# Freeze the pre-trained Xception base so only the layers stacked on top of it
# are trained. Referencing the base directly (rather than slicing
# `model.layers` by position) keeps this correct if layers are later added to
# or removed from the head.
xception.trainable = False

# `trainable` must be set before compiling for the change to take effect.
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)



In [51]:
def extract_features(img_dir, label):
    """Load one image file and prepare it as network input.

    Parameters
    ----------
    img_dir : str
        Path of the image file to read (a file path, despite the name).
    label : object
        Class label; returned unchanged alongside the pixels.

    Returns
    -------
    list
        ``[pixels, label]`` where ``pixels`` is the image converted to RGB,
        resized to 299x299 and passed through ``preprocess_input``.

    Raises
    ------
    ValueError
        If OpenCV cannot read ``img_dir`` as an image.
    """
    img = cv2.imread(img_dir)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise ValueError(f'Could not read image file: {img_dir}')
    # OpenCV decodes images in BGR channel order, while the pre-trained Keras
    # weights were trained on RGB input.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (299, 299))
    return [preprocess_input(img), label]

def extract_data(dataset, label):
    """Run ``extract_features`` on every entry of a directory.

    Parameters
    ----------
    dataset : str
        Directory whose entries are read as images.
    label : object
        Label attached to every image found in ``dataset``.

    Returns
    -------
    list
        One ``[pixels, label]`` item per directory entry, in the order
        returned by ``os.listdir``.
    """
    return [
        extract_features(os.path.join(dataset, file_name), label)
        for file_name in tqdm(os.listdir(dataset))
    ]

# Training split: dog rows first, then cat rows (column 0 = pixels,
# column 1 = label).
train_frames = [
    pd.DataFrame(extract_data(dogs_dataset[0], 'dog')),
    pd.DataFrame(extract_data(cats_dataset[0], 'cat')),
]
train_dataset = pd.concat(train_frames)


# Validation split, assembled the same way.
val_frames = [
    pd.DataFrame(extract_data(dogs_dataset[1], 'dog')),
    pd.DataFrame(extract_data(cats_dataset[1], 'cat')),
]
val_dataset = pd.concat(val_frames)

100%|██████████| 1000/1000 [00:10<00:00, 98.95it/s]
100%|██████████| 1000/1000 [00:09<00:00, 102.61it/s]
100%|██████████| 500/500 [00:04<00:00, 100.49it/s]
100%|██████████| 500/500 [00:04<00:00, 115.37it/s]


In [105]:

def fix_X_array(raw_data):
    """Stack a 1-D collection of equally shaped images into one array.

    Parameters
    ----------
    raw_data : sequence of numpy.ndarray
        Per-image arrays, e.g. an object-dtype array taken from a DataFrame
        column. All images must share the same shape.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(raw_data), *image_shape)``.

    Raises
    ------
    ValueError
        If ``raw_data`` is empty or the image shapes differ.
    """
    # np.stack adds the leading batch axis itself, so the image size no longer
    # has to be hard-coded and mismatched shapes fail loudly.
    return np.stack(list(raw_data))

def fix_Y_array(raw_data):
    """Encode the label column(s) of a feature table as integers.

    Every value found after the first column is mapped to ``1`` when it
    equals ``'dog'`` and to ``0`` otherwise.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Table whose columns from position 1 onwards hold the labels.

    Returns
    -------
    numpy.ndarray
        Flat array of 0/1 class ids.
    """
    flat_labels = raw_data.iloc[:, 1:].to_numpy().ravel()
    return np.array([1 if name == 'dog' else 0 for name in flat_labels])

# Training tensors: image batch plus one-hot encoded class ids.
train_pixels = train_dataset.iloc[:, 0].to_numpy()
X_train = fix_X_array(train_pixels)
train_class_ids = fix_Y_array(train_dataset)
y_train = tf.keras.utils.to_categorical(train_class_ids)

# Validation tensors, built the same way.
val_pixels = val_dataset.iloc[:, 0].to_numpy()
X_val = fix_X_array(val_pixels)
val_class_ids = fix_Y_array(val_dataset)
y_val = tf.keras.utils.to_categorical(val_class_ids)


In [88]:
# Display the shape of the training image array as a quick sanity check.
X_train.shape

(2000, 299, 299, 3)

In [None]:
# Display the shape of the validation image array as a quick sanity check.
X_val.shape

In [None]:
# Export the class labels of both splits as single-column CSV files.
# Column 0 of the feature tables holds image arrays, so the labels are taken
# from column 1, where `extract_data` stored them ('dog' / 'cat').
# The validation table is named `val_dataset`.
train_Y_df = pd.DataFrame(train_dataset.iloc[:, 1].to_numpy())
validation_Y_df = pd.DataFrame(val_dataset.iloc[:, 1].to_numpy())

train_Y_df.to_csv(
    os.path.join(current_path, 'data/processed/train_Y.csv'),
    header=False, index=False, sep=';',
)

validation_Y_df.to_csv(
    os.path.join(current_path, 'data/processed/validation_Y.csv'),
    header=False, index=False, sep=';',
)

