In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import glob
from PIL import Image

In [2]:
CATEGORIES = ['femaleeyes', 'maleeyes']

def check_dimensions():
    """
    Given folder path, return the largest and smallest dimensions.
    """
    x_max = 0
    y_max = 0
    x_min = 200
    y_min = 200
    for cat in CATEGORIES:
        path_list = glob.glob(os.path.join('dataset', cat, '*.jpg'))
        for image_path in path_list:
            image = np.array(Image.open(image_path))
            if image.shape[0] > x_max:
                x_max = image.shape[0]
            if image.shape[1] > y_max:
                y_max = image.shape[0]
            if image.shape[0] < x_min:
                x_min = image.shape[0]
            if image.shape[1] < y_min:
                y_min = image.shape[0]
            if image.shape[0] != image.shape[1]:
                # Print any pair of dimensions in case of non-square.
                print(f"Irregular: {image.shape[0]}, {image.shape[1]}")
    return x_max, y_max, x_min, y_min

In [3]:
check_dimensions()

(117, 117, 41, 41)

In [4]:
RESIZE_DIM = 50
INPUT_SHAPE = (RESIZE_DIM, RESIZE_DIM, 3)
BS = 32
EPOCHS = 10

In [5]:
def generate_entry(image_path, category):
    """
    Given file path and of an image, return a pandas series of a numpy array of
    pixel data (width, height, channels), along with category in integer indices
    if category (string) is given.
    """
    image = Image.open(image_path)
    image = image.resize((RESIZE_DIM, RESIZE_DIM))
    return np.array(image) / 255.0, CATEGORIES.index(category)

In [6]:
def get_images():
    """
    Given folder path, return a pandas dataframe of image data.
    """
    image_X = []
    image_y = []
    for cat in CATEGORIES:
        path_list = glob.glob(os.path.join('dataset', cat, '*.jpg'))
        for image_path in path_list:
            image_array, category_int = generate_entry(image_path, cat)
            image_X.append(image_array)
            image_y.append(category_int)
    X = np.stack(image_X)
    y = np.array(image_y)
    return X, y

In [7]:
def shuffle_images(X, y):
    """
    Given image data X and category y, shuffle them in the same order.
    """
    rng_state = np.random.get_state()
    np.random.shuffle(X)
    # Use the same RNG state so that X and y are shuffled in the same way.
    np.random.set_state(rng_state)
    np.random.shuffle(y)
    return X, y

In [8]:
def create_model():
    """
    Create the neural network model.
    """
    inputs = tf.keras.Input(shape=INPUT_SHAPE)
    # First convolutional layer
    x = tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation='relu')(inputs)
    x = tf.keras.layers.BatchNormalization(axis=-1)(x)
    x = tf.keras.layers.MaxPool2D(pool_size=(3, 3))(x)
    x = tf.keras.layers.Dropout(0.25)(x)
    # Second convolutional layer
    x = tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation='relu')(inputs)
    x = tf.keras.layers.BatchNormalization(axis=-1)(x)
    x = tf.keras.layers.MaxPool2D(pool_size=(3, 3))(x)
    x = tf.keras.layers.Dropout(0.25)(x)
    # Third convolutional layer
    x = tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation='relu')(inputs)
    x = tf.keras.layers.BatchNormalization(axis=-1)(x)
    x = tf.keras.layers.MaxPool2D(pool_size=(3, 3))(x)
    x = tf.keras.layers.Dropout(0.25)(x)
    # First fully connected layer
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(100, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    # Final fully connected layer
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inputs, outputs=x, name="image_classification")
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics = ['accuracy'])
    return model

In [9]:
model = create_model()

In [10]:
X, y = shuffle_images(*get_images())

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [12]:
hist = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BS)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Prediction from testing dataset

y_pred = model.predict(X_test) > 0.5

In [14]:
# Confusion amtrix from testing dataset

from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)

array([[ 982,   44],
       [ 172, 1107]])