In [1]:
from tensorflow.keras.utils import load_img
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input
import PIL
import zipfile
import shutil
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline
import os
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')

In [None]:
# Unpack the uploaded dataset archive. No -d target is given, so files land in the
# current working directory.
# NOTE(review): the tar cell further down reads /content/UTKface_inthewild/part1.tar.gz,
# so this presumably runs with /content as the working directory - confirm.
!unzip '/content/UTKFace_Images.zip'

In [3]:
# Create the base folder "faces" that will hold the extracted image set.
# exist_ok=True turns the call into a no-op when the folder is already present,
# so the cell can be re-run safely without a separate existence check.
os.makedirs('/content/faces', exist_ok=True)

In [None]:
# Extract the part1 archive of UTKface_inthewild into the destination folder (faces).
# -v lists every extracted file in the cell output.
# NOTE(review): the loading cell reads from /content/faces/part1, so the archive is
# presumably rooted at a top-level "part1" directory - confirm.
!tar -xzvf "/content/UTKface_inthewild/part1.tar.gz" -C "/content/faces/"

In [None]:
# Directory holding the extracted images, plus the parallel lists that will
# receive one entry per successfully parsed file.
base_dir = '/content/faces/part1'
age_labels = []
gender_labels = []
image_paths = []

# os.listdir() returns entries in arbitrary order, so sort first and then shuffle
# with a dedicated seeded generator. This keeps the sample order (and therefore
# any later positional train/validation split) identical across kernel restarts
# without touching the global `random` state used elsewhere in the notebook.
image_filenames = sorted(os.listdir(base_dir))
random.Random(42).shuffle(image_filenames)

# File names are parsed as "<age>_<gender>_...": age is index 0, gender is index 1.
skipped_files = []
for image in tqdm(image_filenames):
    img_components = image.split('_')
    try:
        age_label = int(img_components[0])
        gender_label = int(img_components[1])
    except (IndexError, ValueError):
        # A stray or malformed file name should not abort the whole load;
        # remember it so it can be reported below instead of silently dropped.
        skipped_files.append(image)
        continue

    # Append all three together so the lists always stay aligned.
    age_labels.append(age_label)
    gender_labels.append(gender_label)
    image_paths.append(os.path.join(base_dir, image))

if skipped_files:
    print(f'Skipped {len(skipped_files)} file(s) with unparseable names, e.g. {skipped_files[:5]}')

In [None]:
# Sanity check: the three lists are filled together in the parsing loop, so their
# lengths must be identical.
# Should be 10137 age labels, gender labels, and image paths
print(f'Number of age_labels: {len(age_labels)}, Number of gender_labels: {len(gender_labels)}, Number of image_paths: {len(image_paths)}')

In [7]:
# Human-readable names for the integer gender code parsed from each file name
# (second "_"-separated field); used for plot titles and for decoding predictions.
gender_mapping = {1: 'Female', 0: 'Male'}

In [None]:
import pandas as pd

# Gather paths and labels into a single table (one row per image) and preview
# the first five rows.
df = pd.DataFrame({
    'image_path': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
df.head(5)

In [None]:
from PIL import Image

# Pick a random row to display.
# random.randrange(n) yields 0..n-1. The previous random.randint(0, n) is
# inclusive of n and could therefore select an index one past the last row.
rand_index = random.randrange(len(df))
age = df.loc[rand_index, 'age']
gender = df.loc[rand_index, 'gender']

# Open image and title it using associated Age/Gender data
IMG = Image.open(df.loc[rand_index, 'image_path'])
plt.title(f'Age: {age} Gender: {gender_mapping[gender]}')
plt.axis('off')
plt.imshow(IMG)

In [None]:
# Preview the first 16 samples in a 4x4 grid, each titled with its labels
samples = df.iloc[0:16]
plt.figure(figsize=(20, 20))

# itertuples() yields (index, image_path, age, gender) for every row
for position, path, age, gender in samples.itertuples():
    plt.subplot(4, 4, position + 1)
    pixels = np.array(load_img(path))
    plt.axis('off')
    plt.title(f'Age: {age} Gender: {gender_mapping[gender]}')
    plt.imshow(pixels)

In [11]:
def extract_image_features(images):
  """Load images as 128x128 grayscale arrays stacked into one batch.

  Args:
    images: iterable of image file paths accepted by `load_img`.

  Returns:
    NumPy array of shape (number of images, 128, 128, 1) holding the raw,
    un-normalized pixel values.
  """
  features = []

  for image in tqdm(images):
    # `color_mode='grayscale'` replaces the deprecated `grayscale=True` flag,
    # which newer Keras releases no longer accept.
    img = load_img(image, color_mode='grayscale')
    img = img.resize((128, 128), PIL.Image.LANCZOS)
    features.append(np.array(img))

  features = np.array(features)
  # Add the trailing single-channel axis expected by the Conv2D input.
  features = features.reshape(len(features), 128, 128, 1)

  return features

In [None]:
# Load every image listed in the DataFrame into one array (progress shown via tqdm)
X = extract_image_features(df['image_path'])

In [None]:
# Verify the shape of X: (number of images, 128, 128, 1).
# The author's target for this dataset is (10137, 128, 128, 1).
X.shape

In [14]:
# Scale the raw pixel values by 1/255.
# The integer-dtype guard makes this cell idempotent: once X has been converted
# to floats, re-running the cell is a no-op instead of dividing by 255 again.
# float32 is used instead of NumPy's default float64 promotion to halve the
# memory footprint of the image tensor.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype('float32') / 255.0

In [15]:
# Target arrays for the two model outputs. Both come from the same DataFrame
# whose 'image_path' column produced X, so rows stay aligned with the images.
y_gender = np.array(df['gender'])
y_age = np.array(df['age'])

In [16]:
# Per-image shape fed to the network: 128x128 pixels with a single channel,
# matching the reshape done in extract_image_features.
input_shape = (128, 128, 1)

In [17]:
# Shared convolutional trunk: four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages
# with 32 -> 64 -> 128 -> 256 filters.
inputs = Input(shape=input_shape)
conv1 = Conv2D(32, (3, 3), activation='relu')(inputs)
max1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu')(max1)
max2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(128, (3, 3), activation='relu')(max2)
max3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(256, (3, 3), activation='relu')(max3)
max4 = MaxPooling2D(pool_size=(2, 2))(conv4)

flatten = Flatten()(max4)

# Fully Connected Layers: each task gets its own Dense(256) + Dropout(0.3)
# branch on top of the shared flattened features.
dense1 = Dense(256, activation='relu')(flatten)
dense2 = Dense(256, activation='relu')(flatten)

dropout1 = Dropout(0.3)(dense1)
dropout2 = Dropout(0.3)(dense2)

# Output heads: a sigmoid probability for gender and a non-negative (relu)
# scalar for age.
output1 = Dense(1, activation='sigmoid', name='gender_out')(dropout1)
output2 = Dense(1, activation='relu', name='age_out')(dropout2)

model = Model(inputs=[inputs], outputs=[output1, output2])

# Losses and metrics are listed per output, in output order (gender, age).
# A flat metrics=['accuracy'] would also score the age regression head with
# accuracy, which is meaningless for a continuous target; report MAE there.
# NOTE(review): newer Keras releases also appear to reject a metrics list whose
# length differs from the number of outputs - confirm against the installed version.
model.compile(
    loss=['binary_crossentropy', 'mae'],
    optimizer='adam',
    metrics=[['accuracy'], ['mae']],
)

In [None]:
# Render a diagram of the two-output model's layer graph.
# NOTE(review): plot_model presumably needs pydot/graphviz to be installed - confirm.
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
# Train both heads jointly. Targets are passed in the same order as the model
# outputs (gender, age). validation_split=0.2 holds out a fraction of the
# supplied samples for validation; the file list was shuffled before X was built.
history = model.fit(x=X, y=[y_gender, y_age], batch_size=32, epochs=20, validation_split=0.2)

In [21]:
def get_img_features(image):
  """Prepare a single image file as a model-ready batch of one.

  Args:
    image: path of the image file to load.

  Returns:
    Float NumPy array of shape (1, 128, 128, 1) with pixel values divided
    by 255.
  """
  # `color_mode='grayscale'` replaces the deprecated `grayscale=True` flag,
  # which newer Keras releases no longer accept.
  img = load_img(image, color_mode='grayscale')
  img = img.resize((128, 128), PIL.Image.LANCZOS)
  img = np.array(img)
  # Add the batch and channel axes expected by the network input.
  img = img.reshape(1, 128, 128, 1)
  img = img / 255.0

  return img

In [22]:
# Create the folder "TestImages" that holds the hand-picked test images.
# exist_ok=True turns the call into a no-op when the folder is already present,
# so the cell can be re-run safely without a separate existence check.
os.makedirs('/content/TestImages', exist_ok=True)

In [None]:
# Run the trained model on one hand-picked image and display the prediction
test_image = '/content/TestImages/img2.JPG'
features = get_img_features(test_image)

# predict() returns one array per output head, in model output order
# (gender first, age second); take the single value of the single sample.
pred = model.predict(features)
gender_score = pred[0][0][0]
age_score = pred[1][0][0]
gender = gender_mapping[round(gender_score)]
age = round(age_score)

# Show the original (unprocessed) image under the predicted labels
original_pixels = np.array(load_img(test_image))
plt.title(f'Age: {age} Gender: {gender}')
plt.axis('off')
plt.imshow(original_pixels)