In [37]:
import zipfile
import shutil
from tensorflow.keras.utils import load_img, plot_model
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input
import PIL
from PIL import Image
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline
import os
import seaborn as sns
#import warnings
from tqdm.notebook import tqdm
#warnings.filterwarnings('ignore')

In [None]:
# Unpack the uploaded UTKFace archive. No -d target is given, so unzip writes
# into the current working directory (presumably /content, since the following
# tar step reads from /content/UTKface_inthewild - confirm).
!unzip '/content/UTKface_inthewild-20240702T010559Z-001.zip'

In [17]:
# Create base folder "faces" to store image set.
# exist_ok=True makes this a no-op when the folder is already there, so the
# cell can be re-run safely without a separate (and racy) existence check.
os.makedirs('/content/faces', exist_ok=True)

In [None]:
# Unpack part1.tar.gz from the unzipped archive into the destination folder (faces).
# Flags: -x extract, -z gunzip, -v list every extracted file, -f archive path;
# -C selects the directory the files are written to.
!tar -xzvf "/content/UTKface_inthewild/part1.tar.gz" -C "/content/faces/"

In [None]:
# Declare the base directory where image data is held
# Create empty lists to hold data (rebuilt from scratch on every run of this cell)
base_dir = '/content/faces/part1'
age_labels = []
gender_labels = []
image_paths = []
# Directory entries whose names could not be parsed into labels
skipped_files = []

# Shuffle the file order; no seed is set here, so the order differs between runs
image_filenames = os.listdir(base_dir)
random.shuffle(image_filenames)

# Extract the labels encoded in each file name.
# After splitting on '_': Age is index 0, Gender is index 1
for image in tqdm(image_filenames):
    image_path = os.path.join(base_dir, image)
    img_components = image.split('_')
    try:
        age_label = int(img_components[0])
        gender_label = int(img_components[1])
    except (IndexError, ValueError):
        # Not a labelled image (e.g. a hidden folder or a malformed file name):
        # skip it instead of aborting the whole load
        skipped_files.append(image)
        continue

    # Appending the image_path, age label, and gender label into the associated list
    age_labels.append(age_label)
    gender_labels.append(gender_label)
    image_paths.append(image_path)

# Report skipped entries so missing rows are never silent
if skipped_files:
    print(f'Skipped {len(skipped_files)} entries without <age>_<gender> labels, e.g. {skipped_files[:5]}')

In [None]:
# Sanity check: the three lists are filled together, so their lengths must match
# (the author expected 10137 entries in each for this data set)
print(
    f'Number of age_labels: {len(age_labels)}, '
    f'Number of gender_labels: {len(gender_labels)}, '
    f'Number of image_paths: {len(image_paths)}'
)

In [40]:
# Human-readable names for the numeric gender labels
gender_mapping = {
    0: 'Male',
    1: 'Female',
}

In [None]:
# Assemble the DataFrame from the three parallel lists:
# one row per image with its path, age, and gender
df = pd.DataFrame({
    'image_path': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
# Preview the first 5 rows
df.head(5)

In [None]:
# Pick a random row to preview.
# random.randrange excludes its upper bound; random.randint(0, n) can return n
# itself, which is one past the last row and would raise a KeyError.
rand_index = random.randrange(len(df))
age = df.loc[rand_index, 'age']
gender = df.loc[rand_index, 'gender']
# Open image and title it using the labeled Age/Gender data
IMG = Image.open(df.loc[rand_index, 'image_path'])
plt.title(f'Age: {age} Gender: {gender_mapping[gender]}')
plt.axis('off')
plt.imshow(IMG)

In [43]:
# Load every image as a fixed-size grayscale array (scaling to [0, 1] is done by the caller)
def extract_image_features(images, target_size=(128, 128)):
  """Load images from disk as grayscale arrays of a common size.

  Args:
    images: iterable of image file paths.
    target_size: (height, width) every image is resized to. Defaults to
      (128, 128).

  Returns:
    Array of shape (number of images, height, width, 1) holding the resized
    pixel values; the values are not rescaled here.
  """
  height, width = target_size
  features = list()

  for image in tqdm(images):
    # color_mode='grayscale' is the supported spelling; the older
    # grayscale=True flag is deprecated and not accepted by newer Keras releases
    img = load_img(image, color_mode='grayscale')
    # PIL's resize takes (width, height)
    img = img.resize((width, height), PIL.Image.LANCZOS)
    features.append(np.array(img))

  features = np.array(features)
  # Add the trailing single-channel axis
  features = features.reshape(len(features), height, width, 1)

  return features

In [None]:
# Turn every image path listed in the DataFrame into its pixel array
X = extract_image_features(images=df['image_path'])

In [None]:
# Display the dimensions of X (the author's target is 10137, 128, 128, 1)
np.shape(X)

In [46]:
# Normalize each image to the [0, 1] range.
# The dtype guard keeps the cell idempotent: once X has been converted to float,
# re-running the cell no longer divides by 255 a second time.
# float32 needs half the memory of the float64 a plain division would produce.
# Assumes X still holds integer pixel data from the extraction step.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype('float32') / 255.0

In [47]:
# Build the two label arrays (from project scope), one per model output:
# gender first, then age
y_gender, y_age = (np.array(df[column]) for column in ('gender', 'age'))

In [48]:
# Shape of a single image as fed to the CNN
input_shape = (
    128,  # first image dimension
    128,  # second image dimension
    1,    # single (grayscale) channel
)

In [49]:
# Shared convolutional backbone: five Conv2D + MaxPooling2D stages applied to the image
inputs = Input(shape=input_shape)
conv1 = Conv2D(16, (3, 3), activation='relu')(inputs)
max1 = MaxPooling2D(2, 2)(conv1)
conv2 = Conv2D(32, (3, 3), activation='relu')(max1)
max2 = MaxPooling2D(2, 2)(conv2)
conv3 = Conv2D(64, (3, 3), activation='relu')(max2)
max3 = MaxPooling2D(2, 2)(conv3)
conv4 = Conv2D(128, (3, 3), activation='relu')(max3)
max4 = MaxPooling2D(2, 2)(conv4)
conv5 = Conv2D(256, (3, 3), activation='relu')(max4)
max5 = MaxPooling2D(2, 2)(conv5)

flatten = Flatten()(max5)

# Two Fully Connected Layers (one for each output, age and gender)
dense1 = Dense(256, activation='relu')(flatten)
dense2 = Dense(256, activation='relu')(flatten)

# Two Dropout Layers (one for each output, age and gender)
dropout1 = Dropout(0.25)(dense1)
dropout2 = Dropout(0.25)(dense2)

# Use sigmoid activation because gender value is binary
output1 = Dense(1, activation='sigmoid', name='gender_out')(dropout1)
# Use relu activation because age is an estimated value
# NOTE(review): a relu output unit stops learning if its pre-activation goes
# negative for every sample; consider a linear output if the age loss stalls.
output2 = Dense(1, activation='relu', name='age_out')(dropout2)

model = Model(inputs=[inputs], outputs=[output1, output2])
# Losses and metrics are given per output, in the same order as `outputs`
# (gender first, then age). Accuracy only makes sense for the binary gender
# head; the age head is a regression, so it reports mean absolute error.
model.compile(
    loss=['binary_crossentropy', 'mae'],
    optimizer='adam',
    metrics=[['accuracy'], ['mae']],
)

In [None]:
# Draw the network graph; the diagram branches into the two
# output paths right after the Flatten layer
plot_model(model=model)

In [None]:
# Train both outputs together. Labels are passed in the same order as the
# model outputs (gender, then age); 20% of the samples are held out for validation.
runtime = model.fit(
    x=X,
    y=[y_gender, y_age],
    batch_size=16,
    epochs=20,
    validation_split=0.2,
)

In [52]:
def get_img_features(image, target_size=(128, 128)):
  """Prepare a single image file as a one-sample batch for model.predict.

  Args:
    image: path of the image file to load.
    target_size: (height, width) the image is resized to. Defaults to
      (128, 128).

  Returns:
    Array of shape (1, height, width, 1) with pixel values divided by 255.
  """
  height, width = target_size
  # color_mode='grayscale' is the supported spelling; the older
  # grayscale=True flag is deprecated and not accepted by newer Keras releases
  img = load_img(image, color_mode='grayscale')
  # PIL's resize takes (width, height)
  img = img.resize((width, height), PIL.Image.LANCZOS)
  img = np.array(img)
  # Leading axis = batch of one, trailing axis = single channel
  img = img.reshape(1, height, width, 1)
  img = img / 255.0

  return img

In [35]:
# Creating test image folder
# Here you can store any image you would like to test.
# exist_ok=True makes this a no-op when the folder is already there, so the
# cell can be re-run safely without a separate (and racy) existence check.
os.makedirs('/content/Test_Images', exist_ok=True)

In [None]:
# Try the trained model on an unlabeled image from the test folder
# (img1.jpg - you will see a certain someone in his youth!)
test_image = '/content/Test_Images/img1.jpg'

features = get_img_features(image=test_image)
pred = model.predict(features)
# pred holds one array per model output: index 0 is gender, index 1 is age
gender = gender_mapping[round(pred[0][0, 0])]
age = round(pred[1][0, 0])

# Show the image as stored on disk, titled with the predicted labels
plt.axis('off')
plt.title(f'Age: {age} Gender: {gender}')
plt.imshow(np.array(load_img(test_image)))