In [3]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm

warnings.filterwarnings('ignore') #Clean result
%matplotlib inline

import random
import shutil
import zipfile

%load_ext tensorboard

import tensorflow as tf
from keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D, InputLayer
from keras.models import Model, Sequential
from keras.preprocessing.image import load_img
from PIL import Image

# plot the model
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split

In [5]:
BASE_DIR = '/input/utkface-new/UTKFace/'  # directory whose files are listed and loaded as the image dataset

In [6]:
# Human-readable names for the integer gender and race codes parsed from the filenames.
gender_dict = {0:'Male', 1:'Female'}
race_dict={0:'White', 1:'Black',2:'Asian',3:'Indian',4:'Others (like Hispanic, Latino, Middle Eastern)'}

# Parallel lists (image path, age, gender, race): one entry per file that parsed cleanly.
image_paths, age_labels, gender_labels, race_labels = [], [], [], []

# Each filename is expected to start with "<age>_<gender>_<race>_".
# Files that do not provide all three integer labels, or whose gender/race code
# has no entry in the lookup tables above, are reported and left out of the dataset.
for filename in tqdm(os.listdir(BASE_DIR)):
    try:
        temp = filename.split('_')
        race = int(temp[2])
        age = int(temp[0])
        gender = int(temp[1])
        if gender not in gender_dict or race not in race_dict:
            # An unknown code would later break the label lookups and the sparse race loss.
            raise ValueError(f"unknown label code (gender={gender}, race={race})")
    except (ValueError, IndexError) as e:
        # ValueError: a field is not an integer or is an unknown code; IndexError: too few fields.
        print(f"ERROR: {filename}: {e}")
        continue

    # Append to all four lists together so they stay aligned.
    image_path = os.path.join(BASE_DIR, filename)
    image_paths.append(image_path)
    age_labels.append(age)
    gender_labels.append(gender)
    race_labels.append(race)

In [7]:
# Gather the parallel label lists into a single table, one row per image.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_labels,
    'race': race_labels,
})
df.head()

In [29]:
# Preview the first 25 images in a 5x5 grid, each titled with its decoded labels.
files = df.iloc[0:25]
fig = plt.figure(figsize=(20, 20))

for index, file, age, gender, race in files.itertuples():
    # The dataframe index (0..24 here) selects the grid cell.
    ax = fig.add_subplot(5, 5, index+1)
    img = load_img(file)
    ax.imshow(np.array(img), cmap='gray')
    ax.set_title(f"Age: {age}\nGender: {gender_dict[gender]}\nRace:{race_dict[race]}")
    ax.axis('off')

## Feature Extraction

In [9]:
# Functions
def extract_features(images):
    """Load images as 128x128 grayscale arrays and stack them for the CNN.

    Args:
        images: iterable of image file paths.

    Returns:
        numpy array of shape (len(images), 128, 128, 1) holding the pixel
        values as loaded (not normalized).
    """
    features = []
    for image in tqdm(images):
        # color_mode='grayscale' replaces the deprecated `grayscale=True` flag.
        img = load_img(image, color_mode='grayscale')
        # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
        # which was removed in Pillow 10.
        img = img.resize((128, 128), Image.LANCZOS)
        features.append(np.array(img))

    # Stack into one array and add the trailing single-channel axis.
    features = np.array(features)
    # ignore this step if using RGB
    features = features.reshape(len(features), 128, 128, 1)
    return features

In [10]:
# Shape of one network input: 128x128 pixels, single (grayscale) channel.
input_shape = (128, 128, 1)

# Load every image listed in the dataframe, then scale the pixel values by 1/255.
X = extract_features(df['image'])
X = X / 255.0

# Label arrays, one per target column, aligned row-for-row with X.
y_gender, y_age, y_race = (np.array(df[column]) for column in ('gender', 'age', 'race'))

## Model building

In [64]:
# Network input: one image of shape `input_shape`.
inputs = Input((input_shape))
# convolutional layers: four Conv2D(3x3, ReLU) + 2x2 max-pooling stages with 32/64/128/256 filters
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu') (inputs)
maxp_1 = MaxPooling2D(pool_size=(2, 2)) (conv_1)                     # each layer takes the previous layer's output as its input
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu') (maxp_1)
maxp_2 = MaxPooling2D(pool_size=(2, 2)) (conv_2)
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu') (maxp_2)
maxp_3 = MaxPooling2D(pool_size=(2, 2)) (conv_3)
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu') (maxp_3)
maxp_4 = MaxPooling2D(pool_size=(2, 2)) (conv_4)

flatten = Flatten() (maxp_4)  # collapse the convolutional feature maps into a single vector for the dense layers

# fully connected layers: one dense + dropout branch per head
dense_1 = Dense(256, activation='relu') (flatten)
dense_2 = Dense(256, activation='relu') (flatten)

dropout_1 = Dropout(0.5) (dense_1)
dropout_2 = Dropout(0.3) (dense_2)

# race_out: probabilities over the 5 race classes (softmax already applied here).
output_1 = Dense(5, activation='softmax', name='race_out') (dropout_1)
# gender_out is constructed but NOT passed to `outputs` below, so it is not part of `model`.
output_2 = Dense(1, activation='sigmoid', name='gender_out') (dropout_2)

# Single-output model: race classification only.
model = Model(inputs=[inputs], outputs=[output_1])

# The race head outputs probabilities, not logits, so the loss must use from_logits=False;
# from_logits=True would apply a second softmax on top of the layer's own.
model.compile(loss=[tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)], optimizer='rmsprop', metrics=['accuracy'])

In [12]:
# Show the layer-by-layer summary of the model built above.
model.summary()

In [56]:
# Draw the model's layer graph as a diagram.
plot_model(model)

In [62]:
# Where the checkpoint is written, and the directory that contains it.
checkpoint_path = '/working/cp_03.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that saves only the model's weights (not the full model), logging each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

In [65]:
# Train the race classifier; 20% of the samples are held out for validation and the
# checkpoint callback is passed in so weights are saved while training runs.
history = model.fit(
    x=X,
    y=[y_race],
    batch_size=32,
    epochs=16,
    validation_split=0.2,
    callbacks=[cp_callback],
)

In [66]:
# Save the full trained model to a single .h5 file.
saved_model_path = '/working/saved_model_21Jul2022_race_02.h5'
model.save(saved_model_path)

# To reload a previously saved model instead of retraining:
# saved_model_path = '/working/saved_model_21Jul2022_race_01.h5'
# model = tf.keras.models.load_model(saved_model_path)
# model.summary()

In [76]:
# plot results for race
def _history_series(*keys):
    """Return the per-epoch values stored under the first of `keys` present in the training history.

    A single-output model logs plain names ('accuracy', 'val_loss', ...), while a
    multi-output model prefixes them with the output name ('race_out_accuracy', ...),
    so both spellings are accepted.
    """
    for key in keys:
        if key in history.history:
            return history.history[key]
    raise KeyError(f"none of {keys} found in history; available: {sorted(history.history)}")


acc = _history_series('accuracy', 'race_out_accuracy')
val_acc = _history_series('val_accuracy', 'val_race_out_accuracy')
epochs = range(len(acc))    # one x position per trained epoch

fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'b', label='Training Accuracy')        # 'b' is blue
ax_acc.plot(epochs, val_acc, 'r', label='Validation Accuracy')  # 'r' is red
ax_acc.set_title('Accuracy Graph')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

loss = _history_series('loss', 'race_out_loss')
val_loss = _history_series('val_loss', 'val_race_out_loss')

fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'b', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'r', label='Validation Loss')
ax_loss.set_title('Loss Graph')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
plt.show()

### Prediction on Sample Images

In [75]:
def predict_image(image_index, X, model):
    """Print the true and predicted race for one image and display the image.

    Args:
        image_index: position of the image in `X` (and of its label in `y_race`).
        X: image array; the selected entry is reshaped to (1, 128, 128, 1).
        model: model whose `predict` returns the race class scores, either
            directly or as the first element of a list of outputs.
    """
    print("Original Race:", race_dict[y_race[image_index]],)
    pred = model.predict(X[image_index].reshape(1, 128, 128, 1))
    # A single-output model returns the score array itself; a multi-output model
    # returns a list of arrays (assumes the race head comes first — confirm if more heads are added).
    race_scores = pred[0] if isinstance(pred, (list, tuple)) else pred
    # argmax over the flattened scores of the one-image batch gives the class index.
    pred_race = race_dict[int(np.argmax(race_scores))]
    print("Predicted Race:", pred_race,)
    plt.axis('off')
    plt.imshow(X[image_index].reshape(128, 128), cmap='gray')
    plt.show()

# Row indices of the preview subset; taken from the index directly so this does not
# depend on how many columns the dataframe has.
vals = list(files.index)
# Predict on up to 15 distinct, randomly chosen rows (fewer if the subset is smaller).
for index in np.random.choice(vals, size=min(15, len(vals)), replace=False):
    predict_image(index, X, model)