# Task for Today  
***
## Gender, Ethnicity, and Age Classification  

Given the face image data, let's see if we can correctly classify the **gender**, **ethnicity**, and **age** of a person.  
  
We will use three different TensorFlow CNNs to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Load the face dataset CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('../input/age-gender-and-ethnicity-face-data-csv/age_gender.csv')

In [None]:
# Notebook display of the freshly loaded DataFrame.
data

# Preprocessing

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# The image file name is not used as a feature or a label, so remove the column.
data = data.drop(columns=['img_name'])

In [None]:
# List the distinct values found in each of the three label columns.
{column: list(data[column].unique()) for column in ['gender', 'ethnicity', 'age']}

In [None]:
# Replace the raw age with one of four quantile-based groups labelled 0-3,
# turning age prediction into a four-class classification problem.
data['age'] = pd.qcut(data['age'], q=4, labels=[0, 1, 2, 3])

In [None]:
# Notebook display of the DataFrame after the age column has been binned.
data

In [None]:
# Print how many space-separated pixel values the first image string holds,
# then the square root of 2304 to see which square image size that count
# would correspond to.
print(len(data['pixels'][0].split(' ')))
print(np.sqrt(2304))

In [None]:
# Image geometry: 2304 pixel values per image, arranged as a 48 x 48 grid.
num_pixels = 2304
img_height = 48
img_width = 48

In [None]:
# Columns the models learn to predict.
target_columns = ['gender', 'ethnicity', 'age']

# Features are whatever remains once the label columns are removed;
# the labels are kept together in their own frame.
X = data.drop(columns=target_columns)
y = data[target_columns]

In [None]:
# Notebook display of the label frame (gender, ethnicity, age).
y

In [None]:
# Notebook display of the feature frame left after removing the labels.
X

In [None]:
# Turn every space-separated pixel string into a vector of integers and stack
# the vectors into one 2-D array with a row per image.
# The built-in `int` replaces `np.int`, an alias that NumPy deprecated in 1.20
# and removed in 1.24 (using it now raises AttributeError).
X = np.stack(
    [
        np.array([int(pixel) for pixel in pixel_string.split(' ')])
        for pixel_string in X['pixels']
    ],
    axis=0,
)
# Give each image row its 2-D layout: img_height rows by img_width columns.
X = X.reshape(-1, img_height, img_width)
X.shape  # (number of images, img_height, img_width)

# Visualization

In [None]:
plt.figure(figsize=(10, 10))

# Draw nine faces picked at random from row positions 2000-2999 in a 3x3 grid.
for position, sample in enumerate(np.random.randint(2000, 3000, 9)):
    plt.subplot(3, 3, position + 1)
    plt.imshow(X[sample])
    # Read the labels at the image's own row position (`sample`), not at the
    # subplot counter; using the counter captions every panel with the labels
    # of rows 0-8 instead of the face that is actually drawn.
    plt.xlabel(
        "Age:" + str(y['age'].iloc[sample]) +
        "  Ethnicity:" + str(y['ethnicity'].iloc[sample]) +
        "  Gender:" + str(y['gender'].iloc[sample])
    )

plt.show()

# Training

In [None]:
# One NumPy label array per prediction task, taken from the label frame.
y_gender, y_ethnicity, y_age = (
    np.array(y[column]) for column in ('gender', 'ethnicity', 'age')
)

In [None]:
# Notebook display of the image array's shape before training.
X.shape

In [None]:
def build_model(num_classes, activation='softmax', loss='sparse_categorical_crossentropy'):
    """Build and compile a small CNN classifier for single-channel images.

    The network takes inputs of shape (img_height, img_width, 1), divides the
    pixel values by 255, applies three Conv2D + MaxPooling2D stages with 16,
    32 and 64 filters, flattens the result and finishes with a 128-unit dense
    layer followed by the output layer. Because the rescaling happens inside
    the model, callers should pass pixel values that have not been divided
    by 255 already.

    Args:
        num_classes: Number of units in the output layer.
        activation: Activation function of the output layer.
        loss: Loss function handed to ``model.compile``.

    Returns:
        A compiled ``tf.keras.Model`` that uses the Adam optimizer and
        reports accuracy.
    """
    # `Rescaling` lives directly in `tf.keras.layers` since TF 2.6, and the
    # `experimental.preprocessing` namespace is absent from recent Keras
    # releases. Prefer the stable location, fall back for older installs.
    rescaling_layer = getattr(tf.keras.layers, 'Rescaling', None)
    if rescaling_layer is None:
        rescaling_layer = tf.keras.layers.experimental.preprocessing.Rescaling

    inputs = tf.keras.Input(shape=(img_height, img_width, 1))
    x = rescaling_layer(1./255)(inputs)
    x = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D()(x)
    x = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D()(x)
    x = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D()(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    outputs = tf.keras.layers.Dense(num_classes, activation=activation)(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer='adam',
        loss=loss,
        metrics=['accuracy']
    )

    return model

In [None]:
# List the distinct label values again; at this point the age column holds
# the four bin labels rather than raw ages.
{column: list(data[column].unique()) for column in ['gender', 'ethnicity', 'age']}

In [None]:
# Make an independent 70/30 train/test split of the images for each task.
# No random_state is passed, so the three splits differ from one another and
# change from run to run.
X_gender_train, X_gender_test, y_gender_train, y_gender_test = train_test_split(X, y_gender, train_size=0.7)
X_ethnicity_train, X_ethnicity_test, y_ethnicity_train, y_ethnicity_test = train_test_split(X, y_ethnicity, train_size=0.7)
X_age_train, X_age_test, y_age_train, y_age_test = train_test_split(X, y_age, train_size=0.7)

## Gender Model

In [None]:
# Binary classifier: a single sigmoid output unit trained with binary cross-entropy.
gender_model = build_model(1, activation='sigmoid', loss='binary_crossentropy')

# Train for 7 epochs in batches of 64, holding out 20% of the training split
# for validation. ReduceLROnPlateau is used with its default settings.
gender_history = gender_model.fit(
    X_gender_train,
    y_gender_train,
    validation_split=0.2,
    batch_size=64,
    epochs=7,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=1
)

In [None]:
# Plot training loss against validation loss per epoch for the gender model.
fig = px.line(
    gender_history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Gender Model"
)

fig.show()

In [None]:
# Evaluate on the held-out test split and keep the second returned value,
# which is the accuracy metric (the first is the loss).
gender_acc = gender_model.evaluate(X_gender_test, y_gender_test)[1]

In [None]:
# Notebook display of the full evaluation result (loss and accuracy) on the test split.
gender_model.evaluate(X_gender_test, y_gender_test)

### Test model with own images (Predict)

In [None]:
def show_image(image):
    """Display `image` in a new figure with a colorbar and the grid turned off."""
    # Open the figure on each call. Creating it once next to the definition
    # only produced an empty figure in the defining cell.
    plt.figure()
    plt.imshow(image)
    plt.colorbar()
    plt.grid(False)
    plt.show()

In [None]:
# Sample image
prediction_image = Image.open('../input/images/pia.jpg').convert('L') # Male, age 78
show_image(prediction_image)

In [None]:
# Input shape for one image: (1, 48, 48) => [[[row1],[row,2] ..., ]]
prediction_image = np.asarray(prediction_image) 
# prediction_image = prediction_image[48:96,48:96]
print(prediction_image.shape)

In [None]:
# Picture of male with corresponding age group
plt.figure()
plt.imshow(X[9929])
plt.colorbar()
plt.grid(False)
plt.show()
print("Age group: ", y['age'][9929])
print("Gender: ", y['gender'][9929]) # So male is 0 and female is 1 in gender

In [None]:
# Using Keras ImageDataGenerator function. And divide all pixels in image by 255
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
)
datagen

In [None]:
# Create a "validation generator", which we will later pass to the gender_model
new_generator = datagen.flow_from_directory(
    '/kaggle/input/', # Path to images (for some reason adding 'images/' won't work)
    target_size=(48, 48), # Make image 48x48 pixels
    class_mode='binary', # Binary because the model outputs 1D binary labels,
    batch_size=4,
    color_mode='grayscale'
)

# Let's transfer the images to class folders in the working directory

In [None]:
import os
import shutil

In [None]:
# By putting the images in folders with class names, flow_from_directory will automatically infer the class names from the parent folders
os.mkdir('./MALE')
os.mkdir('./FEMALE')

In [None]:
# Just copying the files into the proper folders
shutil.copyfile('../input/images/Samuel-L-Jackson.jpg', './MALE/Samuel-L-Jackson.jpg')
shutil.copyfile('../input/images/baby.jpg', './MALE/baby.jpg')
shutil.copyfile('../input/images/paul_mccartney.jpg', './MALE/paul_mccartney.jpg')
shutil.copyfile('../input/images/pia.jpg', './FEMALE/pia.jpg')

In [None]:
# Using Keras ImageDataGenerator function. And divide all pixels in image by 255
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
)
datagen

In [None]:
# Create a "validation generator", which we will later pass to the gender_model
image_generator = datagen.flow_from_directory(
    './', # Using the working directory, which will create classes from all subfolders (i.e. MALE and FEMALE), and assign the respective classes to each image
    target_size=(48, 48), # Make image 48x48 pixels
    class_mode='binary', # Binary because the model outputs 1D binary labels,
    color_mode='grayscale'
)

In [None]:
# Grab one batch from the generator: image data goes to new_X, labels to new_y.
# With only four sample images on disk the batch holds all four.
# The builtin next() is used because newer Keras iterators no longer provide a
# .next() method, while next() works on old and new versions alike.
new_X, new_y = next(image_generator)

In [None]:
# Notebook display of the shape of the image batch.
new_X.shape

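### The resized images seem to be the reason that we are having a hard time classifying them.  
Since the model's inputs are only 48x48 pixels, we lose a lot of quality and detail when the full photos are squeezed to fit.  
Compared to the image data in the original dataset, these images are nearly unusable in their current state.  
I recommend cropping each image so that it contains only the face before feeding it into the model.  
It is also worth checking that the new images reach the model on the same 0-255 pixel scale as the training data, and that the generator's class indices match the dataset's gender encoding.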

In [None]:
# Lay the loaded images out on a 2x2 grid, dropping the channel axis so each
# one is drawn as a plain 2-D image.
plt.figure(figsize=(10, 10))
for slot, picture in enumerate(new_X, start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(np.squeeze(picture))
plt.show()

In [None]:
# .class_indices maps each class folder name to the integer label the generator assigned to it.
# Compare this mapping with the dataset's gender encoding before interpreting predictions.
image_generator.class_indices

In [None]:
# One sigmoid output per image. Going by the earlier dataset sample check,
# the training labels use 0 for male and 1 for female.
# NOTE(review): the original note here read "The model predicts female for
# every image"; that reading only holds if image_generator.class_indices uses
# the same 0/1 encoding as the training data — confirm.
gender_model.predict(new_X)

In [None]:
# Loss and accuracy on the new images, scored against the generator's labels.
# NOTE(review): the original note recorded 25% accuracy here; the figure
# depends on the generator's label encoding matching the training encoding — confirm.
gender_model.evaluate(new_X, new_y)

## Ethnicity Model

In [None]:
# ethnicity_model = build_model(5, activation='softmax', loss='sparse_categorical_crossentropy')

# ethnicity_history = ethnicity_model.fit(
#     X_ethnicity_train,
#     y_ethnicity_train,
#     validation_split=0.2,
#     batch_size=64,
#     epochs=8,
#     callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
#     verbose=0
# )

In [None]:
# fig = px.line(
#     ethnicity_history.history,
#     y=['loss', 'val_loss'],
#     labels={'index': "Epoch", 'value': "Loss"},
#     title="Ethnicity Model"
# )

# fig.show()

In [None]:
# ethnicity_acc = ethnicity_model.evaluate(X_ethnicity_test, y_ethnicity_test)[1]

## Age Model

In [None]:
# age_model = build_model(4, activation='softmax', loss='sparse_categorical_crossentropy')

# age_history = age_model.fit(
#     X_age_train,
#     y_age_train,
#     validation_split=0.2,
#     batch_size=64,
#     epochs=7,
#     callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
#     verbose=0
# )

In [None]:
# fig = px.line(
#     age_history.history,
#     y=['loss', 'val_loss'],
#     labels={'index': "Epoch", 'value': "Loss"},
#     title="Age Model"
# )

# fig.show()

In [None]:
# age_acc = age_model.evaluate(X_age_test, y_age_test)[1]

# Results

In [None]:
# fig = px.bar(
#     x=["Gender", "Ethnicity", "Age"],
#     y=[gender_acc, ethnicity_acc, age_acc],
#     labels={'x': "", 'y': "Accuracy"},
#     color=["Gender", "Ethnicity", "Age"],
#     title="Model Performance"
# )

# fig.show()