# **Age, Gender and Race Prediction**

### **Details**


**ETHNICITIES = [ 0: "White", 1: "Black", 2: "Asian", 3: "Indian", 4: "Hispanic" ]**

**GENDERS = [ 0: "Male", 1: "Female" ]**


In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import os
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator as imgen
from keras.models import load_model, Sequential
from keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,Dense,BatchNormalization
from keras.preprocessing import image
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,mean_absolute_error

## Reading and fixing Dataset

In [None]:
# Load the face dataset CSV. Later cells read its "pixels", "age", "gender"
# and "ethnicity" columns.
data = pd.read_csv("../input/age-gender-and-ethnicity-face-data-csv/age_gender.csv")
data.head()

In [None]:
# (rows, columns) of the loaded frame
data.shape

In [None]:
# Column dtypes and non-null counts, to check for missing values before parsing.
data.info()

In [None]:
# Getting Image data

def toPixels(pixels, shape=(48, 48)):
    """Parse a whitespace-separated pixel string into a float64 image array.

    Parameters
    ----------
    pixels : str
        Pixel intensities separated by whitespace, in row-major order.
    shape : tuple of int, optional
        Shape of the returned image. Defaults to the 48x48 layout used
        throughout this notebook.

    Returns
    -------
    numpy.ndarray
        Array of dtype float64 with the requested ``shape``.

    Raises
    ------
    ValueError
        If a token is not numeric, or if the number of values does not match
        the number of elements implied by ``shape``.
    """
    values = np.array(pixels.split(), dtype="float64")

    # Check the size up front so a malformed row is reported with the counts
    # involved rather than a bare reshape error.
    expected = int(np.prod(shape))
    if values.size != expected:
        raise ValueError(
            "expected {} pixel values for shape {}, got {}".format(
                expected, tuple(shape), values.size
            )
        )

    return values.reshape(shape)

In [None]:
%%time
data["pixels"] = data["pixels"].apply(toPixels)

In [None]:
# Gather the per-row 48x48 arrays into one (n_samples, 48, 48, 1) array; the
# trailing axis of size 1 matches the (48, 48, 1) input shape of the models below.
pixels = np.asarray(data["pixels"].to_list()).reshape(len(data), 48, 48, 1)

In [None]:
# Sanity check: should be (n_samples, 48, 48, 1) after the reshape above.
pixels.shape

# **Age Prediction Model**

### Generating Datasets for Age Prediction

In [None]:
# Step 1: hold out 20% of all samples as the age test set.
x_train_age, x_test_age, y_train_age, y_test_age = train_test_split(
    pixels,
    np.array(data["age"]),
    test_size=0.2,
    random_state=42,
)

# Step 2: carve 15% of the remaining training samples off as the validation set.
x_train_age, x_val_age, y_train_age, y_val_age = train_test_split(
    x_train_age,
    y_train_age,
    test_size=0.15,
    random_state=21,
)

In [None]:
# Shapes of: train images, train labels, test images, validation images.
print(x_train_age.shape,y_train_age.shape, x_test_age.shape,x_val_age.shape)

**Image data Generator.**

In [None]:
# Training data: rescale to [0, 1] and apply light random augmentation
# (zoom, shear, horizontal flip).
traingen = imgen(rescale=1./255,
                 zoom_range=0.2,
                 shear_range=0.2,
                 horizontal_flip=True
                )

# Validation data: rescale only. Random augmentation here would change the
# validation images on every epoch, making val_loss / val metrics noisy and
# unreliable for the EarlyStopping and ModelCheckpoint callbacks that watch them.
valgen = imgen(rescale=1./255)

# Test data: rescale only.
testgen = imgen(rescale=1./255)

In [None]:
# Batch iterators (32 samples per batch) for the age task.
age_train_ds = traingen.flow(x_train_age, y_train_age, batch_size=32)
age_val_ds = valgen.flow(x_val_age, y_val_age, batch_size=32)

# shuffle=False keeps the test batches in array order, so predictions made from
# this iterator can be compared index-by-index with y_test_age.
age_test_ds = testgen.flow(x_test_age, y_test_age, batch_size=32, shuffle=False)

**Visualizing one batch.**

In [None]:
def showImagesAge(img,label):
    """Draw the first 25 images of a batch in a 5x5 grid, titled with their ages.

    img   -- indexable batch of images; entries 0..24 are displayed
    label -- indexable ages aligned with ``img``
    """
    plt.figure(figsize=[22, 15])
    for cell in range(1, 26):
        idx = cell - 1  # subplot slots are 1-based, batch entries 0-based
        plt.subplot(5, 5, cell)
        plt.imshow(img[idx])
        plt.title(f"Age is {label[idx]}")
        plt.axis('off')
    plt.show()

In [None]:
# Pull one (images, ages) batch from the training iterator and display it.
X,Y = next(age_train_ds)

showImagesAge(X,Y)

### Model

In [None]:
# CNN regressor built with the functional API: a 48x48x1 image goes in and a
# single linear unit (the age estimate) comes out.
image_input = keras.Input(shape=(48, 48, 1))

# Stage 1: two 3x3 convolutions with 32 filters, 2x2 max-pooling, 30% dropout.
stage1 = Conv2D(32, (3, 3), activation="relu")(image_input)
stage1 = Conv2D(32, (3, 3), activation="relu")(stage1)
stage1 = MaxPooling2D(pool_size=(2, 2))(stage1)
stage1 = Dropout(0.30)(stage1)

# Stage 2: 3x3 convolutions with 64 and then 128 filters (no pooling).
stage2 = Conv2D(64, (3, 3), activation="relu")(stage1)
stage2 = Conv2D(128, (3, 3), activation="relu")(stage2)

# Head: flatten, one hidden dense layer, then a single output with no activation.
flat = Flatten()(stage2)
hidden = Dense(256, activation="relu")(flat)
image_output = Dense(1)(hidden)

model_age = keras.Model(image_input, image_output)

In [None]:
# Print the layer-by-layer architecture of the age model.
model_age.summary()

In [None]:
# Compiling the model
# MSE is the training loss and MAE is tracked as an extra metric. NOTE(review): the
# callback and plotting cells below use the keys "mean_absolute_error" /
# "val_mean_absolute_error", which presumably come from the name of the metric
# function passed here; keep them in sync if the metric is changed.
model_age.compile(optimizer='adam', loss = 'mse', metrics=[keras.metrics.mean_absolute_error])

**Defining callbacks.**

In [None]:
my_calls = [
    # Stop after 3 epochs without improvement in validation MAE.
    # restore_best_weights=True rolls the in-memory model back to its best epoch;
    # without it the evaluate/predict cells below would score the weights from the
    # final, non-improving epoch.
    keras.callbacks.EarlyStopping(monitor='val_mean_absolute_error',
                                  patience=3,
                                  restore_best_weights=True),
    # Keep only the best checkpoint on disk (no `monitor` is given, so Keras uses
    # its default quantity for "best").
    keras.callbacks.ModelCheckpoint("Model_age.h5",verbose=1,save_best_only=True),
]

**Training the model for age.**

In [None]:
# Train for at most 35 epochs; the EarlyStopping callback in my_calls may end
# training sooner. The returned history is used by the plotting cell below.
hist_age = model_age.fit(age_train_ds,epochs=35,validation_data=age_val_ds,callbacks=my_calls)

**Test for age prediction**

In [None]:
# Score the age model on the held-out test iterator (compiled loss: MSE, metric: MAE).
model_age.evaluate(age_test_ds,verbose=1)

**Loss and MAE**

In [None]:
plt.figure(figsize=(15,6))

# Left panel: mean absolute error per epoch (lower is better). This is a
# regression model, so the curve is an error, not an accuracy.
plt.subplot(1,2,1)
plt.plot(hist_age.epoch,hist_age.history['mean_absolute_error'],label = 'Training')
plt.plot(hist_age.epoch,hist_age.history['val_mean_absolute_error'],label = 'validation')

plt.title("Mean Absolute Error")
plt.xlabel("Epoch")
plt.ylabel("MAE")
plt.legend()

# Right panel: training loss (MSE) per epoch.
plt.subplot(1,2,2)
plt.plot(hist_age.epoch,hist_age.history['loss'],label = 'Training')
plt.plot(hist_age.epoch,hist_age.history['val_loss'],label = 'validation')

plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss (MSE)")
plt.legend()
plt.show()

**Verifying the Predictions**

In [None]:
# Raw regression outputs for the test set. age_test_ds was created with
# shuffle=False, so the rows should line up with x_test_age / y_test_age.
pred_Age = model_age.predict(age_test_ds, verbose=1)

**Plotting predicted vs. actual ages**

In [None]:
# Each row of pred_Age is indexed at [0] to get the model's single output, which
# is then rounded to the nearest whole number.
pred_age = [np.round(row[0]) for row in pred_Age]

In [None]:
def plotAgePA(image,pred,actual):
    """Show samples 500-524 in a 5x5 grid labelled with actual and predicted age.

    Parameters
    ----------
    image : indexable of images
        Images to draw; entries 500..524 are used.
    pred : indexable
        Predicted ages aligned with ``image``.
    actual : indexable
        True ages aligned with ``image``.

    Raises
    ------
    IndexError
        If any argument has fewer than 525 entries.
    """
    plt.figure(figsize=[22,15])
    for i in range(500,525):
        # 500 is a multiple of 25, so i % 25 maps 500..524 onto grid slots 1..25.
        plt.subplot(5,5,(i%25)+1)
        plt.xticks([])
        plt.yticks([])
        # Draw from the `image` argument rather than the global x_test_age, so the
        # function shows whatever the caller passed in.
        plt.imshow(image[i])
        plt.xlabel("Actual Age is {}".format(actual[i]))
        plt.ylabel("Prediced is {}".format(pred[i]))
    plt.show()

In [None]:
# Compare rounded predictions with the true ages on a slice of the test set.
plotAgePA(x_test_age,pred_age,y_test_age)

# **Gender Prediction Model**

### Dataset for Gender Prediction

In [None]:
# Step 1: hold out 20% of all samples as the gender test set.
x_train_gen, x_test_gen, y_train_gen, y_test_gen = train_test_split(
    pixels,
    np.array(data["gender"]),
    test_size=0.2,
    random_state=42,
)

# Step 2: carve 15% of the remaining training samples off as the validation set.
x_train_gen, x_val_gen, y_train_gen, y_val_gen = train_test_split(
    x_train_gen,
    y_train_gen,
    test_size=0.15,
    random_state=21,
)

In [None]:
# Shapes of: train images, train labels, validation images, validation labels.
print(x_train_gen.shape,y_train_gen.shape,x_val_gen.shape,y_val_gen.shape)

**Data Generator**

In [None]:
# Display names indexed by label code (0: Male, 1: Female, as listed in the
# notebook header); used by the gender plotting helpers.
gender = ["Male","Female"]

In [None]:
# Batch iterators (32 samples per batch) for the gender task.
gen_train_ds = traingen.flow(x_train_gen, y_train_gen, batch_size=32)
gen_val_ds = valgen.flow(x_val_gen, y_val_gen, batch_size=32)

# shuffle=False keeps the test batches in array order, so predictions made from
# this iterator can be compared index-by-index with y_test_gen.
gen_test_ds = testgen.flow(x_test_gen, y_test_gen, batch_size=32, shuffle=False)

In [None]:
#one batch
def showImagesGender(img,label):
    """Draw the first 25 images of a batch in a 5x5 grid, titled with their gender.

    img   -- indexable batch of images; entries 0..24 are displayed
    label -- indexable label codes aligned with ``img``; each code is looked up
             in the module-level ``gender`` list
    """
    plt.figure(figsize=[22, 15])
    for cell in range(1, 26):
        idx = cell - 1  # subplot slots are 1-based, batch entries 0-based
        plt.subplot(5, 5, cell)
        plt.imshow(img[idx])
        plt.title(f"Gender is {gender[label[idx]]}")
        plt.axis('off')
    plt.show()

In [None]:
# Pull one (images, labels) batch from the gender training iterator and display it.
A,b = next(gen_train_ds)
showImagesGender(A,b)

## Model

In [None]:
# Binary gender classifier: three conv/pool stages followed by a small dense head
# that ends in a single sigmoid unit.
model_gender = Sequential()

# Convolutional feature extractor (32 -> 32 -> 64 filters), each conv followed by
# 2x2 max-pooling; 30% dropout sits between the second and third stage.
model_gender.add(Conv2D(32, (3, 3), activation="relu", input_shape=(48, 48, 1)))
model_gender.add(MaxPooling2D(2, 2))
model_gender.add(Conv2D(32, (3, 3), activation="relu"))
model_gender.add(MaxPooling2D(2, 2))
model_gender.add(Dropout(0.3))
model_gender.add(Conv2D(64, (3, 3), activation="relu"))
model_gender.add(MaxPooling2D(2, 2))

# Classifier head: flatten, 64-unit hidden layer with 50% dropout, sigmoid output.
model_gender.add(Flatten())
model_gender.add(Dense(64, activation='relu'))
model_gender.add(Dropout(0.5))
model_gender.add(Dense(1, activation='sigmoid'))

model_gender.summary()

**compile the model**

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit and the 0/1
# gender labels; accuracy is tracked as the metric.
model_gender.compile(optimizer='adam',loss = "binary_crossentropy",metrics=['accuracy'])

**Callbacks**

In [None]:
my_calls_1 = [
    # Stop after 3 epochs without improvement in validation accuracy.
    # restore_best_weights=True rolls the in-memory model back to its best epoch;
    # without it the evaluate/predict cells below would score the weights from the
    # final, non-improving epoch.
    keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                  patience=3,
                                  restore_best_weights=True),
    # Keep only the best checkpoint on disk (no `monitor` is given, so Keras uses
    # its default quantity for "best").
    keras.callbacks.ModelCheckpoint("Model_Gender.h5",verbose=1,save_best_only=True),
]

**Train**

In [None]:
# Train for at most 23 epochs; the EarlyStopping callback in my_calls_1 may end
# training sooner. The returned history is used by the plotting cell below.
hist_gender = model_gender.fit(gen_train_ds,epochs = 23, validation_data = gen_val_ds, callbacks = my_calls_1)

**Test**

In [None]:
# Score the gender model on the held-out test iterator (loss and accuracy).
model_gender.evaluate(gen_test_ds)

**Loss and Accuracy**

In [None]:
# Side-by-side training/validation curves: accuracy on the left, loss on the right.
plt.figure(figsize=(15, 6))

for position, (metric, title) in enumerate([("accuracy", "Accuracy"), ("loss", "Loss")], start=1):
    plt.subplot(1, 2, position)
    plt.plot(hist_gender.epoch, hist_gender.history[metric], label='Training')
    # Keras stores the validation series under the same key prefixed with "val_".
    plt.plot(hist_gender.epoch, hist_gender.history['val_' + metric], label='validation')
    plt.title(title)
    plt.legend()

plt.show()

**Predictions**

In [None]:
# Sigmoid outputs for the test set. gen_test_ds was created with shuffle=False,
# so the rows should line up with x_test_gen / y_test_gen.
pred_gender = model_gender.predict(gen_test_ds)

In [None]:
# Round each sigmoid output to the nearest integer to get a 0/1 class label
# that can index the `gender` list.
pred_gen = [int(np.round(row[0])) for row in pred_gender]

In [None]:
# Peek at the first few predicted labels.
pred_gen[:5]

In [None]:
# scikit-learn's signature is classification_report(y_true, y_pred). With the
# arguments reversed, precision and recall are swapped and `support` counts
# predictions instead of true samples.
print(classification_report(y_test_gen, pred_gen))

In [None]:
# confusion_matrix(y_true, y_pred): rows are actual classes and columns are
# predicted classes. Passing the arguments reversed would transpose the matrix.
sns.heatmap(confusion_matrix(y_test_gen, pred_gen), annot = True, fmt = 'd', cmap = "BuPu");

In [None]:
def testGender(image,pred,actual):
    """Show samples 500-524 in a 5x5 grid labelled with actual and predicted gender.

    image  -- indexable of images; entries 500..524 are drawn
    pred   -- predicted label codes aligned with ``image``
    actual -- true label codes aligned with ``image``

    Label codes are translated through the module-level ``gender`` list.
    """
    plt.figure(figsize=[22, 15])
    for idx in range(500, 525):
        # 500 is a multiple of 25, so idx % 25 maps 500..524 onto grid slots 1..25.
        plt.subplot(5, 5, idx % 25 + 1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(image[idx])
        plt.xlabel(f"Actual Gender is {gender[actual[idx]]}")
        plt.ylabel(f"Prediced is {gender[pred[idx]]}")
    plt.show()

In [None]:
# Compare predicted and true gender on a slice of the test set.
testGender(x_test_gen,pred_gen, y_test_gen)

# **Ethnicity Prediction**

## Data for ethnicity prediction

In [None]:
# Step 1: hold out 20% of all samples as the ethnicity test set.
x_train_et, x_test_et, y_train_et, y_test_et = train_test_split(
    pixels,
    np.array(data["ethnicity"]),
    test_size=0.2,
    random_state=42,
)

# Step 2: carve 15% of the remaining training samples off as the validation set.
x_train_et, x_val_et, y_train_et, y_val_et = train_test_split(
    x_train_et,
    y_train_et,
    test_size=0.15,
    random_state=21,
)

In [None]:
# Shapes of the ethnicity training images and labels.
print(x_train_et.shape,y_train_et.shape)

In [None]:
# Batch iterators (32 samples per batch) for the ethnicity task.
et_train_ds = traingen.flow(x_train_et, y_train_et, batch_size=32)
et_val_ds = valgen.flow(x_val_et, y_val_et, batch_size=32)

# shuffle=False keeps the test batches in array order, so predictions made from
# this iterator can be compared index-by-index with y_test_et.
et_test_ds = testgen.flow(x_test_et, y_test_et, batch_size=32, shuffle=False)

In [None]:
#one batch
def showImagesEthnicity(img,label):
    """Draw the first 25 images of a batch in a 5x5 grid, titled with their label.

    img   -- indexable batch of images; entries 0..24 are displayed
    label -- indexable ethnicity labels aligned with ``img``; shown as-is
             (the raw label value, not a name)
    """
    plt.figure(figsize=[22, 15])
    for cell in range(1, 26):
        idx = cell - 1  # subplot slots are 1-based, batch entries 0-based
        plt.subplot(5, 5, cell)
        plt.imshow(img[idx])
        plt.title(f"Race is {label[idx]}")
        plt.axis('off')
    plt.show()

In [None]:
# Pull one (images, labels) batch from the ethnicity training iterator and display it.
c,d = next(et_train_ds)
showImagesEthnicity(c,d)

## Model

In [None]:
# Five-class ethnicity classifier: four conv/pool stages followed by a dense head
# that ends in a 5-way softmax.
model_et = Sequential()

# Convolutional feature extractor (32 -> 32 -> 64 -> 128 filters), each conv
# followed by 2x2 max-pooling; 30% dropout sits after the second stage.
model_et.add(Conv2D(32, (3, 3), activation="relu", input_shape=(48, 48, 1)))
model_et.add(MaxPooling2D(2, 2))
model_et.add(Conv2D(32, (3, 3), activation="relu"))
model_et.add(MaxPooling2D(2, 2))
model_et.add(Dropout(0.3))
model_et.add(Conv2D(64, (3, 3), activation="relu"))
model_et.add(MaxPooling2D(2, 2))
model_et.add(Conv2D(128, (3, 3), activation="relu"))
model_et.add(MaxPooling2D(2, 2))

# Classifier head: flatten, 256-unit hidden layer with 50% dropout, softmax output.
model_et.add(Flatten())
model_et.add(Dense(256, activation='relu'))
model_et.add(Dropout(0.5))
model_et.add(Dense(5, activation='softmax'))

model_et.summary()

**Compile the model**

In [None]:
# The sparse variant of categorical cross-entropy is used because the labels fed
# to flow() are the raw integer codes from the "ethnicity" column, not one-hot
# vectors; the model ends in a 5-unit softmax.
model_et.compile(optimizer='adam',loss = "sparse_categorical_crossentropy",metrics=['accuracy'])

**Callbacks**

In [None]:
my_calls_2 = [
    # Stop after 3 epochs without improvement in validation accuracy.
    # restore_best_weights=True rolls the in-memory model back to its best epoch;
    # without it the evaluate/predict cells below would score the weights from the
    # final, non-improving epoch.
    keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                  patience=3,
                                  restore_best_weights=True),
    # Keep only the best checkpoint on disk (no `monitor` is given, so Keras uses
    # its default quantity for "best").
    keras.callbacks.ModelCheckpoint("Model_Ethnicity.h5",verbose=1,save_best_only=True),
]

**Train**

In [None]:
# Train for at most 22 epochs; the EarlyStopping callback in my_calls_2 may end
# training sooner. The returned history is used by the plotting cell below.
hist_et = model_et.fit(et_train_ds,epochs=22,validation_data=et_val_ds,callbacks=my_calls_2)

**Test**

In [None]:
# Score the ethnicity model on the held-out test iterator (loss and accuracy).
model_et.evaluate(et_test_ds)

**Loss and Accuracy**

In [None]:
# Side-by-side training/validation curves: accuracy on the left, loss on the right.
plt.figure(figsize=(15, 6))

for position, (metric, title) in enumerate([("accuracy", "Accuracy"), ("loss", "Loss")], start=1):
    plt.subplot(1, 2, position)
    plt.plot(hist_et.epoch, hist_et.history[metric], label='Training')
    # Keras stores the validation series under the same key prefixed with "val_".
    plt.plot(hist_et.epoch, hist_et.history['val_' + metric], label='validation')
    plt.title(title)
    plt.legend()

plt.show()

**Predictions**

In [None]:
# Per-class softmax outputs for the test set. et_test_ds was created with
# shuffle=False, so the rows should line up with x_test_et / y_test_et.
pred_et = model_et.predict(et_test_ds)

In [None]:
# Collapse the per-class scores to the index of the highest-scoring class.
# This cell overwrites `pred_et`, so guard on dimensionality: if the cell is run
# a second time the values are already 1-D class ids and must be left alone.
# Calling np.argmax on each scalar again would silently turn every prediction into 0.
pred_et = np.asarray(pred_et)
if pred_et.ndim == 2:
    pred_et = pred_et.argmax(axis=1)
pred_et = list(pred_et)
pred_et[:5]

In [None]:
# How often each class is predicted; compare with the true distribution in the next cell.
pd.DataFrame(pred_et).value_counts()

In [None]:
# True class distribution in the ethnicity test set.
pd.DataFrame(y_test_et).value_counts()

In [None]:
# scikit-learn's signature is classification_report(y_true, y_pred). With the
# arguments reversed, precision and recall are swapped and `support` counts
# predictions instead of true samples.
print(classification_report(y_test_et, pred_et))

In [None]:
plt.figure(figsize=[12,7])
# confusion_matrix(y_true, y_pred): rows are actual classes and columns are
# predicted classes. Passing the arguments reversed would transpose the matrix.
sns.heatmap(confusion_matrix(y_test_et, pred_et),annot=True,fmt='d',cmap="Blues");

**The ethnicity model is not performing well at all yet; this section still needs to be revisited.**