#### Import required libraries

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

#### Load Data

In [None]:
# Location of the dataset CSV inside the Kaggle input directory.
csv_path = '/kaggle/input/age-gender-and-ethnicity-face-data-csv/age_gender.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as the cell output.
df.head()

#### Prepare data for training 

In [None]:
# Integer label code -> display name; the list position is the label code.
ethnicity_mapping = dict(enumerate(["White", "Black", "Asian", "Indian", "Hispanic"]))
gender_mapping = dict(enumerate(["Male", "Female"]))

In [None]:
def get_image(row):
    """
    Unpack one dataset row into a scaled image and its labels.

    `row` must support lookup by the keys 'age', 'ethnicity', 'gender' and
    'pixels'. The 'pixels' entry is a whitespace-separated string of numbers;
    it is parsed as float64, reshaped to 48x48 and divided by 255.0.

    Returns the tuple (image, age, ethnicity, gender).
    """
    # Read the labels first, then the pixel string.
    labels = tuple(row[key] for key in ('age', 'ethnicity', 'gender'))

    pixel_values = np.array(row['pixels'].split(), 'float64')
    scaled_image = pixel_values.reshape(48, 48) / 255.0

    return (scaled_image, *labels)

In [None]:
#Show Images
def show_images():
    """Draw a 5x5 grid of randomly chosen images from `df`, each titled with its labels."""
    plt.figure(figsize=(30, 30))

    for cell in range(1, 26):
        # Pick a random row position; the same row may be drawn more than once.
        row_position = np.random.randint(0, len(df))
        face, age, ethnicity, gender = get_image(df.iloc[row_position])

        plt.subplot(5, 5, cell)
        plt.imshow(face)
        caption = ' Age: {}\n Ethnicity: {}\n gender: {}'.format(
            age, ethnicity_mapping[ethnicity], gender_mapping[gender]
        )
        plt.title(caption)

    plt.show()


show_images()

In [None]:
'''
Defining data generator
'''
from tensorflow.keras.utils import Sequence
class DataGenerator(Sequence):
    """
    Keras `Sequence` that serves batches from in-memory arrays.

    Each batch is a pair ``(X, y)`` where ``X`` is an array of images and
    ``y`` is the list ``[age, ethnicity, gender]`` of label arrays for the
    same samples.
    """

    def __init__(self, name, images, age, ethnicity, gender, batch_size=32, shuffle=True, **kwargs):
        """
        Store the data and build the initial sample order.

        Parameters
        ----------
        name : label for this generator (stored only, not used internally).
        images, age, ethnicity, gender : array-likes indexed along axis 0;
            they are converted to numpy arrays once here.
        batch_size : number of samples per batch.
        shuffle : if truthy, the sample order is reshuffled in `on_epoch_end`.
        **kwargs : forwarded to the base class constructor.
        """
        # Newer Keras versions expect the base constructor to be called;
        # with no extra kwargs this is also a harmless no-op on older versions.
        super().__init__(**kwargs)

        self.name = name
        self.batch_size = batch_size
        # Convert once so batches can be taken with array indexing.
        self.images = np.asarray(images)
        self.age = np.asarray(age)
        self.ethnicity = np.asarray(ethnicity)
        self.gender = np.asarray(gender)
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.age) // self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as ``(X, [age, ethnicity, gender])``."""
        # Positions of this batch's samples in the current (possibly shuffled) order.
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Indexing with an index array returns new arrays, like the
        # per-element gather it replaces.
        X = self.images[batch_indexes]
        y = [
            self.age[batch_indexes],
            self.ethnicity[batch_indexes],
            self.gender[batch_indexes],
        ]

        return X, y

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when `shuffle` is truthy."""
        self.indexes = np.arange(len(self.age))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def add_data(self, images, age, ethnicity, gender):
        """Append more samples along axis 0 and rebuild the sample order."""
        # np.append copies the whole array each call; prefer building the
        # full arrays up front when many chunks are added.
        self.images = np.append(self.images, images, axis=0)
        self.age = np.append(self.age, age, axis=0)
        self.ethnicity = np.append(self.ethnicity, ethnicity, axis=0)
        self.gender = np.append(self.gender, gender, axis=0)
        self.on_epoch_end()

In [None]:
'''
function to make generators
'''
def generate_images(df, generator_name, batch_size=32):
    """
    Build a `DataGenerator` from the rows of `df`.

    Rows are decoded with `get_image` and stored as arrays of shape
    (n, 48, 48, 1) for the images and (n,) for each label, where
    n = (len(df) // batch_size) * batch_size, i.e. whole batches only.

    Parameters
    ----------
    df : DataFrame whose rows `get_image` can decode.
    generator_name : name passed through to `DataGenerator`.
    batch_size : samples per batch; also forwarded to the generator so the
        batches it serves match the size requested here.

    Returns
    -------
    DataGenerator holding the decoded rows.

    Raises
    ------
    ValueError : if `batch_size` is not positive, or `df` has fewer rows than
        `batch_size` (no full batch can be formed).
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got {}'.format(batch_size))

    total_images = len(df)
    num_batches = total_images // batch_size
    if num_batches == 0:
        raise ValueError(
            'Need at least batch_size={} rows to build a generator, got {}'.format(
                batch_size, total_images
            )
        )
    num_samples = num_batches * batch_size

    # Draw distinct row positions in random order. Drawing with replacement
    # would duplicate some rows and never use others.
    row_positions = np.random.permutation(total_images)[:num_samples]

    # Allocate the full arrays once instead of appending batch by batch,
    # which would copy everything collected so far on every append.
    x_all = np.zeros((num_samples, 48, 48, 1))
    age_all = np.zeros((num_samples,))
    ethnicity_all = np.zeros((num_samples,))
    gender_all = np.zeros((num_samples,))

    for slot, position in enumerate(row_positions):
        image, age, ethnicity, gender = get_image(df.iloc[position])

        # get_image already divides the pixels by 255.0; dividing again here
        # would squash every value into [0, 1/255].
        x_all[slot] = image.reshape(48, 48, 1)
        age_all[slot] = age
        ethnicity_all[slot] = ethnicity
        gender_all[slot] = gender

    return DataGenerator(
        generator_name, x_all, age_all, ethnicity_all, gender_all, batch_size=batch_size
    )

In [None]:
# Create train and validation data generators

# Hold out 1000 rows for validation and train on every other row.
# Sampling the two frames independently from `df` would let the same rows
# appear in both, so validation metrics would be measured partly on training data.
# A fixed random_state keeps the split identical across re-runs.
df_val = df.sample(n=1000, random_state=42)
df_train = df.drop(df_val.index)

train_gen = generate_images(df_train, generator_name='training_generator')
validation_gen = generate_images(df_val, generator_name='validation_generator')

#### Define Model

In [None]:
from tensorflow.keras.layers import Convolution2D, MaxPool2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, Activation, Input
from tensorflow.keras.models import Model
import tensorflow as tf

Here I use a single multi-output model to predict a person's age (regression), ethnicity and gender (classification).

In [None]:
# Multi-output CNN: one shared convolutional trunk with three heads
# (gender, age, ethnicity) branching off at increasing depth.
# NOTE(review): the name `input` shadows Python's builtin `input()` for the
# rest of the notebook — consider renaming if no later cell relies on it.
input = Input(shape=(48, 48, 1), name='input')

# Block 1: two 3x3 convolutions with 32 filters each, then 2x2 max pooling.
conv_1 = Convolution2D(32, 3, activation='relu')(input)
conv_2 = Convolution2D(32, 3, activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(2, 2))(conv_2)

# Gender head: single sigmoid unit on the flattened block-1 features
# (taken from pool_1, i.e. before the dropout below).
flat_1 = Flatten(name='flat_1')(pool_1)
gender = Dense(1, activation='sigmoid', name='gender')(flat_1)

drop_1 = Dropout(0.2)(pool_1)

# Block 2: 64- and 128-filter 3x3 convolutions, then 2x2 max pooling.
conv_3 = Convolution2D(64, 3, activation='relu')(drop_1)
conv_4 = Convolution2D(128, 3, activation='relu')(conv_3)
pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_4)

# Age head: single unit with no activation argument, on the flattened
# block-2 features (taken from pool_2, before the dropout below).
flat_2 = Flatten(name='flat_2')(pool_2)
age = Dense(1, name='age')(flat_2)

drop_2 = Dropout(0.2)(pool_2)

# Block 3: 64- and 128-filter 3x3 convolutions, no pooling.
conv_5 = Convolution2D(64, 3, activation='relu')(drop_2)
conv_6 = Convolution2D(128, 3, activation='relu')(conv_5)

# Ethnicity head: 5-way softmax on the flattened block-3 features.
flat_3 = Flatten(name='flat_3')(conv_6)
ethnicity = Dense(5, activation='softmax', name='ethnicity')(flat_3)

# Output order [age, ethnicity, gender] is the same order in which
# DataGenerator.__getitem__ lists its targets.
model = Model(inputs=input, outputs=[age, ethnicity, gender])
# Losses and metrics are keyed by the output layer names given above.
model.compile(
    loss={
        'age': 'mean_squared_error',
        'ethnicity': 'sparse_categorical_crossentropy',
        'gender': 'binary_crossentropy'
    },
    metrics={
        'age':'mean_squared_error',
        'ethnicity': 'sparse_categorical_accuracy',
        'gender': 'accuracy'
    }, 
    optimizer='adam'
)

model.summary()

#### Training 

In [None]:
class Logger(tf.keras.callbacks.Callback):
    """Callback that prints an empty line after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Cosmetic only: visually separates the stats of consecutive epochs.
        print('')

In [None]:
# Train for 100 epochs on train_gen, validating on validation_gen;
# TensorBoard event files are written under ./logs.
epoch_callbacks = [Logger(), tf.keras.callbacks.TensorBoard(log_dir='./logs')]
model.fit(
    train_gen,
    validation_data=validation_gen,
    epochs=100,
    callbacks=epoch_callbacks,
    verbose=1,
)

Results on validation data

Age (mse): 55.59

Ethnicity (accuracy): 70.26 %

Gender (accuracy): 88.91 % 