# **Importing libraries**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import tensorflow as tf
import tensorflow.keras.layers as L
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras import utils
from tensorflow.keras.layers import Conv2D,InputLayer, Dropout, BatchNormalization, Flatten, Dense, MaxPooling2D
from tensorflow.keras.models import Sequential

# **Loading Dataset**

In [None]:
# Read the CSV into `data`; both the age and the gender targets used later in
# the notebook are taken from this single frame.
data = pd.read_csv('../input/age-gender-and-ethnicity-face-data-csv/age_gender.csv')
data.head()

In [None]:
# (rows, columns) of the table as loaded.
data.shape

In [None]:
# Summary statistics of the columns pandas treats as numeric.
data.describe()

# **Data Preprocessing**

In [None]:
# Count missing values per column.
data.isna().sum()

In [None]:
# `img_name` is not used by either model. `errors='ignore'` makes this cell safe
# to re-run: without it a second execution raises KeyError because the column
# has already been removed from `data`.
data = data.drop(columns='img_name', errors='ignore')

In [None]:
# Preview the first ten rows of the table.
data.head(10)

In [None]:
# Shape of the `pixels` column (a Series, so this is a 1-tuple with the row count).
data['pixels'].shape

In [None]:
# Number of rows per distinct age value, most frequent first.
data['age'].value_counts()

# **Discover and visualize the data to gain insights**

> **Distribution of Age**

In [None]:
# Histogram of ages with one bin per distinct age value.
fig, ax = plt.subplots(figsize=(10, 4))
age_column = data['age']
age_column.hist(bins=age_column.nunique(), ax=ax)
ax.set_xlabel("$AGE$")
ax.set_ylabel("$Counts$")
ax.set_title("Distribution of Age")
plt.show()

In [None]:
# Headline statistics of the age column, printed one per line.
age_series = data["age"]
age_summary = (
    ("Avg Age: ", age_series.mean()),
    ("Max Age: ", age_series.max()),
    ("Min Age: ", age_series.min()),
    ("Median Age: ", age_series.median()),
)
for prefix, statistic in age_summary:
    print(prefix + str(statistic))

In [None]:
# Using seaborn: one bar per distinct age, bar height = number of rows with that age.
# The counts are computed once and reused for both axes.
age_counts = data['age'].value_counts()

plt.figure(figsize=(25,8))
sns.barplot(x=list(age_counts.index), y=list(age_counts.values))
# (The former `plt.plot(figsize=(10,20))` call was removed: it was given no data,
# so it drew nothing, and `figsize` is not a plot argument.)
plt.xlabel("$AGE$")
plt.ylabel("$Counts$")
plt.title("Distribution of Age")
plt.show()

In [None]:
# To make this notebook's output as repeatable as possible, seed both global
# RNGs in use: NumPy's and TensorFlow's. Seeding NumPy alone leaves Keras weight
# initialisation and dropout unseeded.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
np.random.seed(42)
tf.random.set_seed(42)

# Each `pixels` entry is a space-separated string of intensities; parse it into
# a 48x48 float32 array.
data['pixels'] = data['pixels'].map(lambda x: np.array(x.split(' '), dtype=np.float32).reshape(48, 48))

In [None]:
# Inspect the first entries of `pixels` after parsing (each is now a 48x48 float32 array).
data['pixels'].head()

In [None]:
## Scale every stored image by 1/255
## (presumably maps 8-bit intensities into [0, 1] - verify against the raw data).
def _scale_pixels(image):
    return image / 255

data['pixels'] = data['pixels'].apply(_scale_pixels)

In [None]:
# Inspect the first entries of `pixels` again, now after the division by 255.
data['pixels'].head()

In [None]:
# Stack the per-row 48x48 arrays into a single array of images.
image_list = list(data['pixels'])
X = np.array(image_list)
X.shape

In [None]:
## Add a trailing channel axis: (samples, 48, 48) -> (samples, 48, 48, 1)
n_images = X.shape[0]
X = X.reshape((n_images, 48, 48, 1))
X.shape

In [None]:
# Target vectors for the two models, followed by a shape check against X.
y_age = np.array(data['age'])
y_gender = np.array(data['gender'])
for label, array in (('X', X), ('y_age', y_age), ('y_gender', y_gender)):
    print(label, array.shape)


In [None]:
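# Superseded by the single three-way split in the next cell; kept for reference only.
# Passing both label arrays to one train_test_split call keeps the age and gender
# targets aligned with the same images. Note that the second line below would also
# have overwritten X_age_train / X_age_test.
# X_age_train, X_age_test, y_age_train, y_age_test = train_test_split(X, y_age, test_size=0.2, random_state=42)
# X_age_train, X_age_test, y_gender_train, y_gender_test = train_test_split(X, y_gender, test_size=0.2, random_state=42)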

In [None]:
# One split for images and both targets, so all three stay row-aligned.
(
    X_train, X_test,
    y_age_train, y_age_test,
    y_gender_train, y_gender_test,
) = train_test_split(X, y_age, y_gender, test_size=0.2, random_state=42)

In [None]:
# Shapes of every array produced by the split, in the original print order.
for split_array in (X_train, X_test, y_age_train, y_gender_train, y_age_test, y_gender_test):
    print(split_array.shape)

In [None]:
# First label of each target/split. The labels name the actual variable so the
# age and gender lines can be told apart (previously both pairs printed the same text).
print("Value of y_age_train[0]: ",y_age_train[0])
print("Value of y_age_test[0]: ",y_age_test[0])
print("Value of y_gender_train[0]: ",y_gender_train[0])
print("Value of y_gender_test[0]: ",y_gender_test[0])

# **Training the models**

In [None]:
# Reset Keras' global state before (re)building the models.
tf.keras.backend.clear_session()

# Age regressor: three 3x3 conv stages (64 -> 128 -> 256 filters, each followed
# by 2x2 max-pooling), then two dense layers and a single linear output unit.
# The input shape is declared once, by the InputLayer; the contradictory
# `input_shape=(32, 32, 3)` that used to sit on the first Conv2D was removed.
AgeModel = tf.keras.Sequential([
    L.InputLayer(input_shape=(48,48,1)),
    L.Conv2D(64, (3, 3), activation='relu', padding='same'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(128, (3, 3), activation='relu',padding='same'),
    L.Dropout(rate=0.3),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(256, (3, 3), activation='relu',padding='same'),
    L.MaxPooling2D((2, 2)),
    L.Dropout(rate=0.5),
    L.BatchNormalization(),
    L.Flatten(),
    L.Dense(512, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(256, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1)
])

# This is a regression on the raw age values, so track mean absolute error.
# Classification accuracy is meaningless here, and the evaluation cell reads
# the second returned value as `mae`.
AgeModel.compile(optimizer='adam',
        loss='mean_squared_error',
        metrics=['mae'])


In [None]:
## Stop training when validation loss drops below a threshold (0.2700 by default)
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once ``val_loss`` falls below a threshold.

    Args:
        threshold: Validation-loss value below which training is stopped.
            Defaults to 0.2700.
    """

    def __init__(self, threshold=0.2700):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's ``val_loss`` and request a stop if it is low enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras; may be None or lack ``val_loss``
                when training runs without validation data.
        """
        # `logs=None` replaces the shared mutable default `{}`. A missing
        # val_loss is skipped rather than compared, because `None < float`
        # raises TypeError.
        val_loss = (logs or {}).get('val_loss')
        if val_loss is None:
            return
        if val_loss < self.threshold:
            print("\nReached {:.4f} val_loss so cancelling training!".format(self.threshold))
            self.model.stop_training = True

callback = myCallback()

In [None]:
# Print the layer-by-layer summary of the age model.
AgeModel.summary()

In [None]:
## Gender Model
# Binary classifier: two 3x3 conv stages (32 -> 64 filters, each followed by
# 2x2 max-pooling), one dense layer, and a single sigmoid output unit.
# The input shape is declared once, by the InputLayer; the contradictory
# `input_shape=(32, 32, 3)` on the first Conv2D was removed.
GenderModel = tf.keras.Sequential([
    L.InputLayer(input_shape=(48,48,1)),
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1, activation='sigmoid')
])

sgd = tf.keras.optimizers.SGD(learning_rate=0.007)

# Pass the configured optimizer object. The string 'sgd' would build a fresh SGD
# with Keras' default learning rate and silently ignore the 0.007 set above.
GenderModel.compile(optimizer=sgd,
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

GenderModel.summary()

In [None]:
# Train the age regressor; 20% of the training split is held out for validation
# and the learning rate is reduced when the monitored loss plateaus.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau()
Age_history = AgeModel.fit(
    X_train,
    y_age_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
    callbacks=[lr_on_plateau],
)

In [None]:
# Draw the architecture diagram with tf.keras' own `plot_model`, reached through
# the `utils` module imported at the top of the notebook. The standalone
# `keras.utils.vis_utils` path is not available in newer Keras releases, and it
# mixes the `keras` package with the `tensorflow.keras` one the model was built with.
utils.plot_model(AgeModel, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Plot every series recorded during age-model training.
# The y-axis is left to autoscale: the loss is MSE on unscaled ages, so the
# former fixed window of (0.03, 0.05) clipped the loss curves out of view.
age_history_df = pd.DataFrame(Age_history.history)
ax = age_history_df.plot(figsize=(8, 5))
ax.set_xlabel("epoch")
ax.grid(True)
plt.show()

In [None]:
# Interactive training-vs-validation loss curves for the age model.
# (`px` is imported in the notebook's import cell rather than here.)
fig = px.line(
    Age_history.history, y=['loss', 'val_loss'],
    labels={'index': 'epoch', 'value': 'loss'},
    title='Training History')
fig.show()

In [None]:
# Train the gender classifier; 10% of the training split is held out for
# validation and `callback` may end training early.
gender_fit_options = dict(
    epochs=14,
    validation_split=0.1,
    batch_size=64,
    callbacks=[callback],
)
Gender_history = GenderModel.fit(X_train, y_gender_train, **gender_fit_options)

In [None]:

# Interactive training-vs-validation loss curves for the gender model.
gender_loss_series = ['loss', 'val_loss']
gender_axis_labels = {'index': 'epoch', 'value': 'loss'}
fig = px.line(
    Gender_history.history,
    y=gender_loss_series,
    labels=gender_axis_labels,
    title='Training History',
)
fig.show()

# **Model evaluation on test set**

In [None]:
## Age
# Ask Keras for a name -> value dict so every number is printed under the name
# of the metric that produced it. Positional unpacking labelled the second
# value "mae" regardless of which metric the model was compiled with.
age_test_metrics = AgeModel.evaluate(X_test, y_age_test, verbose=0, return_dict=True)
mean_squared_error = age_test_metrics['loss']  # the compiled loss is MSE
print('mean_squared_error: {}'.format(mean_squared_error))
for metric_name, metric_value in age_test_metrics.items():
    if metric_name != 'loss':
        print('{}: {}'.format(metric_name, metric_value))

In [None]:
## Gender
# Test-set loss and accuracy of the gender classifier.
gender_test_scores = GenderModel.evaluate(X_test, y_gender_test, verbose=0)
loss, acc = gender_test_scores
print('Test loss: {}'.format(loss))
print('Test Accuracy: {}'.format(acc))

In [None]:
# Run both models on the held-out images (age model first, then gender model).
y_age_pred, y_gender_pred = (
    fitted_model.predict(X_test) for fitted_model in (AgeModel, GenderModel)
)

In [None]:
# Show the raw prediction arrays, age first and then gender.
for prediction_array in (y_age_pred, y_gender_pred):
    print(prediction_array)

In [None]:
def mean_absolute_percentage_error(y_age_test,y_age_pred):
    """Return the mean absolute percentage error between targets and predictions.

    Both inputs are flattened to 1-D before the element-wise comparison.
    Without that, a target vector of shape (N,) and a prediction column of
    shape (N, 1) broadcast to an (N, N) matrix, and the mean is taken over
    every target/prediction pair instead of matching pairs.

    Args:
        y_age_test: True values (array-like, any shape holding N values).
        y_age_pred: Predicted values (array-like, any shape holding N values).

    Returns:
        MAPE in percent. Entries whose true value is 0 yield inf/nan, as the
        error is divided by the true value.

    Raises:
        ValueError: If the two inputs do not contain the same number of values.
    """
    actual = np.asarray(y_age_test, dtype=np.float64).ravel()
    predicted = np.asarray(y_age_pred, dtype=np.float64).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "y_age_test and y_age_pred must hold the same number of values, "
            "got {} and {}".format(actual.size, predicted.size)
        )
    return np.mean(np.abs((actual - predicted) / actual)) * 100
# MAPE of the age model's predictions against the test-split ages.
mean_absolute_percentage_error(y_age_test,y_age_pred)

In [None]:
def plot(X,y_age,y_gender):
    """Show one face image titled with its predicted gender and age.

    Args:
        X: Image data with 48*48 values (e.g. shape (48, 48, 1)).
        y_age: Predicted age, either a scalar or a length-1 array.
        y_gender: Predicted gender score, either a scalar or a length-1 array;
            values <= 0.5 are titled 'Male', larger values 'Female'.
    """
    # Model outputs arrive as length-1 arrays; convert them to plain floats so
    # the title reads "34.1" rather than "[34.1]".
    gender_score = float(np.squeeze(y_gender))
    age_value = float(np.squeeze(y_age))

    gender_label = 'Male' if gender_score <= 0.5 else 'Female'
    plt.title('{} and Age is {:.1f}'.format(gender_label, age_value))
    # Single-channel image: use a grayscale colormap, not the default false-colour one.
    plt.imshow(np.reshape(X, (48, 48)), cmap='gray')
    plt.show()

In [None]:
# Display one test image together with both predictions for it.
n = 3
plot(X=X_test[n], y_age=y_age_pred[n], y_gender=y_gender_pred[n])

In [None]:
# `confusion_matrix` / `classification_report` are imported in the notebook's import cell.
# Turn both label sets into flat integer vectors: the sigmoid scores are
# thresholded at 0.5 (a score of exactly 0.5 maps to class 0, as np.round did),
# and the (N, 1) prediction column is flattened to match the (N,) targets.
Actual = np.asarray(y_gender_test).astype(int).ravel()
Predicted = (np.asarray(y_gender_pred) > 0.5).astype(int).ravel()
sns.heatmap(confusion_matrix(Actual,Predicted),annot=True,cbar=False,fmt="d")
plt.xlabel("Prediction")
plt.ylabel("Actual");

In [None]:
# Per-class precision / recall / F1 for the gender predictions on the test split.
print(classification_report(Actual,Predicted))