In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv2D,InputLayer, Dropout, BatchNormalization, Flatten, Dense, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras.models import Sequential

In [None]:
# Load the age/gender/ethnicity face table from the Kaggle input directory
# and preview its first five rows.
Dataset = pd.read_csv('../input/age-gender-and-ethnicity-face-data-csv/age_gender.csv')
Dataset.head(5)

In [None]:
# (number of rows, number of columns) of the raw table.
Dataset.shape

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the table's columns.
Dataset.describe()

In [None]:
# Column dtypes, non-null counts and memory usage.
Dataset.info()

In [None]:
# Each image is stored in the CSV as one space-separated string of pixel values;
# convert every row into a 48x48 float32 array.

def _parse_pixels(raw):
    """Return `raw` as a (48, 48) float32 array.

    Values that are already NumPy arrays are returned unchanged, so this cell
    can be re-run without failing on `.split` after the first conversion.
    """
    if isinstance(raw, np.ndarray):
        return raw
    return np.array(raw.split(' '), dtype=np.float32).reshape(48, 48)


Dataset['pixels'] = Dataset['pixels'].map(_parse_pixels)

In [None]:
# Shape of the `pixels` column itself, i.e. (n_rows,) — not the shape of one image.
Dataset['pixels'].shape

In [None]:
# Histogram of the raw `age` column.
Dataset['age'].hist()

In [None]:
# Bucket ages into five right-closed bands with edges 0, 20, 40, 60, 80, inf,
# labelled 1-5. The resulting column is what the stratified split groups on.
Dataset["age_cat"] = pd.cut(
    Dataset["age"],
    bins=[0.0, 20.0, 40.0, 60.0, 80.0, np.inf],
    labels=[1, 2, 3, 4, 5],
)

In [None]:
# Number of rows in each age band.
Dataset["age_cat"].value_counts()

In [None]:
# Histogram of the age-band labels.
Dataset["age_cat"].hist()

In [None]:
# Share of the whole dataset that falls in each age band.
Dataset['age_cat'].value_counts()/len(Dataset)

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Hold out 20% of the rows, keeping the `age_cat` proportions equal in both parts.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# `split()` yields *positional* row indices, so select with `.iloc`; `.loc` only
# matches when the index happens to be a default RangeIndex.
train_index, test_index = next(split.split(Dataset, Dataset["age_cat"]))

# Copy so that later edits to the splits never act on a slice of `Dataset`.
strat_train_set = Dataset.iloc[train_index].copy()
strat_test_set = Dataset.iloc[test_index].copy()

In [None]:
# Age-band shares inside the stratified test split.
strat_test_set['age_cat'].value_counts()/len(strat_test_set)

In [None]:
def age_cat_proportions(data):
    """Return the fraction of rows of `data` in each `age_cat` value.

    The denominator is the total row count of `data`, so rows whose
    `age_cat` is missing still count towards it.
    """
    band_counts = data["age_cat"].value_counts()
    total_rows = len(data)
    return band_counts / total_rows

# Purely random 80/20 split, used as the baseline for the stratified one.
train_set, test_set = train_test_split(Dataset, test_size=0.2, random_state=42)

# One column of age-band shares per sampling scheme, rows ordered by band label.
compare_props = pd.DataFrame(
    {
        "Overall": age_cat_proportions(Dataset),
        "Stratified": age_cat_proportions(strat_test_set),
        "Random": age_cat_proportions(test_set),
    }
).sort_index()

# Relative deviation (in percent) of each scheme from the overall shares.
for error_label, scheme in (("Rand. %error", "Random"), ("Strat. %error", "Stratified")):
    compare_props[error_label] = 100 * compare_props[scheme] / compare_props["Overall"] - 100

In [None]:
# Show the per-band shares and percentage errors of both sampling schemes.
compare_props

In [None]:
# Remove the helper `age_cat` column from both splits.
# Rebinding the names (instead of `inplace=True`) avoids mutating a slice, and
# `errors="ignore"` keeps the cell re-runnable once the column is already gone.
strat_train_set = strat_train_set.drop(columns="age_cat", errors="ignore")
strat_test_set = strat_test_set.drop(columns="age_cat", errors="ignore")

In [None]:
# Display the stratified training split.
strat_train_set

In [None]:
# Stack the training and test splits back into one frame; row labels are kept as-is.
# `DataFrame.append` was removed in pandas 2.0 — `pd.concat` is the supported equivalent.
# NOTE(review): the later random `train_test_split` on this recombined data decides
# the final train/test membership, so the stratified split no longer does — confirm
# that is intended.
full_dataset = pd.concat([strat_train_set, strat_test_set])

In [None]:
# Preview the first rows of the recombined frame.
full_dataset.head()

In [None]:
# Display the stratified test split.
strat_test_set

In [None]:
# Divide every pixel array by 255.
# NOTE: this cell is not idempotent — running it again divides by 255 once more.
full_dataset['pixels'] = full_dataset['pixels'].apply(lambda image: image / 255)

# Frequency of each distinct value in the three label columns.
age_dist = full_dataset['age'].value_counts()
ethnicity_dist = full_dataset['ethnicity'].value_counts()
# Gender codes are relabelled for readability: 0 -> 'Male', 1 -> 'Female'.
gender_dist = (
    full_dataset['gender']
    .value_counts()
    .rename(index={0:'Male',1:'Female'})
)

In [None]:
# Stack the per-row image arrays into one array, image index first.
X = np.array(full_dataset['pixels'].tolist())

# Add a trailing channel axis: (n, 48, 48) -> (n, 48, 48, 1), the input shape
# the convolutional models declare.
X = X.reshape(len(X), 48, 48, 1)

In [None]:

# Split the data into train and test sets.
np.random.seed(42)

# Regression target (age) and binary target (gender) as plain arrays.
y_age = np.array(full_dataset['age'])
y_gender = np.array(full_dataset['gender'])

# Report the shape of the features and of each target.
for array_name, array in (('X', X), ('y_age', y_age), ('y_gender', y_gender)):
    print(array_name, array.shape)

# One shuffled 80/20 split applied jointly, so images and both targets stay aligned.
X_train, X_test, y_age_train, y_age_test, y_gender_train, y_gender_test = train_test_split(
    X, y_age, y_gender,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Inspect the age targets of the training portion.
y_age_train

In [None]:
# Sanity-check the row labelled 0: its image shape and its gender code.
# A cell only displays its last expression, so both values are returned as one tuple
# (previously the shape was computed and silently discarded).
full_dataset['pixels'][0].shape, full_dataset['gender'][0]

In [None]:
def plot(X,y):
    """Show a single image with `y` as the figure title.

    `X` is reshaped to 48x48 before being drawn, so it must hold exactly
    48*48 values.
    """
    plt.title(y)
    image = X.reshape(48, 48)
    plt.imshow(image)
    plt.show()

In [None]:
# Draw the image of the row labelled 10, titled with its gender code.
plot(full_dataset['pixels'][10],full_dataset['gender'][10])

In [None]:
import tensorflow.keras.layers as L

# Start from a clean Keras graph/name scope.
tf.keras.backend.clear_session()

# Age regressor: two conv/pool stages, a 64-unit dense layer with dropout and a
# single linear output unit that predicts the age directly.
AgeModel = tf.keras.Sequential([
    L.InputLayer(input_shape=(48,48,1)),
    # The input shape is declared once by the InputLayer above; the former
    # `input_shape=(32, 32, 3)` on this layer contradicted it and was removed.
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1)  # no activation: unbounded regression output
])

# Mean squared error is the training loss. Mean absolute error is tracked as the
# metric because classification accuracy is meaningless for a continuous target,
# and the evaluation cell prints the second value under the label 'mae'.
# (The unused `sgd` optimizer object that used to be created here was dropped;
# the model is compiled with Adam.)
AgeModel.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])



In [None]:
## Stop training when validation loss drops below a threshold (0.2700 by default)
class myCallback(tf.keras.callbacks.Callback):
    """Early-stopping callback driven by the validation loss.

    Parameters
    ----------
    threshold : float, default 0.2700
        Training is stopped at the end of the first epoch whose `val_loss`
        is strictly below this value.

    NOTE(review): the instance created below is passed to `AgeModel.fit`, whose
    loss is mean squared error on the age targets — confirm that 0.27 is a
    reachable threshold for that loss.
    """

    def __init__(self, threshold=0.2700):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        # `logs` may be None, and `val_loss` is absent when no validation data
        # is used; in both cases there is nothing to compare, so keep training.
        val_loss = (logs or {}).get('val_loss')
        if val_loss is not None and val_loss < self.threshold:
            print("\nReached {:.4f} val_loss so cancelling training!".format(self.threshold))
            self.model.stop_training = True

callback = myCallback()

In [None]:
# Print the layer-by-layer architecture and parameter counts of the age model.
AgeModel.summary()

In [None]:
# Train the age model for up to 7 epochs in batches of 64, holding out 20% of the
# training data for validation; `callback` may stop training early.
history = AgeModel.fit(X_train, y_age_train, epochs=7, validation_split=0.2, batch_size=64,callbacks=[callback])

In [None]:
# Plot every per-epoch series recorded while fitting AgeModel on one figure.
age_curves = pd.DataFrame(history.history)
age_curves.plot(figsize=(8, 5))
plt.grid(True)
plt.show()

In [None]:
# Score the age model on the held-out test set. `evaluate` returns the loss
# followed by the metric the model was compiled with.
# NOTE(review): the second value is printed under the label 'mae' — confirm that
# the metric passed to `AgeModel.compile` really is mean absolute error.
loss, acc = AgeModel.evaluate(X_test,y_age_test,verbose=0)
print('mean_squared_error: {}'.format(loss))
print('mae: {}'.format(acc))

In [None]:
# True ages of the first ten test samples, for comparison with the predictions.
y_age_test[:10]

In [None]:
# Predict ages for the *whole* test set and flatten the model's single output
# column to 1-D. Later cells compare `y_age_pred` with the full `y_age_test`;
# predicting only ten rows as a column made that subtraction broadcast into a
# matrix instead of comparing element by element.
y_age_pred = AgeModel.predict(X_test).ravel()

# Rounded predictions for the first ten samples, matching the preview of `y_age_test`.
np.round(y_age_pred[:10])

In [None]:
##Gender Model
tf.keras.backend.clear_session()

# Binary classifier with the same convolutional stack as the age model, ending
# in a single sigmoid unit.
GenderModel = tf.keras.Sequential([
    L.InputLayer(input_shape=(48,48,1)),
    # The input shape is declared once by the InputLayer above; the former
    # `input_shape=(32, 32, 3)` on this layer contradicted it and was removed.
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1, activation='sigmoid')
])

sgd = tf.keras.optimizers.SGD(learning_rate=0.007)

# Pass the configured optimizer object. The string 'sgd' builds a fresh SGD with
# default settings, which silently ignored the learning rate chosen above.
GenderModel.compile(optimizer=sgd,
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])


In [None]:
# Print the layer-by-layer architecture and parameter counts of the gender model.
GenderModel.summary()

In [None]:
# Train the gender classifier for 14 epochs in batches of 64, holding out 20% of
# the training data for validation.
Gender_history = GenderModel.fit(
    X_train, y_gender_train, epochs=14, validation_split=0.2, batch_size=64)

In [None]:
# Plot every per-epoch series recorded while fitting GenderModel on one figure.
gender_curves = pd.DataFrame(Gender_history.history)
gender_curves.plot(figsize=(8, 5))
plt.grid(True)
plt.show()

**Measuring Accuracy on the Held-Out Test Set**

In [None]:
# Score the gender classifier on the held-out test set: `evaluate` returns the
# binary cross-entropy loss followed by the compiled `accuracy` metric.
loss, acc = GenderModel.evaluate(X_test,y_gender_test,verbose=0)
print('Test loss: {}'.format(loss))
print('Test Accuracy: {}'.format(acc))

In [None]:
# True gender codes of the first ten test samples.
y_gender_test[:10]

In [None]:
# Sigmoid outputs for every test image, then rounded to 0/1 for display.
y_gender_pred = GenderModel.predict(X_test)
np.round(y_gender_pred)

In [None]:
def mean_absolute_percentage_error(y_age_test,y_age_pred):
    """Return the mean absolute percentage error between true and predicted values.

    Both inputs are flattened to 1-D first, so a column-shaped model output
    (n, 1) is compared element by element with an (n,) target instead of
    silently broadcasting into an (n, n) matrix.

    Parameters
    ----------
    y_age_test : array-like
        True values. Must not contain zeros, since each error is divided by
        the corresponding true value.
    y_age_pred : array-like
        Predicted values; must hold the same number of elements as `y_age_test`.

    Returns
    -------
    float
        `mean(|true - pred| / |true|) * 100`.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    actual = np.asarray(y_age_test, dtype=float).ravel()
    predicted = np.asarray(y_age_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "y_age_test and y_age_pred must have the same number of elements, "
            "got {} and {}".format(actual.size, predicted.size)
        )
    return np.mean(np.abs((actual - predicted) / actual)) * 100
# Compare only the rows that actually have a prediction, and flatten the model
# output so the error is computed element by element rather than by broadcasting
# a 1-D target against a column of predictions.
mean_absolute_percentage_error(y_age_test[:len(y_age_pred)], np.ravel(y_age_pred))

In [None]:
def plot(X,y_age,y_gender=None):
    """Show a 48x48 image titled with a predicted gender and age.

    This definition replaces the earlier two-argument `plot(X, y)`. To keep
    those earlier calls working when cells are re-run, `y_gender` is optional:
    when it is omitted, the second argument is used verbatim as the title.

    Parameters
    ----------
    X : array
        Image data holding exactly 48*48 values.
    y_age :
        Age to show in the title (or the whole title when `y_gender` is None).
    y_gender : optional
        Gender score; values <= 0.5 are labelled 'Male', larger ones 'Female'.
    """
    if y_gender is None:
        plt.title(y_age)
    elif y_gender<=0.5:
        plt.title('Male and Age is ' +str(y_age))
    else:
        plt.title('Female and Age is ' +str(y_age))
    plt.imshow(X.reshape(48,48))
    plt.show()

In [None]:
# Show test image `n` titled with its predicted gender and age.
n=2
plot(X_test[n],y_age_pred[n],y_gender_pred[n])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Rounding turns both the true codes and the sigmoid outputs into 0/1 labels.
Actual = np.round(y_gender_test)
Predicted = np.round(y_gender_pred)

# Annotated confusion matrix: rows are true labels, columns are predictions.
gender_cm = confusion_matrix(Actual, Predicted)
sns.heatmap(gender_cm, annot=True, cbar=False, fmt="d")
plt.xlabel("Prediction")
plt.ylabel("Actual");

In [None]:
# Per-class precision, recall and F1 for the gender classifier on the test set.
print(classification_report(Actual,Predicted))