# Face classifier

## Set up

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D 

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_curve, roc_auc_score

In [None]:
# Single seed shared by every source of randomness in this notebook.
seed = 42

# Seed Python, NumPy and TensorFlow immediately, before any data is split or
# any model weights are initialised, so a top-to-bottom run is as repeatable
# as the backend allows.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

## Load data

In [None]:
# Read the dataset CSV into a dataframe; the "gender", "ethnicity" and "age"
# columns are extracted from it further down.
DATA_PATH = "../data/age_gender.csv"
data = pd.read_csv(filepath_or_buffer=DATA_PATH)

## Challenger model: CNN classifier

In [None]:
# Apply pxlvec2pxlarray to every entry of full_img_vec_list and stack the
# results into one array (presumably one 48x48 pixel array per image —
# confirm against the helper).
# NOTE(review): `pxlvec2pxlarray` and `full_img_vec_list` are not defined in
# the visible part of this notebook — confirm an earlier cell creates them.
full_img_array_list = np.array(list(map(pxlvec2pxlarray, full_img_vec_list)))

# Pull the three label columns out of the dataframe as arrays.
gender, ethnicity, age = (
    data[column].values for column in ("gender", "ethnicity", "age")
)

### Data split

In [None]:
# Hold out 20% of the images as the final test set. Stratifying on the gender
# label keeps the class proportions the same in both partitions, and fixing
# random_state makes the partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    full_img_array_list,
    gender,
    test_size=0.2,
    stratify=gender,
    random_state=seed,
)

In [None]:
# Sanity check: shapes of the training images and labels after the 80/20 split.
X_train.shape, y_train.shape

In [None]:
# Append a trailing axis of length 1 so every image carries an explicit
# single channel, matching the (48, 48, 1) input the models below are built for.
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

In [None]:
# Sanity check: X_train should now end in a channel axis of length 1.
X_train.shape, y_train.shape

In [None]:
# Carve a validation set out of the training portion: 10% of the remaining
# training images, stratified on the label. random_state pins the partition
# so that validation curves are comparable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    stratify=y_train,
    random_state=seed,
)

### Data preprocessing

In [None]:
# Cast every split to float32 and divide by 255
# (assumes 8-bit pixel intensities, giving values in [0, 1] — TODO confirm).
X_train, X_val, X_test = (
    split.astype("float32") / 255 for split in (X_train, X_val, X_test)
)

In [None]:
# Number of distinct classes, read from the same column ("gender") that
# supplies the labels for the splits above.
num_classes = data["gender"].nunique()

# One-hot encode the integer labels of every split so they match the
# num_classes-wide output layer of the models.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

### Building the CNN model (small AlexNet-style architecture)

In [None]:
# Small sequential CNN: three Conv -> ReLU -> MaxPool stages followed by a
# dense classification head. Shape comments assume (48, 48, 1) inputs.

# Validate the class count before any layer is created, rather than after the
# whole network has been assembled.
if num_classes < 2:
    raise ValueError("number of output classes must be at least 2.")

cnn_model = Sequential()

# Stage 1: "same" padding keeps the spatial size unchanged.
# Shape: (48, 48, 1)
cnn_model.add(
    Conv2D(
        16,
        (3, 3),
        padding="same",
        strides=(1, 1),
        input_shape=X_train.shape[1:],
    )
)
# Shape: (48, 48, 16)
cnn_model.add(Activation("relu"))
# Shape: (48, 48, 16)
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
# Shape: (24, 24, 16)

# Stage 2: no padding argument, so the default "valid" padding applies and
# the 3x3 kernel trims one pixel from every border.
cnn_model.add(Conv2D(32, (3, 3), strides=(1, 1)))
# Shape: (22, 22, 32)
cnn_model.add(Activation("relu"))
# Shape: (22, 22, 32)
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
# Shape: (11, 11, 32)

# Stage 3: "valid" padding again; the 2x2 pooling floors 9 / 2 to 4.
cnn_model.add(Conv2D(64, (3, 3), strides=(1, 1)))
# Shape: (9, 9, 64)
cnn_model.add(Activation("relu"))
# Shape: (9, 9, 64)
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
# Shape: (4, 4, 64)

# Classification head.
cnn_model.add(Flatten())
# Shape: (1024,)
cnn_model.add(Dense(128))
cnn_model.add(Activation("relu"))
# NOTE(review): a rate of 0.8 drops 80% of the dense activations during
# training, which is unusually aggressive — confirm this is intended.
cnn_model.add(Dropout(0.8))
cnn_model.add(Dense(num_classes))

# Exactly two classes use independent sigmoid outputs; more than two use a
# softmax over all classes.
cnn_model.add(Activation("sigmoid" if num_classes == 2 else "softmax"))


In [None]:
# Inspect the first training label after one-hot encoding.
y_train[0]

In [None]:
# Adam optimiser with its default settings.
adam = keras.optimizers.Adam()

# Match the loss to the output layer: the two-unit sigmoid head is trained
# with binary cross-entropy, while the softmax head used for more than two
# classes needs categorical cross-entropy on the one-hot labels.
cnn_loss = "binary_crossentropy" if num_classes == 2 else "categorical_crossentropy"

cnn_model.compile(optimizer=adam, loss=cnn_loss, metrics=["accuracy"])

In [None]:
# Train the sequential CNN for 19 epochs in shuffled mini-batches of 16,
# evaluating on the validation split after every epoch. The returned History
# object keeps the per-epoch metrics that are plotted below.
history = cnn_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=19,
    batch_size=16,
    shuffle=True,
)

In [None]:
# Training vs. validation loss per epoch for the sequential CNN. The legend
# and axis labels make the two curves distinguishable.
plt.plot(history.history["loss"], label="train")
plt.plot(history.history["val_loss"], label="validation")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

In [None]:
# Score the held-out test images with the sequential CNN; each row of y_pred
# holds the num_classes outputs of the final layer for one image.
y_pred = cnn_model.predict(X_test)

In [None]:
# Collapse per-class scores and one-hot labels to class indices with a single
# vectorised argmax along the class axis.
# The ndim guards make this cell safe to re-run: once an array is already 1-D
# it is left untouched instead of being silently turned into all zeros.
# y_test stays in index form for the later evaluation cells.
if y_pred.ndim > 1:
    y_pred = np.argmax(y_pred, axis=1)
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)

In [None]:
# Test accuracy of the sequential CNN: share of test images whose predicted
# class index equals the true one.
accuracy_score(y_test, y_pred)

In [None]:
# Seed TensorFlow, NumPy and Python's random module (in that order) with the
# shared seed before the next model is built and trained.
for set_seed in (tf.random.set_seed, np.random.seed, random.seed):
    set_seed(seed)

In [None]:
# Functional-API CNN: one padded 3x3 convolution, then two unpadded 5x5
# convolutions each followed by 2x2 max pooling, and a dense head.

# Validate the class count before any layer is created.
if num_classes < 2:
    raise ValueError("Number of output classes must be at least 2.")

input_img = Input(shape=(48, 48, 1))
# Shape: (48, 48, 1)
conv1 = Conv2D(16, (3, 3), padding="same", strides=(1, 1))(input_img)
activ1 = Activation("relu")(conv1)
# Shape: (48, 48, 16)
conv2 = Conv2D(32, (5, 5), strides=(1, 1))(activ1)
activ2 = Activation("relu")(conv2)
# Shape: (44, 44, 32) — default "valid" padding, 5x5 kernel trims 2 px per border
pool1 = MaxPooling2D(pool_size=(2, 2))(activ2)
# Shape: (22, 22, 32)
conv3 = Conv2D(64, (5, 5), strides=(1, 1))(pool1)
activ3 = Activation("relu")(conv3)
# Shape: (18, 18, 64)
pool2 = MaxPooling2D(pool_size=(2, 2))(activ3)
# Shape: (9, 9, 64)
flat = Flatten()(pool2)
# Shape: (5184,)
dense1 = Dense(128, activation="relu")(flat)

# Exactly two classes use independent sigmoid outputs; more than two use a
# softmax over all classes.
out_activation = "sigmoid" if num_classes == 2 else "softmax"
out = Dense(num_classes, activation=out_activation)(dense1)

alex_net = Model(input_img, out)

In [None]:
# Fresh Adam optimiser with default settings for the second model.
adam = keras.optimizers.Adam()

# Match the loss to the output layer: the two-unit sigmoid head is trained
# with binary cross-entropy, while the softmax head used for more than two
# classes needs categorical cross-entropy on the one-hot labels.
alex_net_loss = (
    "binary_crossentropy" if num_classes == 2 else "categorical_crossentropy"
)

alex_net.compile(optimizer=adam, loss=alex_net_loss, metrics=["accuracy"])

In [None]:
# Train the functional model for 4 epochs in shuffled mini-batches of 32,
# evaluating on the validation split after every epoch. The returned History
# object keeps the per-epoch metrics that are plotted below.
an_history = alex_net.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=4,
    shuffle=True,
)

In [None]:
# Training vs. validation loss per epoch for the functional model. The legend
# and axis labels make the two curves distinguishable.
plt.plot(an_history.history["loss"], label="train")
plt.plot(an_history.history["val_loss"], label="validation")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for the functional model. The
# legend and axis labels make the two curves distinguishable.
plt.plot(an_history.history["accuracy"], label="train")
plt.plot(an_history.history["val_accuracy"], label="validation")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend()
plt.show()

In [None]:
# Score the held-out test images with the functional model; this overwrites
# the y_pred produced for the sequential CNN.
y_pred = alex_net.predict(X_test)

In [None]:
# Collapse the per-class scores to predicted class indices with a single
# vectorised argmax along the class axis. The ndim guard makes the cell safe
# to re-run: an already 1-D y_pred is left untouched instead of being
# silently turned into all zeros.
if y_pred.ndim > 1:
    y_pred = np.argmax(y_pred, axis=1)

In [None]:
# Test accuracy of the functional model. y_test is expected to hold class
# indices here; it was converted from one-hot when the first model was evaluated.
accuracy_score(y_test, y_pred)

In [None]:
# Number of test samples per true class.
pd.Series(y_test).value_counts()

In [None]:
# Number of test samples per predicted class, for comparison with the true
# class counts.
pd.Series(y_pred).value_counts()