In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tqdm.notebook as tqdm
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import confusion_matrix as cm, classification_report as cr

In [None]:
# Read the two FairFace label tables, tag each row with the split it came from
# and prepend the dataset directory to the `file` column.
df_train = pd.read_csv('../input/fairface/FairFace/train_labels.csv').assign(
    split='train',
    file=lambda frame: '../input/fairface/FairFace/' + frame.file,
)
df_test = pd.read_csv('../input/fairface/FairFace/val_labels.csv').assign(
    file=lambda frame: '../input/fairface/FairFace/' + frame.file,
    split='test',
)
df = pd.concat([df_train, df_test])

# Replace the race strings by integer codes; `labels_map` keeps the code -> label lookup.
df['race'], labels_map = pd.Categorical(df['race']).factorize()

# Only the file path, the race code and the split tag are needed below.
df = df.drop(columns=['service_test', 'gender', 'age'])

In [None]:
# Quick summary statistics of the combined train + test frame built in the previous cell.
df.describe()

In [None]:
# Class balance over the combined train + test rows.
# `race` holds the integer codes produced by factorize(), so map them back
# through `labels_map` to get readable tick labels instead of bare numbers.
race_counts = df['race'].value_counts()
race_counts.index = [labels_map[code] for code in race_counts.index]

ax = race_counts.plot.bar()
ax.set(title='Images per race class', xlabel='race', ylabel='number of images')
plt.show()
plt.close()

In [None]:
# The official training rows are divided 90/10 into train and validation
# (fixed seed for repeatability); the official validation rows act as the test set.
train_rows = df.loc[df['split'] == 'train'].drop(columns=['split'])
df_train, df_val = train_test_split(train_rows, test_size=0.1, random_state=0, shuffle=True)
df_test = df.loc[df['split'] == 'test'].drop(columns=['split'])

# Model

In [None]:
# Pipeline constants
IMG_SIZE = 224                 # side length (pixels) images are resized to
BATCH_SIZE = 224
AUTOTUNE = tf.data.AUTOTUNE
NUM_CLASSES = len(labels_map)  # one class per distinct race label

# One-hot encode the integer race codes of every split
y_train, y_test, y_val = (
    tf.keras.utils.to_categorical(frame.race, num_classes=NUM_CLASSES, dtype='float32')
    for frame in (df_train, df_test, df_val)
)

# (path, one-hot label) datasets; only the training paths are shuffled
train_ds = tf.data.Dataset.from_tensor_slices((df_train.file, y_train))
train_ds = train_ds.shuffle(len(y_train))
val_ds = tf.data.Dataset.from_tensor_slices((df_val.file, y_val))
test_ds = tf.data.Dataset.from_tensor_slices((df_test.file, y_test))

# Sanity check: every dataset has exactly one element per row of its frame
for ds, frame in ((train_ds, df_train), (val_ds, df_val), (test_ds, df_test)):
    assert len(ds) == len(frame.file) == len(frame.race)

# Read files
def map_fn(path, label):
    """Load one JPEG from disk and resize it to the network input size.

    Args:
        path: scalar string tensor holding the path of a JPEG file.
        label: label for that image; passed through unchanged.

    Returns:
        Tuple ``(image, label)`` where ``image`` has been resized to
        ``(IMG_SIZE, IMG_SIZE)`` and has 3 channels.
    """
    # channels=3 forces RGB output, so every decoded image has the same,
    # statically known channel count regardless of how the file was stored.
    image = tf.io.decode_jpeg(tf.io.read_file(path), channels=3)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label

# Decode and resize each sample. This runs per image, before batching,
# so that all images share one size by the time they are stacked into batches.
train_ds = train_ds.map(map_fn, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(map_fn, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(map_fn, num_parallel_calls=AUTOTUNE)

# Group samples into batches, then prefetch batches ahead of the consumer
train_ds = train_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Preview: show the first few images of one training batch,
# each captioned with the index of its one-hot label.
imgs, lbls = next(iter(train_ds.take(1)))
n = 3
fig, ax = plt.subplots(1, n, figsize=(5*n, 5))
for axis, img, lbl in zip(ax, imgs[:n], lbls[:n]):
    # pixel values are floats after resizing; cast back to uint8 for display
    axis.imshow(img.numpy().astype('uint8'))
    axis.set(xlabel=lbl.numpy().argmax())

plt.show()

In [None]:
# Building blocks of the classifier: augmentation, input scaling, backbone

# Augmentation stack: horizontal flip, then rotation and zoom (factor 0.2 each)
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'))
data_augmentation.add(tf.keras.layers.experimental.preprocessing.RandomRotation(0.2))
data_augmentation.add(tf.keras.layers.experimental.preprocessing.RandomZoom(0.2))

# Input scaling function that ships with MobileNetV2
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# ImageNet-pretrained MobileNetV2 without its classification head;
# pooling="avg" makes it emit one feature vector per image.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    pooling="avg",
    weights='imagenet',
)

# Freeze the backbone for the first training phase
base_model.trainable = False

In [None]:
# Train
LEARNING_RATE = 1e-3

# Wire the network: augmentation -> MobileNetV2 input scaling -> backbone
# (called with training=False) -> dropout -> softmax over the classes.
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
augmented = data_augmentation(inputs)
scaled = preprocess_input(augmented)
features = base_model(scaled, training=False)
regularized = tf.keras.layers.Dropout(0.2)(features)
x = tf.keras.layers.Dense(units=NUM_CLASSES, activation="softmax")(regularized)

model = tf.keras.Model(inputs=inputs, outputs=x)

# Labels are one-hot encoded, hence the categorical loss and metric
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.optimizers.Adam(LEARNING_RATE),
    metrics=["categorical_accuracy"],
)

model.summary()

In [None]:
EPOCHS = 50

# All three callbacks watch the validation loss.
# Cut the learning rate by 10x after 10 epochs without improvement
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=10, verbose=1, mode='min', min_delta=0.0001)
# Keep the weights of the best epoch on disk
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'weights.tf', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)
# Give up after 15 epochs without improvement
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=15, verbose=1, restore_best_weights=True)
callbacks = [reduce_lr, checkpoint, early_stopping]

history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=callbacks,
    verbose=1,
)

# Evaluate with the best checkpoint rather than whatever the last epoch left behind
model.load_weights('weights.tf')

model.evaluate(test_ds)

In [None]:
# Collect ground truth and predictions batch by batch over the test set.
y_true, y_pred = [], []
for imgs, lbls in test_ds:
    y_true.append(lbls.numpy())
    # predict_on_batch does a single forward pass on the given batch; calling
    # model.predict here would set up a fresh prediction loop (and progress
    # bar) for every batch.
    y_pred.append(model.predict_on_batch(imgs))

# One-hot / probability rows -> class indices
y_pred = np.argmax(np.concatenate(y_pred), axis=1)
y_true = np.argmax(np.concatenate(y_true), axis=1)

# Report per-class metrics under the original label names instead of bare codes;
# passing `labels` keeps every class in the output even if it was never predicted.
class_ids = list(range(NUM_CLASSES))
class_names = [str(name) for name in labels_map]
print(cr(y_true=y_true, y_pred=y_pred, labels=class_ids, target_names=class_names))
print(cm(y_true=y_true, y_pred=y_pred, labels=class_ids))

In [None]:
# Training curves of the first phase: accuracy on the left, loss on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7.5))

panels = (
    (ax1, 'categorical_accuracy', 'val_categorical_accuracy', 'model accuracy', 'accuracy'),
    (ax2, 'loss', 'val_loss', 'model loss', 'loss'),
)
for axis, train_key, val_key, title, ylabel in panels:
    axis.plot(history.history[train_key])
    axis.plot(history.history[val_key])
    axis.set_title(title)
    axis.set(ylabel=ylabel, xlabel='epoch')
    axis.legend(['train', 'val'], loc='upper left')

plt.show()

In [None]:
# Fine tuning
FINE_TUNE_EPOCHS = 50

# Unfreeze the backbone, then freeze its first `fine_tune_at` layers again so
# that only the later layers are updated.
base_model.trainable = True
print("Number of layers in the base model: ", len(base_model.layers))
fine_tune_at = 30
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile after changing the trainable flags, with a 10x smaller learning rate
model.compile(
    optimizer=tf.optimizers.Adam(LEARNING_RATE / 10),
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"]
)

# summary() prints itself and returns None, so it must not be wrapped in print()
model.summary()

# Same monitoring as the first phase, but checkpointing to a separate file
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.1, patience=10, verbose=1, mode='min', min_delta=0.0001),
    tf.keras.callbacks.ModelCheckpoint(
        'weights_fine.tf', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=15, verbose=1, restore_best_weights=True)
]

# Continue the epoch numbering right after the last epoch of the first phase.
# history.epoch holds 0-based indices, so the next epoch is the last index + 1.
initial_epoch = history.epoch[-1] + 1

history_fine = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=initial_epoch + FINE_TUNE_EPOCHS,
    initial_epoch=initial_epoch,
    callbacks=callbacks,
    verbose=1,
)

# Reload the best checkpoint written by ModelCheckpoint (as done after the
# first phase). Saving the current weights to the same path here would
# overwrite that best checkpoint with the last-epoch weights.
model.load_weights('weights_fine.tf')

model.evaluate(test_ds)

In [None]:
# Collect ground truth and predictions of the fine-tuned model over the test set.
y_true, y_pred = [], []
for imgs, lbls in test_ds:
    y_true.append(lbls.numpy())
    # predict_on_batch does a single forward pass on the given batch; calling
    # model.predict here would set up a fresh prediction loop (and progress
    # bar) for every batch.
    y_pred.append(model.predict_on_batch(imgs))

# One-hot / probability rows -> class indices
y_pred = np.argmax(np.concatenate(y_pred), axis=1)
y_true = np.argmax(np.concatenate(y_true), axis=1)

# Report per-class metrics under the original label names instead of bare codes;
# passing `labels` keeps every class in the output even if it was never predicted.
class_ids = list(range(NUM_CLASSES))
class_names = [str(name) for name in labels_map]
print(cr(y_true=y_true, y_pred=y_pred, labels=class_ids, target_names=class_names))
print(cm(y_true=y_true, y_pred=y_pred, labels=class_ids))

In [None]:
# Training curves across both phases, with a vertical line at the last epoch
# of the first phase (i.e. where fine-tuning takes over).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7.5))

# Concatenate the per-epoch metrics of the first phase and the fine-tuning phase
acc = history.history['categorical_accuracy'] + history_fine.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy'] + history_fine.history['val_categorical_accuracy']
loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

ax1.plot(acc)
ax1.plot(val_acc)
ax1.set_title('model accuracy')
ax1.set(ylabel='accuracy', xlabel='epoch')
ax1.vlines(history.epoch[-1], 0, 1, colors='green')
ax1.legend(['train', 'val', 'fine_tune_start'], loc='upper left')

ax2.plot(loss)
ax2.plot(val_loss)
ax2.set_title('model loss')
ax2.set(ylabel='loss', xlabel='epoch')
ax2.vlines(history.epoch[-1], 0, max(loss), colors='green')
# legend label spelled the same as on the accuracy panel
ax2.legend(['train', 'val', 'fine_tune_start'], loc='upper left')

plt.show()