Predict skin tone from image.

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tqdm.notebook as tqdm
import skimage.color

In [None]:
# Read both label tables and tag every row with the split it belongs to
# (the validation labels file serves as the test split).
df_train = pd.read_csv('../input/fairface/FairFace/train_labels.csv').assign(split='train')
df_test = pd.read_csv('../input/fairface/FairFace/val_labels.csv').assign(split='test')

# Stack the two splits and discard the label columns that are not used here.
df = pd.concat([df_train, df_test]).drop(columns=['service_test', 'gender', 'age'])

# EDA + data_prep

In [None]:
# Summary statistics for the combined (train + test) label table.
df.describe()

In [None]:
# Bar chart of the number of rows per race label.
df['race'].value_counts().plot(kind='bar')
plt.show()
plt.close()

In [None]:
# Recover the per-split frames from the combined table via the `split` tag.
df_train = df.loc[df['split'] == 'train']
df_test = df.loc[df['split'] == 'test']

# Classical skin segmentation model

We first demonstrate the approach step by step on a single image. It is my own implementation of the method described here: http://www.eleco.org.tr/openconf_2017/modules/request.php?module=oc_proceedings&action=view.php&id=248&file=1/248.pdf&a=Accept+as+Lecture

The goal is to extract the average RGB values of the pixels identified as skin.


In [None]:
def run_histogram_equalization(img_bgr):
    """Equalize the brightness histogram of a BGR colour image.

    The image is converted to YCrCb so that only the Y (brightness) channel
    is equalized; the two remaining channels are left untouched. The result
    is converted back to BGR and returned.

    References:
        https://stackoverflow.com/a/38312281/6594629
        https://www.opencv-srf.com/2018/02/histogram-equalization.htm
    """
    ycrcb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YCrCb)
    brightness = ycrcb[..., 0]
    ycrcb[..., 0] = cv2.equalizeHist(brightness)
    return cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)


def segment_otsu(image_grayscale, img_BGR):
    """Black out the pixels of ``img_BGR`` rejected by Otsu's threshold.

    An inverted binary Otsu threshold is computed on ``image_grayscale``.
    Flipping that result gives a 0/1 keep-map, which is replicated over three
    channels and multiplied into ``img_BGR``. The product is returned as
    ``uint8``.
    """
    _, inverted = cv2.threshold(
        image_grayscale, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    keep = 1 - (inverted / 255)
    keep_3ch = np.stack([keep] * 3, axis=2)
    return (keep_3ch * img_BGR).astype('uint8')

# Walk a single training image through every stage of the pipeline and
# display the intermediate result after each step.
img_bgr = cv2.imread('../input/fairface/FairFace/train/1.jpg')
print("Original Image")
plt.imshow(img_bgr[..., ::-1])  # reverse the channel axis (BGR -> RGB) for display
plt.show()

print("Original Image after Histogram eq.")
img_bgr = run_histogram_equalization(img_bgr)
plt.imshow(img_bgr[..., ::-1])
plt.show()

print("Grayscale Image")
img_grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
plt.imshow(img_grayscale, cmap='gray')
plt.show()

print("After segmentation using Otsu's method.")
img_bgr = segment_otsu(img_grayscale, img_bgr)
plt.imshow(img_bgr[..., ::-1])
plt.show()

# The HSV and YCrCb arrays are displayed with their channel axis reversed,
# in the same way as the BGR images above.
print("HSV space")
img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
plt.imshow(img_hsv[..., ::-1])
plt.show()

print("YCrCb space")
img_ycrcb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YCrCb)
plt.imshow(img_ycrcb[..., ::-1])
plt.show()

# A pixel is kept only when all five range checks hold.
mask = (
    (img_hsv[..., 0] <= 170)
    & (img_ycrcb[..., 1] >= 140)
    & (img_ycrcb[..., 1] <= 170)
    & (img_ycrcb[..., 2] >= 90)
    & (img_ycrcb[..., 2] <= 120)
)

print("After masking")
img_bgr[np.logical_not(mask)] = 0
plt.imshow(img_bgr[..., ::-1])
plt.show()

# Per-channel mean over the kept pixels only (channel order is B, G, R).
blue, green, red = [
    np.ma.array(img_bgr[..., channel], mask=~mask).mean() for channel in range(3)
]

print(f"Final average skin tone RGB: {red}, {green}, {blue}")
plt.imshow([[[red / 255, green / 255, blue / 255]]])
plt.show()

In [None]:
def predict_img(img_bgr, equalize=False):
    """Estimate the mean skin colour of an image.

    Steps: optional histogram equalization, Otsu segmentation, then a skin
    mask built from range checks on the HSV hue channel and the two YCrCb
    chroma channels. The pixels that pass the mask are averaged per channel.

    Args:
        img_bgr: Either a path to an image file (``str``) or an already
            loaded BGR image array.
        equalize: When true, ``run_histogram_equalization`` is applied before
            segmentation.

    Returns:
        A ``(blue, green, red)`` tuple of channel means over the masked-in
        pixels, or three NaNs when no pixel passes the mask.

    Raises:
        FileNotFoundError: If a path is given and OpenCV cannot read it.
    """
    if isinstance(img_bgr, str):
        path = img_bgr
        img_bgr = cv2.imread(path)
        if img_bgr is None:  # cv2.imread reports failure by returning None
            raise FileNotFoundError(f"Could not read image: {path}")

    if equalize:
        img_bgr = run_histogram_equalization(img_bgr)

    img_grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    img_bgr = segment_otsu(img_grayscale, img_bgr)

    # Both colour-space views must be derived from the BGR image of THIS call.
    # Keep them in separate locals so img_bgr is not overwritten and no
    # notebook-level variable is picked up by accident.
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    img_ycrcb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YCrCb)

    mask = (
        (img_hsv[:, :, 0] <= 170)
        & (img_ycrcb[:, :, 1] >= 140)
        & (img_ycrcb[:, :, 1] <= 170)
        & (img_ycrcb[:, :, 2] >= 90)
        & (img_ycrcb[:, :, 2] <= 120)
    )

    if not mask.any():
        # Nothing survived the mask; NaN lets callers filter the sample out.
        return np.nan, np.nan, np.nan

    # Boolean indexing yields an (n_pixels, 3) array in B, G, R order.
    blue, green, red = img_bgr[mask].mean(axis=0)
    return blue, green, red

In [None]:
# We have a lot of data, subsample.
# Both sizes are currently the full split; lower them (e.g. 2000 and
# int(0.2 * N_TRAIN)) to actually reduce the data.
N_TRAIN = len(df_train)  # 2000
N_TEST = len(df_test)  # int(0.2 * N_TRAIN)

np.random.seed(0)
# Sample WITHOUT replacement so no row is duplicated. `sample` selects rows
# by position, so it does not depend on the index labels being 0..n-1.
df_train = df_train.sample(n=N_TRAIN, random_state=0)
df_test = df_test.sample(n=N_TEST, random_state=0)

In [None]:
# Run the skin-colour extractor on every listed file; each result becomes
# one row of the feature matrix.
x_train = np.array([
    predict_img(os.path.join('../input/fairface/FairFace', name))
    for name in tqdm.tqdm(df_train['file'].values)
])
x_test = np.array([
    predict_img(os.path.join('../input/fairface/FairFace', name))
    for name in tqdm.tqdm(df_test['file'].values)
])

In [None]:
# Encode race labels as integers with ONE mapping shared by both splits.
# Factorizing each split on its own numbers the labels by order of first
# appearance, so the same race could get different codes in train and test.
race_categories = pd.Categorical(df_train.race).categories
y_train = pd.Categorical(df_train.race, categories=race_categories).codes.astype('int64')
# A test label that never occurs in the training split is encoded as -1.
y_test = pd.Categorical(df_test.race, categories=race_categories).codes.astype('int64')

# Code -> label lookup tables; identical for both splits by construction.
y_train_map = pd.Categorical(race_categories, categories=race_categories)
y_test_map = y_train_map.copy()

In [None]:
# Drop cases where segmentation fails: keep only the rows whose features
# contain no NaN, and keep the labels aligned with them.
train_idx = np.logical_not(np.isnan(x_train).any(axis=1))
x_train, y_train = x_train[train_idx], y_train[train_idx]

test_idx = np.logical_not(np.isnan(x_test).any(axis=1))
x_test, y_test = x_test[test_idx], y_test[test_idx]

In [None]:
# Add ita angle feature
def compute_ita(r, g, b):
    """Return the ITA (individual typology angle), in degrees, of an RGB colour.

    The colour is converted to CIELAB and the angle is computed as
    ``arctan((L - 50) / b_lab)``, converted from radians to degrees.

    Args:
        r: Red intensity on the 0-255 scale.
        g: Green intensity on the 0-255 scale.
        b: Blue intensity on the 0-255 scale.

    Returns:
        The angle in degrees.
    """
    # rgb2lab treats floating-point input as already scaled to [0, 1], so the
    # 0-255 intensities must be normalised before the conversion.
    rgb = np.array([[[r, g, b]]], dtype=float) / 255.0
    # Use distinct names for the Lab components so the blue argument `b` is
    # not shadowed by the Lab b component.
    lightness, _, b_lab = skimage.color.rgb2lab(
        rgb, illuminant='D65', observer='10'
    ).flatten()
    return np.rad2deg(np.arctan((lightness - 50) / b_lab))


# Smoke-test on one image; predict_img returns (blue, green, red).
b, g, r = predict_img(cv2.imread('../input/fairface/FairFace/train/1.jpg'))
compute_ita(r, g, b)

# Feature rows are stored as (blue, green, red): unpack them in that order
# and pass the channels to compute_ita in its (r, g, b) argument order.
# reshape(-1, 1) keeps the column two-dimensional even when there are no rows.
ita_train = np.array([compute_ita(r, g, b) for b, g, r in x_train]).reshape(-1, 1)
x_train = np.hstack((x_train, ita_train))

ita_test = np.array([compute_ita(r, g, b) for b, g, r in x_test]).reshape(-1, 1)
x_test = np.hstack((x_test, ita_test))

In [None]:
# One panel per feature column: the feature value is plotted against the
# class id and coloured by class.
_labels = ['blue', 'green', 'red', 'ita']
fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(20, 5), sharex=True)
for i, (ax, feature_name) in enumerate(zip(axs.ravel(), _labels)):
    scatter = ax.scatter(y_train, x_train[:, i], c=y_train, label=y_train)
    ax.set(xlabel='Class', ylabel=feature_name)
    # Optional per-panel legend (disabled):
    # legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
    # ax.add_artist(legend)

plt.show()
plt.close()
del _labels

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import confusion_matrix as cm, classification_report as cr

# Fit a random forest on the training features, then report how well it
# reproduces its own training labels.
rf = RFC(random_state=0)
rf.fit(x_train, y_train)

y = rf.predict(x_train)
print(cm(y_train, y))
print(cr(y_train, y))

In [None]:
# Score the fitted forest on the test features.
y = rf.predict(x_test)
print(cm(y_test, y))
print(cr(y_test, y))