In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras 
import pandas as pd
import os
import cv2
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [2]:
model = keras.models.load_model("saved_model_10_epochs.h5")
meta_data = pd.read_csv("heysem_data/eth_gender_anno_all.xlsx - eth_gender_trait_annotations_de.csv")
# use same encoding 
meta_data['Gender'] = meta_data["Gender"].replace(2, 0)

In [3]:
# clear difference in occurences in data
meta_data["Ethnicity"].value_counts()

Ethnicity
2    8598
3    1071
1     331
Name: count, dtype: int64

In [4]:
def load_and_preprocess_image(image_path):
    # NOTE: ensure this matches the original model!!!
    img = cv2.imread(image_path)
    img = cv2.resize(img, (256, 256))
    # img = img.astype(np.float32) / 255.0  # Normalize pixel values
    return img

In [5]:
# ends with .JPG is just a safety measure, unsure if needed
img_paths = ["heysem_data/dev/" + file for file in os.listdir("heysem_data/dev/") if file.endswith('.JPG')]

first_and_last = img_paths[:100] + img_paths[-100:]
image_data = [load_and_preprocess_image(path) for path in first_and_last]

In [6]:
# this takes qiuite a long time (~8 sec for 200 paths on teun's laptop)
dataset = tf.data.Dataset.from_tensor_slices(image_data).batch(32)

In [7]:
# this takes a long time on my laptop, 1-2min on 200 samples
predictions = []
for batch in dataset:
    # argmax with axis=1 to get label immediately
    predictions.append(np.argmax(model.predict(batch), axis=1))



In [8]:
results_dict = {}

# hstack to flatten predictions
for pred, path in zip(np.hstack(predictions), first_and_last):
    trim_path = path[16:-4]
    row = meta_data.loc[meta_data["VideoName"] == trim_path]
    # print(row)
    # print(tuple(row[["Ethnicity", "Gender"]].values))
    ethnicity, gender = tuple(row[["Ethnicity", "Gender"]].values[0])

    results_dict[trim_path] = (pred, ethnicity, gender) 
    

In [9]:
df = pd.DataFrame.from_dict(results_dict).T
df.columns = ["gender_prediction", "ethnicity", "gender_true"]
print(df.shape)
df.head()

(200, 3)


Unnamed: 0,gender_prediction,ethnicity,gender_true
--Ymqszjv54.001.mp4,1,2,1
--Ymqszjv54.003.mp4,1,2,1
--Ymqszjv54.004.mp4,1,2,1
--Ymqszjv54.005.mp4,1,2,1
-2qsCrkXdWs.001.mp4,1,2,1


In [10]:
# index to ethnicity according to orignal dataset
i2e = {1: "Asian", 2:"Caucasian", 3:"African-American"}

for e in i2e.keys():
    e_df = df.loc[df['ethnicity']==e]
    
    print(f"\n\n{i2e[e]}")
    acc = round(sum(e_df['gender_prediction'] == e_df['gender_true'])/len(e_df), 3)

    print(f"Accuracy: {acc}")
    cm = confusion_matrix(e_df["gender_true"], e_df["gender_prediction"])
    print(ConfusionMatrixDisplay(cm))





Asian
Accuracy: 0.818
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at 0x0000018EC58174F0>


Caucasian
Accuracy: 0.769
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at 0x0000018EC57D6A10>


African-American
Accuracy: 0.7
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at 0x0000018EC3DA2EC0>
