In [22]:
import matplotlib.pyplot as plt
import matplotlib.image
import numpy as np
import glob
import imageio.v3 as iio
import dlib
import os
from tqdm import tqdm

In [23]:
# Guard for a missing dataset directory. The download call below is commented
# out, so this branch is currently a no-op: the notebook carries on even when
# ./fer2013 does not exist.
# TODO(review): download_fer2013 is not defined in the visible cells -- either
# restore it or fail loudly here so later cells do not run on empty globs.
if not os.path.exists("./fer2013"):
    # download_fer2013()
    pass

def train_path(category, filename="*", processed=False):
    """Build the path (or glob pattern) of a training image.

    Args:
        category: Sub-folder name inside the split directory.
        filename: File stem without extension; the default "*" turns the
            result into a glob pattern matching every .jpg in the folder.
        processed: When truthy, point at ``train_processed`` instead of
            ``train``.

    Returns:
        A relative path string ending in ``.jpg``.
    """
    return (
        f"./fer2013/train_processed/{category}/{filename}.jpg"
        if processed
        else f"./fer2013/train/{category}/{filename}.jpg"
    )

def test_path(category, filename="*", processed=False):
    if processed: 
        return f"./fer2013/test_processed/{category}/{filename}.jpg"
    return f"./fer2013/test/{category}/{filename}.jpg"

# Category names; each one is a sub-folder of every split directory.
emotions = [
    "angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"
]

# Make sure an output folder exists for every (split, emotion) pair.
# os.makedirs replaces the shell `mkdir` calls: it does not depend on the
# platform shell, creates missing parent directories, and with exist_ok=True
# also fills in emotion folders that are missing from an already existing
# *_processed directory.
for processed_dir in ("test_processed", "train_processed"):
    for emotion in emotions:
        os.makedirs(f"./fer2013/{processed_dir}/{emotion}", exist_ok=True)

In [16]:
# Train-set statistics: for every emotion, count the images in which the dlib
# frontal face detector reports at least one face and print the percentage.
# `detector` is kept at module level because do_preprocess() uses it as well.
detector = dlib.get_frontal_face_detector()

for emotion in emotions:
    image_paths = glob.glob(train_path(emotion))
    total = len(image_paths)
    # The second detector argument is dlib's upsample count (1 = upsample once).
    withface = sum(1 for p in image_paths if detector(iio.imread(p), 1))

    if total == 0:
        # Nothing matched the glob (e.g. dataset missing); avoid dividing by zero.
        print(emotion, withface, total, "i.e., n/a (no images found)")
        continue

    print(emotion, withface, total, f"i.e., {100 * withface / total:.2f}%")

angry 2750 3995 i.e., 68.84%
disgust 340 436 i.e., 77.98%
fear 2542 4097 i.e., 62.05%
happy 5620 7215 i.e., 77.89%
neutral 3728 4965 i.e., 75.09%
sad 2663 4830 i.e., 55.13%
surprise 2358 3171 i.e., 74.36%


In [32]:
# Preprocess the train and test splits: crop every image in which a face is
# detected to a band of `halfsize` rows and save it in the *_processed folder.
# Image height in pixels, and half / a quarter of it.
fullsize, halfsize, quadsize = 48, 48 // 2, 48 // 4


def crop_band(face_top, face_bottom, shift=6):
    """Return the ``(top, bottom)`` row range to keep for one face box.

    The face box is clamped to ``[0, fullsize]``, its vertical centre is moved
    up by ``shift`` rows, and a band of ``halfsize`` rows is centred on that
    point. A band that would leave the image is snapped to the nearest edge,
    so the result always spans exactly ``halfsize`` rows.

    Args:
        face_top: Top row of the detected face box (may be negative).
        face_bottom: Bottom row of the detected face box (may exceed fullsize).
        shift: Rows to move the band centre upwards. The value 6 is taken from
            the original code; presumably it biases the crop towards the upper
            part of the face -- TODO confirm.

    Returns:
        Tuple ``(top, bottom)`` usable as ``image[top:bottom, :]``.
    """
    top = max(0, face_top)
    bottom = min(face_bottom, fullsize)
    center = (top + bottom) // 2 - shift

    if center < quadsize:
        # Band would start above row 0: keep the first halfsize rows.
        return 0, halfsize
    if center > fullsize - quadsize:
        # Band would end below the last row: keep the last halfsize rows.
        # (Previously this produced top=36, bottom=12, i.e. an empty slice.)
        return fullsize - halfsize, fullsize
    return center - quadsize, center + quadsize


def do_preprocess():
    """Crop and save every train/test image in which a face is detected.

    Relies on the module-level ``detector``, ``emotions``, ``train_path`` and
    ``test_path``. Images without a detection are skipped. When several faces
    are detected only the largest box is used, so each source image yields
    exactly one ``pd_<name>.jpg`` that is cropped from the original pixels.
    """
    for pathfunc in [train_path, test_path]:
        for emotion in emotions:
            for p in tqdm(glob.glob(pathfunc(emotion))):
                im = iio.imread(p)
                bboxes = detector(im, 1)
                if len(bboxes) == 0:
                    continue

                # One output file per input: pick the largest detection rather
                # than re-cropping the already cropped array for every box.
                face = max(
                    bboxes,
                    key=lambda r: (r.right() - r.left()) * (r.bottom() - r.top()),
                )
                top, bottom = crop_band(face.top(), face.bottom())
                cropped = im[top:bottom, :]

                # basename/splitext also works with backslash separators.
                stem = os.path.splitext(os.path.basename(p))[0]
                out_path = pathfunc(emotion, "pd_" + stem, processed=True)
                os.makedirs(os.path.dirname(out_path), exist_ok=True)

                # NOTE(review): if `im` is a 2-D grayscale array, imsave applies
                # the default colormap and min/max scaling -- confirm intended.
                matplotlib.image.imsave(out_path, cropped)

# Run the preprocessing pass over both the train and the test split.
do_preprocess()


100%|██████████| 3995/3995 [00:11<00:00, 356.43it/s]
100%|██████████| 436/436 [00:01<00:00, 367.51it/s]
100%|██████████| 4097/4097 [00:11<00:00, 363.41it/s]
100%|██████████| 7215/7215 [00:22<00:00, 325.09it/s]
100%|██████████| 4965/4965 [00:15<00:00, 314.80it/s]
100%|██████████| 4830/4830 [00:13<00:00, 350.09it/s]
100%|██████████| 3171/3171 [00:09<00:00, 322.41it/s]
100%|██████████| 958/958 [00:02<00:00, 321.64it/s]
100%|██████████| 111/111 [00:00<00:00, 339.01it/s]
100%|██████████| 1024/1024 [00:02<00:00, 347.05it/s]
100%|██████████| 1774/1774 [00:05<00:00, 319.73it/s]
100%|██████████| 1233/1233 [00:03<00:00, 335.83it/s]
100%|██████████| 1247/1247 [00:03<00:00, 365.92it/s]
100%|██████████| 831/831 [00:02<00:00, 349.52it/s]
