In [None]:
import io
import IPython.display as ipd
import json
import numpy as np
import os
import requests
import time
from PIL import Image


### Resize face images for annotation script

In [None]:
# Downscale every aligned face PNG so it fits inside NEW_SIZE x NEW_SIZE and
# store it as a JPEG in FACES_DIR_OUT. Outputs that already exist are skipped,
# so the cell can be re-run cheaply.
NEW_SIZE = 384
FACES_DIR_IN = "../metfaces-dataset/images"
FACES_DIR_OUT = "./dataset/metfaces-faces"

# Pillow refuses to write these modes as JPEG (alpha channel / palette);
# such images are converted to RGB right before saving.
MODES_WITHOUT_JPEG_SUPPORT = ("RGBA", "LA", "P")

os.makedirs(FACES_DIR_OUT, exist_ok=True)

FACES_FILES = [f for f in os.listdir(FACES_DIR_IN) if f.endswith(".png")]

for f in FACES_FILES:
    path_in = os.path.join(FACES_DIR_IN, f)
    # Swap the extension on the file name only, never on the directory part.
    path_out = os.path.join(FACES_DIR_OUT, f.replace(".png", ".jpg"))

    if os.path.isfile(path_out):
        continue

    with Image.open(path_in) as im:
        # thumbnail() resizes in place and keeps the aspect ratio.
        im.thumbnail((NEW_SIZE, NEW_SIZE))
        if im.mode in MODES_WITHOUT_JPEG_SUPPORT:
            im = im.convert("RGB")
        im.save(path_out, "JPEG")


### Resize unprocessed images and simplify metfaces-dataset.json

In [None]:
# Resize the unprocessed MetFaces paintings to at most IMG_NEW_SIZE per side and
# write a simplified copy of metfaces-dataset.json next to them.
# face_rect region is specified as [x0, y0, x1, y1], not [x, y, w, h]

METFACES_PATH = "../metfaces-dataset"
DATASET_PATH = "./dataset"

IMG_NEW_SIZE = 800
IMGS_DIR_IN = os.path.join(METFACES_PATH, "unprocessed")
IMGS_DIR_OUT = os.path.join(DATASET_PATH, "source-metfaces")

JSON_FILEPATH_IN = os.path.join(METFACES_PATH, "metfaces-dataset.json")
JSON_FILEPATH_OUT = os.path.join(DATASET_PATH, "metfaces-faces.json")

# Fields of each input record that are dropped from the simplified output.
KEYS_TO_REMOVE = [
    "meta_url",
    "source_url",
    "source_md5",
    "image_md5",
    "source_path",
    "image_path",
    "title",
    "artist_display_name",
    "face_spec",
    "face_idx"
]

# Pillow refuses to write these modes as JPEG (alpha channel / palette);
# such images are converted to RGB right before saving.
MODES_WITHOUT_JPEG_SUPPORT = ("RGBA", "LA", "P")

os.makedirs(IMGS_DIR_OUT, exist_ok=True)

# Seed the output file with an empty list so the read below always succeeds.
if not os.path.isfile(JSON_FILEPATH_OUT):
    with open(JSON_FILEPATH_OUT, 'w') as json_file_out_write:
        json.dump([], json_file_out_write)

# Records written by earlier runs; their ids let a re-run skip finished work.
with open(JSON_FILEPATH_OUT) as json_file_out_read:
    mfd_out = json.load(json_file_out_read)

out_keys = {obj["obj_id"] for obj in mfd_out}

try:
    # Load the whole input first so the file handle is not held open while
    # the (slow) image processing runs.
    with open(JSON_FILEPATH_IN) as json_file_in:
        mfd = json.load(json_file_in)

    for p_obj in mfd:
        img_path_in = os.path.join(METFACES_PATH, p_obj["source_path"])
        img_path_out = img_path_in.replace(IMGS_DIR_IN, IMGS_DIR_OUT).replace(".png", ".jpg")
        obj_id = str(p_obj["image_path"]).replace("images/", "").replace(".png", "")

        # Nothing to do when both the resized painting and its record exist.
        if os.path.isfile(img_path_out) and obj_id in out_keys:
            continue

        print("processing: ", p_obj["source_path"], obj_id)
        with Image.open(img_path_in) as im:
            (in_w, in_h) = im.size
            im.thumbnail((IMG_NEW_SIZE, IMG_NEW_SIZE))
            (out_w, out_h) = im.size

            # Scale factor applied to the face_spec coordinates so they land
            # on the resized painting.
            # NOTE(review): assumes face_spec["shrink"] maps the stored
            # coordinates to source-image pixels - confirm against the
            # MetFaces metadata description.
            shrink = out_w / in_w * p_obj["face_spec"]["shrink"]
            face_coords = [int(x * shrink) for x in p_obj["face_spec"]["rect"]]

            # Mean position of landmark rows 36-41 and 42-47 (named left and
            # right eye here); their midpoint anchors the new face box.
            landmarks = (np.float32(p_obj["face_spec"]["landmarks"]) + 0.5) * shrink
            lm_eye_left = landmarks[36 : 42]
            lm_eye_right = landmarks[42 : 48]
            eye_left = np.mean(lm_eye_left, axis=0)
            eye_right = np.mean(lm_eye_right, axis=0)
            eye_avg = (eye_left + eye_right) * 0.5

            # Grow the detected rect vertically so it reaches at least half
            # its width above and below the eye line ...
            face_dim = face_coords[2] - face_coords[0]
            face_top = min(face_coords[1], eye_avg[1] - (face_dim / 2))
            face_bottom = max(face_coords[3], eye_avg[1] + (face_dim / 2))

            # ... then make it as wide as it is tall, centred on the eyes.
            face_dim_new = face_bottom - face_top
            face_left = eye_avg[0] - (face_dim_new / 2)
            face_right = eye_avg[0] + (face_dim_new / 2)

            # Clamp to the resized image; int() also turns numpy scalars into
            # plain ints so the record stays JSON-serialisable.
            p_obj["face_rect"] = [
                int(max(0, face_left)),
                int(max(0, face_top)),
                int(min(out_w, face_right)),
                int(min(out_h, face_bottom))
            ]

            if not os.path.isfile(img_path_out):
                im_out = im.convert("RGB") if im.mode in MODES_WITHOUT_JPEG_SUPPORT else im
                im_out.save(img_path_out, "JPEG")

            p_obj["source_image"] = str(p_obj["source_path"]).replace("unprocessed/", "")
            p_obj["obj_id"] = obj_id

        # pop() instead of del: a record that lacks one of the optional
        # fields must not abort the whole run.
        for k in KEYS_TO_REMOVE:
            p_obj.pop(k, None)

        if obj_id not in out_keys:
            mfd_out.append(p_obj)
            out_keys.add(obj_id)

except Exception as e:
    print("Exception:\n", e)

finally:
    # Always persist whatever was collected, even after an error or interrupt.
    with open(JSON_FILEPATH_OUT, 'w') as json_file_out_write:
        json.dump(mfd_out, json_file_out_write)



### Resize and re-crop and get json for cordiais faces

In [None]:
# Resize the raw cordiais paintings, detect faces through the Face++ detect
# endpoint, save one square-ish crop per face and record every face in
# cordiais-faces.json.
METFACES_PATH = "../metfaces-dataset"
CORDIAIS_PATH = "../cordiais-analysis"
DATASET_PATH = "./dataset"

IMG_NEW_SIZE = 800
IMGS_DIR_IN = os.path.join(CORDIAIS_PATH, "imgs", "00_raw")
IMGS_DIR_OUT = os.path.join(DATASET_PATH, "source-cordiais")
FACES_DIR_OUT = os.path.join(DATASET_PATH, "cordiais-faces")

JSON_FILEPATH = os.path.join(DATASET_PATH, "cordiais-faces.json")

FACE_API_URL = 'https://api-us.faceplusplus.com/facepp/v3/detect'
FACE_API_DATA = {
    'api_key': os.environ.get('FACEPP_KEY'),
    'api_secret': os.environ.get('FACEPP_SECRET'),
    'return_attributes': 'facequality'
}
# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the cell forever.
FACE_API_TIMEOUT = 60

os.makedirs(IMGS_DIR_OUT, exist_ok=True)
os.makedirs(FACES_DIR_OUT, exist_ok=True)

# Seed the output file with an empty list so the read below always succeeds.
if not os.path.isfile(JSON_FILEPATH):
    with open(JSON_FILEPATH, 'w') as json_file_out_write:
        json.dump([], json_file_out_write)


# Face records from earlier runs; paintings already listed are not re-sent
# to the API.
with open(JSON_FILEPATH) as json_file_out_read:
    mfd_out = json.load(json_file_out_read)

out_keys = {obj["source_image"] for obj in mfd_out}

img_files = sorted([f for f in os.listdir(IMGS_DIR_IN) if f.endswith(".jpg")])

try:
    for fn in img_files:
        source_image = fn.replace("_raw.jpg", "")
        img_path_in = os.path.join(IMGS_DIR_IN, fn)
        img_path_out = os.path.join(IMGS_DIR_OUT, "%s.jpg" % source_image)

        already_listed = source_image in out_keys
        if already_listed and os.path.isfile(img_path_out):
            continue

        with Image.open(img_path_in) as im:
            # Cap the upload at 2048 px per side
            # (presumably to stay within the API's upload limits - confirm).
            if im.size[0] > 2048 or im.size[1] > 2048:
                im.thumbnail((2048, 2048))
            (in_w, in_h) = im.size

            # Encode the upload copy now, before the image is shrunk further.
            in_img_byte_arr = None
            if not already_listed:
                in_img_byte_arr = io.BytesIO()
                im.save(in_img_byte_arr, format=im.format)

            im.thumbnail((IMG_NEW_SIZE, IMG_NEW_SIZE))
            (out_w, out_h) = im.size
            # Detection runs on the uploaded size; shrink maps its coordinates
            # onto the IMG_NEW_SIZE copy that is saved and cropped.
            shrink = out_w / in_w

            if already_listed:
                # The JSON already knows this painting but its resized file is
                # gone: restore the file without calling the API again.
                im.save(img_path_out, "JPEG")
                continue

            print("processing: ", source_image)

            files = { 'image_file': in_img_byte_arr.getvalue() }
            # Pause between requests (presumably API rate limiting - confirm).
            time.sleep(1.25)
            res = requests.post(
                FACE_API_URL,
                files=files,
                data=FACE_API_DATA,
                timeout=FACE_API_TIMEOUT
            )

            # Check the status before parsing: an error page that is not JSON
            # should skip this painting, not abort the whole run.
            if not res.ok:
                print("not ok: ", res)
                continue

            res_o = json.loads(res.text)

            if res_o["face_num"] > 0:
                if not os.path.isfile(img_path_out):
                    im.save(img_path_out, "JPEG")

                for fi, face in enumerate(res_o["faces"]):
                    # Zero-padded face index; %02d keeps indices >= 100 unique.
                    face_num_str = "%02d" % fi
                    face_slug = "%s-%s" % (source_image, face_num_str)

                    # Extend the detected rectangle upwards by 20% of its
                    # height, then make it as wide as the new height, centred
                    # horizontally on the detection (upload coordinates).
                    face_rect = face["face_rectangle"]
                    face_bottom = face_rect["top"] + face_rect["height"]
                    face_width_center = face_rect["left"] + (face_rect["width"] / 2)
                    face_top = max(0, int(face_rect["top"] - 0.2 * face_rect["height"]))
                    face_dim_delta = (face_bottom - face_top) / 2

                    # Convert to the resized image and clamp to its bounds.
                    face_left_new = max(0, int(shrink * (face_width_center - face_dim_delta)))
                    face_top_new = max(0, int(shrink * face_top))
                    face_right_new = min(out_w, int(shrink * (face_width_center + face_dim_delta)))
                    face_bottom_new = min(out_h, int(shrink * face_bottom))
                    face_region_new = (
                        face_left_new,
                        face_top_new,
                        face_right_new,
                        face_bottom_new
                    )

                    out_obj = {
                        "source_image": source_image,
                        "obj_id": face_slug,
                        "face_rect": list(face_region_new)
                    }

                    faces_path_out = os.path.join(FACES_DIR_OUT, "%s.jpg" % face_slug)

                    if not os.path.isfile(faces_path_out):
                        im_crop = im.crop(face_region_new)
                        im_crop.save(faces_path_out, "JPEG")

                    mfd_out.append(out_obj)

                out_keys.add(source_image)

except Exception as e:
    print("Exception:\n", e)

finally:
    # Always persist whatever was collected, even after an error or interrupt.
    with open(JSON_FILEPATH, 'w') as json_file_out_write:
        json.dump(mfd_out, json_file_out_write)


### Iterate through JSONs and annotate gender: F / NOTFEMALE

In [None]:
# Interactive annotation: show each face that has no "gender" yet next to its
# source painting and read one key from the user.
#   f         -> "female"
#   s         -> save progress, leave this face unlabelled and move on
#   q         -> save progress and stop annotating altogether
#   any other -> "notfemale"
DATASET_PATH = "./dataset"
sets = ["cordiais", "metfaces"]

quit_requested = False

# The loop variable must not be called `set`: at notebook scope that would
# rebind the builtin and break every later call to set().
for set_name in sets:
    json_file_path = os.path.join(DATASET_PATH, "%s-faces.json" % set_name)

    with open(json_file_path) as json_file_read:
        faces = json.load(json_file_read)

    for face in faces:
        if "gender" in face:
            continue

        painting_path = os.path.join(DATASET_PATH, "source-%s" % set_name, "%s.jpg" % face["source_image"])
        face_path = os.path.join(DATASET_PATH, "%s-faces" % set_name, "%s.jpg" % face["obj_id"])

        painting_html = "<td><img src='%s' height=300></td>" % painting_path
        face_html = "<td><img src='%s' height=300></td>" % face_path

        html_string = "<table><tr>%s %s</tr></table>" % (painting_html, face_html)
        ipd.display(ipd.HTML(html_string), clear=True)

        key = input()
        if key == "f":
            face["gender"] = "female"
        elif key == "s" or key == "q":
            # Checkpoint the annotations made so far.
            with open(json_file_path, 'w') as json_file_out_write:
                json.dump(faces, json_file_out_write)
        else:
            face["gender"] = "notfemale"

        if key == "q":
            quit_requested = True
            break

    with open(json_file_path, 'w') as json_file_out_write:
        json.dump(faces, json_file_out_write)

    # "q" ends the whole session instead of jumping to the next dataset.
    if quit_requested:
        break

