In [None]:
from common import iter_posts, download_multiple_images, NumpyEncoder
from age_gender import get_age_gender
from extract_faces import FaceExtractor
from align import FaceAligner
from pathlib import Path
import json
import cv2

In [None]:
# On-disk locations for the scraped metadata (JSON) and the face crops.
data_path = Path("data/first_impression.json")
image_path = Path("/media/idk/idk1/true_rateme/first_impression")

# Interval used by the scraping loop when deciding to write `data` to disk.
save_every = 100

# Face extractor in its default configuration, wired to an aligner that
# is constructed with padding=2.
aligner = FaceAligner(padding=2)
face_extractor = FaceExtractor.default(aligner=aligner)


In [None]:
# Resume from a previous run when the JSON file is already present;
# otherwise start with an empty mapping of post id -> post record.
data = json.loads(data_path.read_text()) if data_path.exists() else {}

In [None]:
def _save_data():
    """Write the accumulated ``data`` mapping to ``data_path`` as JSON."""
    with open(data_path, "w") as f:
        json.dump(data, f, cls=NumpyEncoder)


# Number of posts added to `data` since the last write to disk.
unsaved = 0

for post in iter_posts("firstimpression", 5_000):
    # Skip posts already stored from an earlier run, and posts that are
    # removed, locked or have no comments at all.
    if (
        post.id in data
        or post.removed_by_category
        or post.num_comments == 0
        or post.locked
    ):
        continue

    # Downloading is best-effort: any failure just drops this post.
    try:
        images = download_multiple_images(post)
    except Exception:
        continue

    if not images:
        continue

    detections, faces, urls = face_extractor(images)

    if not detections:
        continue

    gender, age = get_age_gender(post)

    comments = []
    for comment in post.comments:
        # Entries without a (non-empty) `body` attribute are ignored.
        body = getattr(comment, "body", None)
        if not body:
            continue
        # Record the comment's own author and score, not the post's.
        comment_author = getattr(comment, "author", None)
        comments.append(
            dict(
                author=getattr(comment_author, "id", None),
                text=body,
                score=getattr(comment, "score", None),
            )
        )

    if not comments:
        continue

    data[post.id] = dict(
        gender=gender,
        age=age,
        urls=urls,
        detections=detections,
        title=post.title,
        score=post.score,
        author=getattr(post.author, "id", None),
        comments=comments,
    )

    # Faces are converted from RGB to BGR before being handed to
    # cv2.imwrite; one file per face, named "<post id>-<index>.jpg".
    for i, img in enumerate(faces):
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(image_path / f"{post.id}-{i}.jpg"), img)

    # Checkpoint once every `save_every` newly stored posts. Counting
    # stored posts (rather than all iterated posts) keeps the interval
    # regular even when most posts are skipped.
    unsaved += 1
    if unsaved >= save_every:
        _save_data()
        unsaved = 0

# Flush whatever was collected after the last checkpoint.
if unsaved:
    _save_data()
