In [None]:
from utils.common import (
    iter_posts,
    save_data,
    load_data,
    download_multiple_images,
)
from config import SAVE_EVERY, DATA_PATH, IMG_PATH
import cv2
import numpy as np
from insightface.app import FaceAnalysis
from utils.align import FaceAligner
from utils.regexes import extract_ranking
from collections import defaultdict


In [None]:
# Identifier of the post source to process (presumably a subreddit name —
# confirm against `iter_posts`). It is also used to name the image directory
# and the JSON data file for this run.
sub = "Ifyouhadtopickone"

In [None]:
# Detection-score threshold: in the processing loop, faces beyond the required
# minimum are only kept while their `det_score` is at least this value.
min_score = 0.7

# Output locations for this source: one JSON file with the records and one
# directory that receives a sub-directory of face crops per post.
data_path = DATA_PATH / f"{sub}.json"
img_path = IMG_PATH / sub
img_path.mkdir(parents=True, exist_ok=True)

# Face detector shared by all later cells; only the "detection" module is
# requested. NOTE(review): `ctx_id=0` presumably selects the first GPU —
# confirm against the InsightFace documentation.
app = FaceAnalysis(name="buffalo_m", allowed_modules=["detection"])
app.prepare(ctx_id=0, det_size=(640, 640))


In [None]:
# Load the records stored at `data_path` (presumably written by earlier runs
# through `save_data`). The processing loop uses `data` as a mapping keyed by
# post id, both to skip known posts and to add new ones.
data = load_data(data_path)


In [None]:
def extract_votes(post):
    """Turn the comments of ``post`` into vote records and a vote tally.

    A comment counts as a vote only when it names exactly one option:

    * body equal to ``"l"``, or containing ``"left"``    -> ``0``
    * body equal to ``"r"``, or containing ``"right"``   -> ``-1``
    * body containing ``"middle"`` or ``"center"``       -> ``"middle"``
    * body containing a single non-zero decimal digit d  -> ``d - 1``

    Matching is done on the stripped, lower-cased body and is substring
    based. Bodies that mention ``"top"`` or ``"bottom"``, that name more than
    one option, or that name none are recorded as non-votes; for those the
    body is passed to ``extract_ranking`` instead.

    Args:
        post: Object exposing ``comments``, an iterable of comment objects
            with a ``body`` string and an ``author`` (which may be ``None``
            or lack a ``name``).

    Returns:
        A ``(user_ratings, anots)`` tuple. ``user_ratings`` has one dict per
        parsed comment with the keys ``username``, ``value``, ``text`` and
        ``ranking``. ``anots`` is a ``defaultdict(int)`` mapping each vote
        value to the number of comments that cast it.
    """
    user_ratings = []
    anots = defaultdict(int)

    for c in post.comments:
        raw_body = getattr(c, "body", None)
        if raw_body is None:
            # Entries without text (e.g. PRAW ``MoreComments`` placeholders,
            # if ``post`` is a PRAW submission) cannot carry a vote. Skipping
            # them keeps one such entry from discarding the whole post.
            continue

        body = raw_body.strip().lower()
        username = getattr(c.author, "name", None)
        answer = None

        if body == "l":
            answer = 0
        elif body == "r":
            answer = -1
        elif "bottom" in body or "top" in body:
            # Vertical layouts are not supported; never counted as a vote.
            pass
        else:
            # Collect every option the comment mentions; it is a usable vote
            # only if exactly one option was named.
            candidates = []
            if "left" in body:
                candidates.append(0)
            if "right" in body:
                candidates.append(-1)
            if "middle" in body or "center" in body:
                candidates.append("middle")
            # Compare the numeric value, not the character: a non-ASCII zero
            # such as full-width "０" would otherwise become index -1 and be
            # counted as a vote for the last face.
            candidates.extend(
                int(char) - 1
                for char in body
                if char.isdecimal() and int(char) != 0
            )
            if len(candidates) == 1:
                answer = candidates[0]

        if answer is not None:
            anots[answer] += 1
            user_ratings.append(
                dict(
                    username=username,
                    value=answer,
                    text=body,
                    ranking=None,
                )
            )
        else:
            user_ratings.append(
                dict(
                    username=username,
                    text=body,
                    value=None,
                    ranking=extract_ranking(body),
                )
            )

    return user_ratings, anots


In [None]:
def extract_multiple_faces(imgs):
    """Detect and align the single face of each image of a multi-image post.

    Args:
        imgs: Iterable of ``(image, url)`` pairs.

    Returns:
        Three parallel lists ``(aligned, detections, urls)`` with one entry
        per image. If any image does not contain exactly one detected face,
        three empty lists are returned instead.
    """
    per_image = []

    for picture, link in imgs:
        found = app.get(picture)
        if len(found) != 1:
            # Every image must show exactly one face; otherwise the whole
            # post is unusable.
            return [], [], []
        crop = aligner.from_insight_face(picture, found)[0]
        per_image.append((crop, found[0], link))

    aligned = [entry[0] for entry in per_image]
    detections = [entry[1] for entry in per_image]
    urls = [entry[2] for entry in per_image]
    return aligned, detections, urls

In [None]:
# Posts of `sub` to process, as provided by `iter_posts`.
# NOTE(review): 5_000 is presumably an upper bound on the number of posts
# fetched — confirm against `iter_posts`.
posts = iter_posts(sub, 5_000)

In [None]:
aligner = FaceAligner()

# Number of posts stored during this run. The periodic checkpoint is driven by
# this counter rather than by the loop index `j`, because most iterations leave
# early through `continue` and would otherwise skip the checkpoint silently.
stored_count = 0

for j, post in enumerate(posts):
    # Skip posts without comments, locked or removed posts, and posts that
    # are already present in `data`.
    if (
        post.num_comments < 1
        or post.locked
        or post.removed_by_category
        or post.id in data
    ):
        continue

    stored = False
    try:
        imgs = download_multiple_images(post)

        if imgs is None:
            continue

        user_ratings, anots = extract_votes(post)

        if not anots:
            continue

        # "middle" and -1 (the value `extract_votes` assigns to "r"/"right")
        # are positional votes and are tallied separately; what remains in
        # `anots` are explicit zero-based face indices.
        middle = anots.pop("middle", 0)
        last = anots.pop(-1, 0)
        # The highest voted index dictates how many faces must be present,
        # with a floor of two.
        min_faces = max(max(anots.keys()) + 1, 2) if anots else 2

        if len(imgs) == 1:
            # One image that has to contain all the faces.
            img = imgs[0][0]
            faces = app.get(img)

            if len(faces) < min_faces:
                continue

            if middle and min_faces % 2 == 0:
                # "middle" votes with an even minimum: require one more face,
                # or give up on the post when no further face was detected.
                if min_faces == len(faces):
                    continue
                min_faces += 1

            # Keep the first `min_faces` detections unconditionally, then
            # further ones until a detection falls below `min_score`.
            # NOTE(review): this presumes `app.get` returns detections in
            # descending score order — confirm.
            good_faces = faces[:min_faces]

            for i in range(min_faces, len(faces)):
                face = faces[i]
                if face["det_score"] < min_score:
                    break
                good_faces.append(face)

            # Order by bbox[0] (presumably the left edge of the box) so that
            # index 0 is the left-most face — TODO confirm.
            sorted_faces = sorted(good_faces, key=lambda x: x["bbox"][0])
            aligned_faces = aligner.from_insight_face(img, sorted_faces)
            urls = [post.url]
        else:
            # Several images: each one must contain exactly one face.
            aligned_faces, sorted_faces, urls = extract_multiple_faces(imgs)

            if len(aligned_faces) < min_faces:
                continue

        # Per-face vote counts, indexed like `aligned_faces`.
        # NOTE(review): with an even number of faces the "middle" votes land
        # on index len // 2, i.e. right of centre — confirm this is intended.
        ratings = np.zeros(len(aligned_faces))
        ratings[-1] = last
        ratings[len(aligned_faces) // 2] += middle

        for i, rate in anots.items():
            ratings[i] += rate

        dir_path = img_path / post.id
        dir_path.mkdir(exist_ok=True)

        for i, (img, votes) in enumerate(zip(aligned_faces, ratings)):
            # Crops are converted from RGB to BGR before being handed to
            # OpenCV for writing.
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            target = dir_path / f"{i}-{int(votes)}.jpg"
            # cv2.imwrite reports failure through its return value instead of
            # raising; turn that into an error so the post is not recorded.
            if not cv2.imwrite(str(target), img):
                raise OSError(f"could not write {target}")

        # Record the post only after all crops were written. An entry in
        # `data` makes later runs skip the post, so it must not exist for a
        # post whose images are missing.
        data[post.id] = dict(
            ratings=user_ratings,
            detection=sorted_faces,
            urls=urls,
            username=getattr(post.author, "name", None),
        )
        stored = True

    except Exception as E:
        # Best effort: report which post failed and why, then move on.
        print(f"Skipping post {post.id}: {type(E).__name__}: {E}")

    if stored:
        stored_count += 1
        if stored_count % SAVE_EVERY == 0:
            save_data(data_path, data)


In [None]:
# Final save: persists the entries added since the last periodic save made
# inside the processing loop.
save_data(data_path, data)