## Get cleaner annotations and save them as yaml

In [None]:
import yaml

# Load the path configuration (annotation files, video folders, output
# folders) that every following cell indexes through ``PATHS``.
with open('configs/paths.yaml') as paths_file:
    PATHS = yaml.safe_load(paths_file)

# Speaker names kept when ONLY_ACTORS is enabled.
ACTORS = [
    'Chandler',
    'Joey',
    'Monica',
    'Phoebe',
    'Rachel',
    'Ross',
]

# True -> annotation rows whose Speaker is not in ACTORS are dropped.
ONLY_ACTORS = False

# True -> the age/gender model is not loaded and age/gender are stored as None.
ONLY_FACE = False

In [None]:
import os
from glob import glob
import csv
from tqdm.notebook import tqdm

# Build ``datasets`` -- {split: {"dia<D>_utt<U>": annotation dict}} -- from the
# raw CSV annotations and cache it as YAML. When the cache file already exists
# it is loaded instead of being rebuilt.
if not os.path.isfile(PATHS['ANNOTATIONS']['all']):
    annotations = {}
    for DATASET in ['train', 'dev', 'test']:
        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields that contain line breaks.
        with open(PATHS['ANNOTATIONS'][DATASET], newline='') as f:
            annotations[DATASET] = list(csv.reader(f))

    datasets = {}
    weird = 0  # annotation rows without a matching video file
    for DATASET in tqdm(['train', 'dev', 'test']):
        datasets[DATASET] = {}

        # List the video folder once per split and test exact file names.
        # A substring search over the stringified listing would also accept
        # files that merely contain the name (e.g. "._dia1_utt1.mp4").
        # NOTE(review): this cell reads PATHS['ORIGINAL_VIDS'] while the
        # feature-extraction cell reads PATHS['ORIGINAL_VIDEOS'] -- confirm
        # which key configs/paths.yaml actually defines.
        available_videos = set(os.listdir(PATHS['ORIGINAL_VIDS'][DATASET]))

        # The first CSV row is the header.
        for row in tqdm(annotations[DATASET][1:]):
            # Raises ValueError if a row does not have exactly 11 columns.
            (SrNo, Utterance, Speaker, Emotion, Sentiment, Dialogue_ID,
             Utterance_ID, Season, Episode, StartTime, EndTime) = row

            if ONLY_ACTORS and Speaker not in ACTORS:
                continue

            diautt = f"dia{Dialogue_ID}_utt{Utterance_ID}"
            if f"{diautt}.mp4" not in available_videos:
                weird += 1
                continue

            datasets[DATASET][diautt] = {
                'SrNo': SrNo,
                'Utterance': Utterance,
                'Speaker': Speaker,
                'Emotion': Emotion,
                'Sentiment': Sentiment,
                'Dialogue_ID': Dialogue_ID,
                'Utterance_ID': Utterance_ID,
                'Season': Season,
                'Episode': Episode,
                'StartTime': StartTime,
                'EndTime': EndTime,
            }

    print(f"the number of video that didn't match is : {weird}")

    # allow_unicode=True emits non-ASCII characters verbatim, so the file is
    # opened as UTF-8 explicitly instead of relying on the platform default.
    with open(PATHS['ANNOTATIONS']['all'], 'w', encoding='utf-8') as stream:
        yaml.safe_dump(datasets, stream, encoding='utf-8', allow_unicode=True)

else:
    with open(PATHS['ANNOTATIONS']['all'], 'r', encoding='utf-8') as stream:
        datasets = yaml.safe_load(stream)

## Instantiate the face models (detection, ArcFace embeddings and, unless `ONLY_FACE` is set, age/gender)

In [None]:
from cltl_face_all.face_alignment import FaceDetection
from cltl_face_all.arcface import ArcFace

# Face detector and ArcFace embedding model are always created.
fd = FaceDetection(face_detector='sfd', device='cpu')
af = ArcFace()

# The age/gender model is imported and created only when it will be used.
if not ONLY_FACE:
    from cltl_face_all.agegender import AgeGender

    ag = AgeGender(device='cpu')

## Run over the video frames

In [None]:
import av
import numpy as np
import os

import signal
import time
from tqdm.notebook import tqdm

class Timeout(Exception):
    """Raised by the SIGALRM handler when a pending alarm expires."""

def handler(sig, frame):
    """Signal handler that interrupts the running code by raising ``Timeout``.

    Args:
        sig: Signal number passed by the ``signal`` machinery (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        Timeout: Always.
    """
    raise Timeout()

# Extract per-frame face features for every annotated video and store them as
# one ``.npy`` file per video.
#
# A SIGALRM-based watchdog bounds the time spent on opening a video and on
# processing each single frame: when the alarm expires, ``handler`` raises
# ``Timeout`` inside the guarded ``try`` block and the item is skipped.
signal.signal(signal.SIGALRM, handler)

for DATASET in tqdm(['train', 'dev', 'test']):
    os.makedirs(PATHS['VISUAL_FEATURES'][DATASET], exist_ok=True)

    for diautt, anno in tqdm(datasets[DATASET].items()):
        save_full_path = os.path.join(PATHS['VISUAL_FEATURES'][DATASET], diautt) + '.npy'

        # Already processed: check before opening the video so no container is
        # opened (and left open) for work that will be skipped anyway.
        if os.path.isfile(save_full_path) and os.path.getsize(save_full_path) > 256:
            continue

        signal.alarm(10)
        try:
            # NOTE(review): the annotation cell reads PATHS['ORIGINAL_VIDS']
            # while this one reads PATHS['ORIGINAL_VIDEOS'] -- confirm which
            # key configs/paths.yaml defines. A missing key is only printed
            # here, once per video.
            vidpath = os.path.join(PATHS['ORIGINAL_VIDEOS'][DATASET], diautt) + '.mp4'
            container = av.open(vidpath)
        except Exception as e:
            print(e)
            continue
        finally:
            # Always disarm the watchdog; a pending alarm would otherwise fire
            # later, outside any try block, and abort the whole run.
            signal.alarm(0)

        # frame index -> list of per-face feature dicts (empty if no face or
        # if processing the frame failed / timed out).
        to_dump = {}
        try:
            for idx, frame in enumerate(container.decode(video=0)):
                signal.alarm(10)
                try:
                    to_dump[idx] = []
                    numpy_RGB = np.array(frame.to_image())
                    batch = numpy_RGB[np.newaxis, ...]  # batch of one frame
                    bboxes = fd.detect_faces(batch)

                    if len(bboxes[0]) == 0:
                        continue

                    landmarks = fd.detect_landmarks(batch, bboxes)

                    faces = fd.crop_and_align(batch, bboxes, landmarks)
                    faces = np.concatenate(faces, axis=0)
                    embeddings = af.predict(faces)

                    if ONLY_FACE:
                        ages, genders = [None] * len(embeddings), [None] * len(embeddings)
                    else:
                        ages, genders = ag.predict(faces)

                    for bb, lm, a, g, emb in zip(bboxes[0], landmarks[0], ages, genders, embeddings):
                        to_dump[idx].append({'bbox': bb,
                                             'landmark': lm,
                                             'age': a,
                                             'gender': g,
                                             'embedding': emb})
                except Exception as e:
                    # Includes Timeout: report and move on to the next frame.
                    print(e)
                    continue
                finally:
                    # Disarm before decoding the next frame / saving, both of
                    # which run outside the guarded block.
                    signal.alarm(0)
        finally:
            # Release the file handle and decoder resources of this video.
            container.close()

        # ``to_dump`` is a dict, so NumPy stores it as a pickled object array;
        # read it back with np.load(path, allow_pickle=True).item().
        np.save(save_full_path, to_dump)