In [24]:
# Imports
import matplotlib.pyplot as plt
import cv2
import torch
import pathlib
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import logging
import shutil

os.environ['DEEPFACE_LOG_LEVEL'] = str(logging.ERROR)
from deepface import DeepFace

# Constants 
CWD = pathlib.Path(os.path.abspath(""))
GIT_ROOT = CWD.parent.parent
DATA_DIR = GIT_ROOT / "data" / 'AIED2024'
REID_DB = DATA_DIR / 'reid' / 'db'
OUTPUT_DIR = DATA_DIR / 'reid' / 'tables'

os.makedirs(OUTPUT_DIR, exist_ok=True)

In [41]:
# From video, create the cropped face images
def generate_cropped_faces(video_file, tracking_file, output_dir):
    assert video_file.exists()
    assert tracking_file.exists()
    os.makedirs(output_dir, exist_ok=True)

    # Load data
    cap = cv2.VideoCapture(str(video_file))
    LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    df = pd.read_csv(tracking_file)

    for i in tqdm(range(LENGTH), total=LENGTH):
        
        # Load frame
        ret, frame = cap.read()
        if not ret:
            break

        # Get the detected faces
        detected_faces = df[df['Frame'] == i]

        for (j, row) in detected_faces.iterrows():
            crop = frame[int(row['Y']):int(row['Y']+row['Height']), int(row['X']):int(row['X']+row['Width'])]
            # cv2.imshow('crop', crop)
            # cv2.waitKey(0)

            cv2.imwrite(str(output_dir / f'frame_{i}_id_{int(row["Student_ID"])}.png'), crop)

generate_cropped_faces(
    DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
    DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
    DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1'
)

cv2.destroyAllWindows()

100%|██████████| 13464/13464 [01:08<00:00, 196.83it/s]


In [45]:
# Sanity check
def sanity_check(video_file: pathlib.Path, tracking_file: pathlib.Path, cropped_face_dir: pathlib.Path):
    assert video_file.exists()
    assert tracking_file.exists()
    assert cropped_face_dir.exists()

    # Load the file
    df = pd.read_csv(tracking_file)
    cap = cv2.VideoCapture(str(video_file))
    LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Get the face numpy array
    exists = 0
    for i in tqdm(range(LENGTH), total=LENGTH):
        
        # Get the detected faces
        detected_faces = df[df['Frame'] == i]

        for (j, row) in detected_faces.iterrows():
            face_crop = cropped_face_dir / f"frame_{i}_id_{int(row['Student_ID'])}.png"
            if face_crop.exists():
                exists += 1

    print(f"Df length: {len(df)}, Video length: {LENGTH}")
    print(f"Exists: {exists}/{len(df)} = {exists/len(df):.2f}")

sanity_check(
    DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
    DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
    DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1'
)

100%|██████████| 13464/13464 [00:10<00:00, 1343.82it/s]

Df length: 78894, Video length: 13464
Exists: 78894/78894 = 1.00





In [39]:
# 52,141 items according to file system
dir = DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1'
print(f"Total cropped images: {len([x for x in dir.iterdir()])}")

Total cropped images: 52141


In [56]:
SIZE_REQ = 40
CONFIDENCE_THRESHOLD = 0.5

def reid_process(video_file: pathlib.Path, tracking_file: pathlib.Path, cropped_face_dir: pathlib.Path, output_file: pathlib.Path):
    assert video_file.exists()
    assert tracking_file.exists()
    assert cropped_face_dir.exists()

    # Load the file
    df = pd.read_csv(tracking_file)
    cap = cv2.VideoCapture(str(video_file))
    LENGTH = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create output folder to verify users REID
    reid_folder = cropped_face_dir.parent / f"{cropped_face_dir.name}_reid"
    if reid_folder.exists():
        shutil.rmtree(reid_folder)
    os.makedirs(reid_folder, exist_ok=True)
    for id in ['1','2','3','4','5','6','sara','teacher']:
        os.makedirs(reid_folder/id, exist_ok=True)

    # Create reid container
    reid_container = {'cropped_file': [], 'reid': [], 'confidence': [], 'comment': []}

    # Get the face numpy array
    for i in tqdm(range(LENGTH), total=LENGTH):

        try:

            # Get the detected faces
            detected_faces = df[df['Frame'] == i]

            id_to_reid_mapping = {}

            for (j, row) in detected_faces.iterrows():

                filename = f"frame_{i}_id_{int(row['Student_ID'])}.png"
                face_crop = cropped_face_dir / filename
                assert face_crop.exists()
                crop = cv2.imread(str(face_crop))

                # If the image is to small, not worth the trouble
                h,w = crop.shape[:2]
                if (h < SIZE_REQ or w < SIZE_REQ):
                    reid_container['cropped_file'].append(face_crop.name)
                    reid_container['reid'].append(None)
                    reid_container['confidence'].append(None)
                    reid_container['comment'].append("image size too small")
                    continue

                # cv2.imshow('crop', crop)
                # cv2.waitKey(0)

                match_df = DeepFace.find(
                    img_path=crop,
                    db_path=REID_DB,
                    model_name="Facenet512",
                    distance_metric="euclidean_l2",
                    enforce_detection=False,
                    silent=True
                )[0]

                ids = match_df['identity'].str.split("/").str.get(-2)

                if len(ids) == 0:
                    reid_container['cropped_file'].append(face_crop.name)
                    reid_container['reid'].append(None)
                    reid_container['confidence'].append(None)
                    reid_container['comment'].append("Failed REID: No Match")
                    pass
                else:

                    # Success
                    counts = ids.value_counts(normalize=True)
                    maxid = counts.argmax()
                    reid = counts.index[maxid]
                    confidence = counts.iloc[maxid]

                    if confidence < CONFIDENCE_THRESHOLD:
                        reid_container['cropped_file'].append(face_crop.name)
                        reid_container['reid'].append(None)
                        reid_container['confidence'].append(None)
                        reid_container['comment'].append("Failed REID: Low confidence")
                    else:
                        reid_container['cropped_file'].append(face_crop.name)
                        reid_container['reid'].append(reid)
                        reid_container['confidence'].append(confidence)
                        reid_container['comment'].append("")

                    # Save the image as well
                    new_fp = reid_folder / str(reid) / filename
                    cv2.imwrite(str(new_fp), crop)

        except KeyboardInterrupt:
            print("KeyboardInterrupt detected, saving data")
            break

    # Save the container
    reid_df = pd.DataFrame(reid_container)
    reid_df.to_csv(output_file, index=False)

reid_process(
    DATA_DIR / 'videos' / 'day1' / 'block-a-blue-day1-first-group-cam2.mp4',
    DATA_DIR / 'trackings' / 'Day1Group1Camera2_with_student_IDs.csv',
    DATA_DIR / 'reid' / 'cropped_faces' / 'd1g1',
    OUTPUT_DIR / 'd1g1-cam2.csv'
)

cv2.destroyAllWindows()

  0%|          | 62/13464 [00:20<1:13:51,  3.02it/s]

KeyboardInterrupt detected, saving data



