<a href="https://www.kaggle.com/code/junhyeonkwon/using-yunet?scriptVersionId=175918083" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

### Create dataset.csv from input dataset

In [1]:
# repo includes 
#   face_detection_yunet_2023mar.onnx
#   dataset.csv
#   dataset_nfaces.csv
!git clone https://github.com/luanakwon/Deepfake-Detection-MAICON2023.git

Cloning into 'Deepfake-Detection-MAICON2023'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 30 (delta 8), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (30/30), 267.20 KiB | 5.04 MiB/s, done.


In [2]:
import numpy as np
import pandas as pd
import cv2
from tqdm.notebook import tqdm
import shutil
import os
from glob import glob

In [3]:
# read video from path
# read total 64 frames
# if a frame has more or less then 1 face -> move video to error
# crop square area fitting bbox
# resize to 384
# return
def get_facecrops(detector, video_path, n_frame, max_t_len=0, scale=1, dsize=(384,384)):
    early_stop_flag = False
    if os.path.exists(video_path):
        # info about video
        cap = cv2.VideoCapture(video_path)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        length = min(length, int(max_t_len*fps)) if max_t_len>0 else length
        skips = length // n_frame
        
        # set detector
        detector.setInputSize((width, height))
        
        faces = []
        count_skips = 0
        bad_skips = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                if count_skips % skips == 0:
                    ret2, det = detector.detect(frame)
                    # wrong detection, try next frame
                    if not ret2 or det is None or det.shape[0] < 1:
                        bad_skips += 1
                        # early stop - over 70% of the detects are useless
                        if early_stop_flag and bad_skips > skips+(bad_skips + len(faces))*0.7:
                            print(f"Early stopping at {cap.get(cv2.CAP_PROP_POS_FRAMES)/fps:.3}s.. ", end=' ')
                            cap.release()
                            break
                        continue
                    # multiple faces in frame
                    elif det.shape[0] > 1:
                        # append all
                        for det_each in det:
                            faces.append(crop_from_image(frame, det_each, scale, dsize))
                    # one face per frame
                    else:
                        faces.append(crop_from_image(frame,det[0],scale,dsize))
                    # early stop - stop reading when all n frames are found
                    if len(faces) >= n_frame:
                        cap.release()
                        break
                count_skips += 1
            else:
                cap.release()
                break
        # if len(faces)  != n_frame   
        if len(faces) != n_frame:
            print(f"Only found {len(faces)} face crops in {video_path.split('/')[-1]}")
        return faces
    else:
        raise FileNotFoundError(video_path)
    
def crop_from_image(frame, detection, scale, dsize):
    # frame - image of shape (H, W, C)
    # scale - float
    # dsize - (H,W)
    # detection - (1,15)
    x, y = detection[0:2] # 0-1: x, y of bbox top left corner
    width, height = detection[2:4] # 2-3: width, height of bbox
    # 4-5: x, y of right eye (blue point in the example image)
    # 6-7: x, y of left eye (red point in the example image)
    # 8-9: x, y of nose tip (green point in the example image)
    # 10-11: x, y of right corner of mouth (pink point in the example image)
    # 12-13: x, y of left corner of mouth (yellow point in the example image)
    # 14: face score
    
    # apply scale (up/down)
    dx = width*(scale-1)/2
    dy = height*(scale-1)/2
    x = x-dx
    y = y-dy
    x2 = x+width*scale
    y2 = y+height*scale
    # adjust aspect ratio
    r_in = height/width
    r_out= dsize[0]/dsize[1]
    if r_in > r_out: # in image taller, widen width
        new_w = height/r_out
        x -= (new_w-width)/2
        x2 += (new_w-width)/2
    elif r_in < r_out: # in image flatter
        new_h = width*r_out
        y -= (new_h-height)/2
        y2 += (new_h-height)/2
    # pad frame
    top = 0 if y >= 0 else int(abs(y))
    bottom = 0 if y2 < frame.shape[0] else int(y2-frame.shape[0])
    left = 0 if x >= 0 else int(abs(x))
    right = 0 if x2 < frame.shape[1] else int(x2-frame.shape[1])
    frame = cv2.copyMakeBorder(
        frame, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT,value=0)
    # crop 
    crop = frame[int(y+top):int(y2+top),int(x+left):int(x2+left)]
    # resize crop to dsize
    crop = cv2.resize(crop,(dsize[1],dsize[0]),fx=0,fy=0)
    
    return crop
    

In [4]:
model_path = '/kaggle/working/Deepfake-Detection-MAICON2023/tmp_data/FaceDet_model_Yunet/face_detection_yunet_2023mar.onnx'
detector = cv2.FaceDetectorYN.create(model_path,"", (320, 320))

os.makedirs('fakevideos-tmp', exist_ok=True)

# df_nface_path = '/kaggle/working/Deepfake-Detection-MAICON2023/tmp_data/csv/dataset_nfaces.csv'
df_nface_path = '/kaggle/input/fakevideos-tmp/metadata.csv'
df_nface = pd.read_csv(df_nface_path)

df_nface.head()

Unnamed: 0,video,label,split,path,num_faces,Unnamed: 5
0,yilongma_interview,FAKE,FF,/kaggle/input/fakevideos-tmp/yilongma_intervie...,1.0,
1,yilongma_shorts_1,FAKE,FF,/kaggle/input/fakevideos-tmp/yilongma_shorts_1...,1.0,
2,yilongma_shorts_2,FAKE,FF,/kaggle/input/fakevideos-tmp/yilongma_shorts_2...,1.0,
3,,,,,,
4,,,,,,


In [5]:
for _, row in tqdm(df_nface[df_nface['num_faces'] == 1].iterrows(),total=2549):
    video_name = row['video']
    video_path = row['path']
    # make dir
    dir_path = f"fakevideos-tmp/{video_name.split('.')[0]}"
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
    os.mkdir(dir_path)
    
    # get face crops
    faces = get_facecrops(detector, video_path, n_frame=128, max_t_len = 30, scale=1.5)
    # save face crops
    for i,face in enumerate(faces):
        face_path = os.path.join(dir_path,f"{i:02}.jpg")
        cv2.imwrite(face_path,face)


  0%|          | 0/2549 [00:00<?, ?it/s]

In [6]:
print(faces)

[array([[[ 46,  80,  75],
        [ 50,  84,  79],
        [ 51,  83,  76],
        ...,
        [176, 196, 218],
        [177, 196, 217],
        [178, 196, 216]],

       [[ 43,  77,  72],
        [ 53,  87,  82],
        [ 65,  97,  90],
        ...,
        [176, 196, 218],
        [177, 196, 217],
        [178, 196, 216]],

       [[ 45,  79,  72],
        [ 50,  84,  77],
        [ 54,  87,  80],
        ...,
        [178, 196, 218],
        [178, 196, 217],
        [178, 196, 216]],

       ...,

       [[ 79, 143, 139],
        [ 63, 129, 125],
        [ 56, 136, 128],
        ...,
        [104,  88,  80],
        [109,  93,  85],
        [109,  93,  85]],

       [[ 88, 151, 148],
        [ 63, 131, 126],
        [ 56, 135, 127],
        ...,
        [108,  92,  84],
        [110,  94,  86],
        [112,  96,  88]],

       [[ 88, 155, 151],
        [ 67, 140, 133],
        [ 54, 134, 126],
        ...,
        [106,  90,  82],
        [112,  96,  88],
        [107,  92,  84]

In [7]:
# create dataset.csv
# columns video frame_id path label split

path = glob('/kaggle/working/fakevideos-tmp/*/*')
path = [p.lstrip('/kaggle/working/') for p in path]
video = [p.split('/')[-2]+'.mp4' for p in path]
frame_id = [int(p.split('/')[-1].rstrip('.jpg')) for p in path]

df1 = pd.read_csv('/kaggle/input/fakevideos-tmp/metadata.csv')
df2 = pd.DataFrame.from_dict(
    {
        'video' : video,
        'frame_id' : frame_id,
        'path' : path
    }
)
df1.drop(columns=['path'], inplace=True)
df = pd.merge(df2, df1, left_on='video',right_on='video',how='left')
df.sort_values(['split','path'],inplace=True)
print(len(df))
df.reset_index(drop=True, inplace=True)
df.head()

384


Unnamed: 0,video,frame_id,path,label,split,num_faces,Unnamed: 5
0,yilongma_interview.mp4,0,fakevideos-tmp/yilongma_interview/00.jpg,,,,
1,yilongma_interview.mp4,1,fakevideos-tmp/yilongma_interview/01.jpg,,,,
2,yilongma_interview.mp4,2,fakevideos-tmp/yilongma_interview/02.jpg,,,,
3,yilongma_interview.mp4,3,fakevideos-tmp/yilongma_interview/03.jpg,,,,
4,yilongma_interview.mp4,4,fakevideos-tmp/yilongma_interview/04.jpg,,,,


In [8]:
df.to_csv('fakevideos_meta.csv',index=False)

In [9]:
!rm -r /kaggle/working/Deepfake-Detection-MAICON2023