## Setup

In [1]:
import os
import pickle
import gc
import json
import glob
import time
import threading
import queue
import itertools
import pandas as pd
import numpy as np
import math
from tqdm import tqdm

# Image library
import cv2
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Root of the competition's training split on Kaggle.
data_dir = '/kaggle/input/liveness-detection-zalo-2022/train/train'

# NOTE: despite its name, `label_dir` is the path of the label CSV *file*;
# `video_dir` is the folder that holds the videos.
label_dir, video_dir = (
    os.path.join(data_dir, 'label.csv'),
    os.path.join(data_dir, 'videos'),
)

In [3]:
# Notebook-wide settings. Only `show_examples` and `n_frames` are read by the
# cells shown in this notebook; the remaining keys are kept as-is.
CFG = dict(
    show_examples=True,       # print a preview of the label table after loading
    vectorize=False,
    batch_size=60,
    n_frames=15,              # used as the sampling stride: every 15th frame is kept
    face_shape=(160, 160),
    create_dataset_new=True,
)

In [4]:
# Load the per-video labels (one row per video: `fname`, `liveness_score`).
df = pd.read_csv(label_dir)

# Optional sanity check: show the first rows of the label table.
if CFG['show_examples']:
    preview = df.head()
    print(preview)

   fname  liveness_score
0  1.mp4               0
1  2.mp4               1
2  3.mp4               1
3  5.mp4               0
4  7.mp4               1


## Cut Frames at Regular Intervals

In [5]:
def extract_frames(df, img_path, dataset_path):
    """Sample frames from every video listed in ``df`` and save them as JPEGs.

    For each row of ``df`` the video ``video_dir/<fname>`` is opened and every
    ``CFG['n_frames']``-th frame (frame 0, then 0 + stride, ...) is written to
    ``img_path`` as ``<fname up to its first dot>_<frame number>.jpg``. One
    metadata row is recorded per image that was written, and the table is
    saved as ``metadata.csv`` inside ``dataset_path``.

    Args:
        df: DataFrame with the columns ``fname`` (video file name) and
            ``liveness_score`` (label copied into the metadata).
        img_path: Directory that receives the extracted JPEG frames.
        dataset_path: Directory that receives ``metadata.csv``.

    Returns:
        None. Results are written to disk.

    Notes:
        A failure on a single video is printed and that video is skipped.
        ``KeyboardInterrupt`` stops the loop early; the metadata gathered so
        far is still written.
    """
    stride = CFG['n_frames']  # used as the step between sampled frame numbers
    output_metadata = {'fname': [], 'img_name': [], 'liveness_score': []}

    # Make sure both output locations exist before anything is written.
    os.makedirs(img_path, exist_ok=True)
    os.makedirs(dataset_path, exist_ok=True)

    # Walk the rows by position so the result does not depend on ``df``
    # carrying a default RangeIndex (the labels of a filtered or shuffled
    # frame are not valid positions for ``iloc``).
    rows = zip(df['fname'].tolist(), df['liveness_score'].tolist())

    for fname, liveness_score in tqdm(rows, total=len(df)):
        v_cap = None
        try:
            ## Cut video frames
            v_cap = cv2.VideoCapture(os.path.join(video_dir, fname))
            success = v_cap.grab()
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fnos = list(range(0, v_len, stride))

            # An unreadable or empty video reports zero frames; skip it with a
            # clear message instead of failing on ``fnos[0]``.
            if not fnos:
                print(f"Skipping {fname}: no frames could be read.")
                continue

            # set initial frame (fnos[0] is always 0 because the range starts at 0)
            v_cap.set(cv2.CAP_PROP_POS_FRAMES, fnos[0])

            stem = fname.split('.')[0]
            pos, count = 0, fnos[0]
            while success:
                if count == fnos[pos]:
                    # Decode only the frames that are actually kept.
                    success, frame = v_cap.retrieve()
                    if not success:
                        break

                    img_name = f"{stem}_{count}.jpg"

                    # Record metadata only for images that really reached the
                    # disk, so metadata.csv never points at a missing file.
                    if cv2.imwrite(os.path.join(img_path, img_name), frame):
                        output_metadata['fname'].append(fname)
                        output_metadata['img_name'].append(img_name)
                        output_metadata['liveness_score'].append(liveness_score)
                    else:
                        print(f"Could not write {img_name}.")

                    pos += 1
                    if pos >= len(fnos):
                        break

                count += 1
                success = v_cap.grab()

        except KeyboardInterrupt:
            print('\nStopped.')
            break

        except Exception as e:
            # Best effort: report the failing video and carry on with the rest.
            print(f"{fname}: {e}")

        finally:
            # Always free the capture handle, including on errors and on skip.
            if v_cap is not None:
                v_cap.release()

    output_metadata = pd.DataFrame(output_metadata)
    output_metadata = output_metadata.dropna()
    output_metadata.to_csv(os.path.join(dataset_path, "metadata.csv"), index=False)

## Main

In [6]:
if __name__ == '__main__':
    
    ## Prepare Kaggle dataset for uploading
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    secret_value_0 = user_secrets.get_secret("KAGGLE_KEY")

    os.makedirs('/kaggle/dataset/', exist_ok=True)
    os.makedirs('/kaggle/dataset/images', exist_ok=True)
    img_path = '/kaggle/dataset/images'
    dataset_path = '/kaggle/dataset'
    
    ## Kaggle API token
    os.makedirs('/root/.kaggle/', exist_ok=True)
    api_token = {"username":"vovanquangnbk","key":"507e3751d7cd3d60453ea1abe2b9ca9c"}
    with open('/root/.kaggle/kaggle.json', 'w') as file:
        json.dump(api_token, file)
    !chmod 600 /root/.kaggle/kaggle.json
    
    ## Kaggle dataset metadata
    meta = dict(
        id="vovanquangnbk/fas-zalo-2022-frames",
        title="FAS Zalo 2022 Frames",
        isPrivate=True,
        licenses=[dict(name="other")]
    )

    with open('/kaggle/dataset/dataset-metadata.json', 'w') as f:
        json.dump(meta, f)
    
    ## run main function
    extract_frames(df, img_path, dataset_path)
    
    ## upload to Kaggle
    # !kaggle datasets create -p "/kaggle/dataset" --dir-mode zip
    !kaggle datasets version -p "/kaggle/dataset" -m "Updated via notebook" --dir-mode zip

100%|██████████| 1168/1168 [11:13<00:00,  1.73it/s]


Starting upload for file images.zip
100%|███████████████████████████████████████| 2.56G/2.56G [00:12<00:00, 212MB/s]
Upload successful: images.zip (3GB)
Starting upload for file metadata.csv
100%|████████████████████████████████████████| 264k/264k [00:00<00:00, 1.47MB/s]
Upload successful: metadata.csv (264KB)
Dataset version is being created. Please check progress at https://www.kaggle.com/vovanquangnbk/fas-zalo-2022-frames
