## This notebook loads video files and extracts their key frames as images

In [20]:
import heapq
import os
import random

import cv2
import numpy as np

In [3]:
# extract key frames from video
def extract_key_frames(video_path, num_frames=10):
    """Pick the frames of a video that change the most from the frame before.

    Each frame is converted to grayscale and compared with its predecessor
    using the summed absolute pixel difference. The ``num_frames`` frames with
    the largest difference are kept.

    The video is processed in a single streaming pass: only the previous
    grayscale frame and the current best candidates are held in memory, not
    every decoded frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of key frames to return.

    Returns:
        A list of frames (as returned by ``cap.read()``) in chronological
        order. The list is empty when no frame could be read or when
        ``num_frames`` is not positive; for a single-frame video it holds
        that one frame.
    """
    if num_frames <= 0:
        return []

    cap = cv2.VideoCapture(video_path)
    first_frame = None
    prev_gray = None
    # Min-heap of (diff_sum, frame_index, frame). The unique frame_index breaks
    # ties on diff_sum, so the frame arrays themselves are never compared.
    candidates = []
    try:
        frame_index = 0
        while True:
            success, frame = cap.read()
            if not success:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_gray is None:
                first_frame = frame
            else:
                # The difference is attributed to the *current* frame, i.e. the
                # frame that actually changed relative to its predecessor.
                diff_sum = int(np.sum(cv2.absdiff(gray, prev_gray)))
                entry = (diff_sum, frame_index, frame)
                if len(candidates) < num_frames:
                    heapq.heappush(candidates, entry)
                elif diff_sum > candidates[0][0]:
                    # Evict the weakest candidate kept so far.
                    heapq.heapreplace(candidates, entry)

            prev_gray = gray
            frame_index += 1
    finally:
        # Release the capture even if decoding or conversion raised.
        cap.release()

    # No frame could be read at all.
    if first_frame is None:
        return []

    # A single frame has nothing to be compared with; it is the key frame.
    if not candidates:
        return [first_frame]

    # Hand the key frames back in the order they appear in the video.
    candidates.sort(key=lambda entry: entry[1])
    return [frame for _, _, frame in candidates]

In [27]:
# Each pair below is (folder holding the source videos, folder that receives
# the extracted key frames).

# violence dataset, positive samples
Violence_positive_video_dir, violence_positive_out_dir = (
    './data/violence_dataset/Violence',
    './key_frames/violence/set1',
)

# violence dataset, negative (non-violence) samples
Violence_negative_video_dir, violence_negative_out_dir = (
    './data/violence_dataset/NonViolence',
    './key_frames/violence/set0',
)

# tiktok dataset, positive (harmful content) samples
tiktok_positive_video_dir, tiktok_positive_out_dir = (
    './data/tiktok/train/Harmful Content',
    './key_frames/tiktok/set1',
)

# tiktok dataset, negative (safe) samples
tiktok_negative_video_dir, tiktok_negative_out_dir = (
    './data/tiktok/train/Safe',
    './key_frames/tiktok/set0',
)


In [18]:
# extract keyframes from files path
def extract_keyframes_from_files(video_dir, output_dir, num_frames=10, num_of_sample=100):
    """Extract key frames from a random sample of the ``.mp4`` files in a folder.

    For every sampled video a sub-folder named after the video (without its
    extension) is created under ``output_dir`` and the key frames are written
    there as ``key_frame_<i>.jpg``.

    Sampling uses the module-level ``random`` state; call ``random.seed(...)``
    beforehand if a reproducible sample is needed.

    Args:
        video_dir: Folder that is searched for ``.mp4`` files.
        output_dir: Folder under which the per-video output folders are created.
        num_frames: Forwarded to ``extract_key_frames``.
        num_of_sample: Number of videos to sample; capped at the number of
            ``.mp4`` files actually present in ``video_dir``.
    """
    # List the folder that was passed in, not a notebook-level global, so the
    # sampled names always exist under ``video_dir``. Sorting makes a seeded
    # sample independent of the order ``os.listdir`` happens to return.
    video_files = sorted(f for f in os.listdir(video_dir) if f.endswith('.mp4'))

    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(num_of_sample, len(video_files))
    sample_video_files = random.sample(video_files, sample_size)

    for video_file in sample_video_files:
        video_path = os.path.join(video_dir, video_file)
        print(f"now extract keyframes from file: {video_file}...")
        key_frames = extract_key_frames(video_path, num_frames=num_frames)

        output_dir_temp = os.path.join(output_dir, os.path.splitext(video_file)[0])
        os.makedirs(output_dir_temp, exist_ok=True)

        for i, frame in enumerate(key_frames):
            frame_path = os.path.join(output_dir_temp, f'key_frame_{i}.jpg')
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(frame_path, frame):
                print(f"warning: failed to write {frame_path}")

In [24]:
# key frames for the positive violence videos
extract_keyframes_from_files(
    video_dir=Violence_positive_video_dir,
    output_dir=violence_positive_out_dir,
)

now extract keyframes from file: V_581.mp4...
now extract keyframes from file: V_834.mp4...
now extract keyframes from file: V_394.mp4...
now extract keyframes from file: V_968.mp4...
now extract keyframes from file: V_166.mp4...
now extract keyframes from file: V_537.mp4...
now extract keyframes from file: V_465.mp4...
now extract keyframes from file: V_981.mp4...
now extract keyframes from file: V_60.mp4...
now extract keyframes from file: V_895.mp4...
now extract keyframes from file: V_734.mp4...
now extract keyframes from file: V_594.mp4...
now extract keyframes from file: V_475.mp4...
now extract keyframes from file: V_45.mp4...
now extract keyframes from file: V_648.mp4...
now extract keyframes from file: V_248.mp4...
now extract keyframes from file: V_757.mp4...
now extract keyframes from file: V_872.mp4...
now extract keyframes from file: V_291.mp4...
now extract keyframes from file: V_483.mp4...
now extract keyframes from file: V_340.mp4...
now extract keyframes from file: V_8

In [28]:
# key frames for the negative (non-violence) videos
extract_keyframes_from_files(
    video_dir=Violence_negative_video_dir,
    output_dir=violence_negative_out_dir,
)

now extract keyframes from file: NV_268.mp4...
now extract keyframes from file: NV_383.mp4...
now extract keyframes from file: NV_183.mp4...
now extract keyframes from file: NV_159.mp4...
now extract keyframes from file: NV_437.mp4...
now extract keyframes from file: NV_985.mp4...
now extract keyframes from file: NV_495.mp4...
now extract keyframes from file: NV_178.mp4...
now extract keyframes from file: NV_611.mp4...
now extract keyframes from file: NV_249.mp4...
now extract keyframes from file: NV_639.mp4...
now extract keyframes from file: NV_414.mp4...
now extract keyframes from file: NV_46.mp4...
now extract keyframes from file: NV_218.mp4...
now extract keyframes from file: NV_182.mp4...
now extract keyframes from file: NV_376.mp4...
now extract keyframes from file: NV_480.mp4...
now extract keyframes from file: NV_917.mp4...
now extract keyframes from file: NV_346.mp4...
now extract keyframes from file: NV_768.mp4...
now extract keyframes from file: NV_518.mp4...
now extract ke

In [25]:
# key frames for the positive (harmful content) tiktok videos
extract_keyframes_from_files(
    video_dir=tiktok_positive_video_dir,
    output_dir=tiktok_positive_out_dir,
)

now extract keyframes from file: coreuk_6992883643353976070.mp4...
now extract keyframes from file: diasmei_7328769514630368517.mp4...
now extract keyframes from file: dylanjackson230_6896937102961937670.mp4...
now extract keyframes from file: flffyegg_7321735344318254341.mp4...
now extract keyframes from file: gobungynepal_7308253924073491720.mp4...
now extract keyframes from file: bob.reese_7085983475299454254.mp4...
now extract keyframes from file: vtvcab.tintuc_7183144405086342402.mp4...
now extract keyframes from file: joescandrett_7248584533333904666.mp4...
now extract keyframes from file: ratedrawtv_7267262321620258078.mp4...
now extract keyframes from file: askinem_7319032656782740782.mp4...
now extract keyframes from file: duowheelshub_7101432223336779014.mp4...
now extract keyframes from file: alex.artfire_6692521856991235334.mp4...
now extract keyframes from file: girl_xinh.sexy_7281318488881384705.mp4...
now extract keyframes from file: josh_norton3_7312590065761570094.mp4.

In [26]:
# key frames for the negative (safe) tiktok videos
extract_keyframes_from_files(
    video_dir=tiktok_negative_video_dir,
    output_dir=tiktok_negative_out_dir,
)

now extract keyframes from file: reviewbet_7206179678397222186.mp4...
now extract keyframes from file: bwi9512_7292742561595280641.mp4...
now extract keyframes from file: connhcruz_7248924081041575174.mp4...
now extract keyframes from file: chuoichienthimasterchef_7361807151481834784.mp4...
now extract keyframes from file: hoshiphan_7375512474809928961.mp4...
now extract keyframes from file: linhcuuhoadaily_7276018615009217794.mp4...
now extract keyframes from file: taphoaanvat0_7365855849266351376.mp4...
now extract keyframes from file: janenguyen2610_7261937040827174146.mp4...
now extract keyframes from file: 10vancauhoivisao247_7051534192915598593.mp4...
now extract keyframes from file: hamuvevoi_7261949013962837255.mp4...
now extract keyframes from file: mocuabatoctv_7053014064464809242.mp4...
now extract keyframes from file: chutuphuyen_7369556801873874177.mp4...
now extract keyframes from file: babykopohome_6814673856909839617.mp4...
now extract keyframes from file: yimi.toys_736