In [1]:
import os
import cv2
import json
import glob
import numpy as np
from tqdm import tqdm

# Parse video info

In [2]:
# Root folders that contain the per-part video directories.
videos_dir = 'D:/AIChallenge/data_sotuyen1/video'
videos_dir2 = 'D:/AIChallenge/data_sotuyen2/video'

# Nested mapping: data part (e.g. 'L14') -> video id (e.g. 'V001') -> video file path.
all_video_paths = {}

# Collect video paths from a single dataset directory
def collect_video_paths_from_dir(base_dir, all_video_paths):
    """Register every ``.mp4`` file found under ``base_dir`` in ``all_video_paths``.

    Each entry of ``base_dir`` is reduced to the token after its last ``_``
    (e.g. ``Videos_L01`` -> ``L01``); videos for that part are then looked up
    in ``{base_dir}/Videos_<part>/video``.

    Args:
        base_dir: Directory whose entries name the data parts.
        all_video_paths: Dict updated in place with the shape
            ``{data_part: {video_id: video_path}}``. A data part always gets
            an entry, even when no video is found for it.

    Returns:
        None. The result is written into ``all_video_paths``.
    """
    for part in sorted(os.listdir(base_dir)):
        data_part = part.split('_')[-1]  # L01, L02 for ex

        # Create the per-part dict on first sight; keep existing entries otherwise.
        part_videos = all_video_paths.setdefault(data_part, dict())

        data_part_path = f'{base_dir}/Videos_{data_part}/video'

        # isdir rather than exists: a plain file at this path would make listdir raise.
        if not os.path.isdir(data_part_path):
            continue

        for video_file in sorted(os.listdir(data_part_path)):
            stem, ext = os.path.splitext(video_file)
            # Skip anything that is not a video (thumbnails, notes, OS metadata files).
            if ext.lower() != '.mp4':
                continue
            # 'L14_V001' -> 'V001'; only the extension is stripped, never a
            # '.mp4' substring in the middle of the name.
            video_id = stem.split('_')[-1]
            part_videos[video_id] = f'{data_part_path}/{video_file}'

# Collect video paths; only the first directory is scanned, the second call is disabled
collect_video_paths_from_dir(base_dir=videos_dir, all_video_paths=all_video_paths)
# collect_video_paths_from_dir(videos_dir2, all_video_paths)

# Inspect the result
print(all_video_paths)



{'L14': {'V001': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V001.mp4', 'V002': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V002.mp4', 'V003': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V003.mp4', 'V004': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V004.mp4', 'V005': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V005.mp4', 'V006': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V006.mp4', 'V007': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V007.mp4', 'V008': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V008.mp4', 'V009': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V009.mp4', 'V011': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V011.mp4', 'V012': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V012.mp4', 'V013': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V013.mp4', 'V014': 'D:/AIChallenge/data_sotuyen1/video/Videos_L14/video/L14_V014.mp4', 'V0

# Sampling Utils

In [3]:
def sample_frames_from_shot(start_idx, end_idx):
    """Pick four frame indices spread across one shot.

    Args:
        start_idx: Index of the first frame of the shot.
        end_idx: Index of the last frame of the shot.

    Returns:
        ``[start_idx, one-third point, two-thirds point, end_idx]``. The inner
        offsets are truncated with ``int()``, so very short shots produce
        repeated indices.
    """
    # An earlier variant sampled the midpoints of ``n_frames`` equal intervals
    # built with np.linspace; it is no longer used.
    span = end_idx - start_idx
    one_third = int(span / 3)
    two_thirds = int(2 * span / 3)
    return [start_idx, start_idx + one_third, start_idx + two_thirds, end_idx]

# CutFrame

In [4]:
import os
import json
import cv2
from tqdm import tqdm

scene_json_dirs = 'D:/AIChallenge/data_sotuyen1/dataset_extraction/transnet/SceneJSON'
save_dir_all = 'D:/AIChallenge/data_sotuyen1/keyframe'

# Selection for this run: a single data part, starting from a given video id.
target_part = 'L14'
first_video_id = 'V006'

# Create the keyframe output root if it does not exist yet
os.makedirs(save_dir_all, exist_ok=True)

for key in sorted(all_video_paths.keys()):
    if key != target_part:  # Process only the selected data part
        continue

    save_dir = f'{save_dir_all}/{key}_extra'
    os.makedirs(save_dir, exist_ok=True)  # Per-part output directory

    video_paths_dict = all_video_paths[key]
    video_ids = sorted(video_paths_dict.keys())

    for video_id in tqdm(video_ids, desc=f"Processing {key}"):
        # Plain string comparison: skips ids that sort before first_video_id.
        if video_id < first_video_id:
            continue
        video_path = video_paths_dict[video_id]
        video_scene_path = f'{scene_json_dirs}/{key}/{video_id}.json'

        # Make sure the scene JSON exists before opening it
        if not os.path.exists(video_scene_path):
            print(f'Scene JSON file not found: {video_scene_path}')
            continue

        # Load the shot list; each shot is indexed as shot[0] (start) and shot[1] (end)
        with open(video_scene_path, 'r') as f:
            video_scenes = json.load(f)

        video_save_dir = f'{save_dir}/{video_id}'
        os.makedirs(video_save_dir, exist_ok=True)  # Per-video frame directory

        # Open the video with OpenCV
        cap = cv2.VideoCapture(video_path)
        try:
            # Skip this video if it could not be opened (the capture is still
            # released by the finally clause below)
            if not cap.isOpened():
                print(f'Failed to open video: {video_path}')
                continue

            # Walk through every shot of the video
            for shot in tqdm(video_scenes, desc=f"Processing scenes for {video_id}"):
                shot_frames_id = sample_frames_from_shot(shot[0], shot[1])

                # Short shots yield repeated indices; dict.fromkeys drops the
                # duplicates while keeping order, so each frame is sought,
                # decoded and written only once.
                for index in dict.fromkeys(shot_frames_id):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, index)
                    filename = "{}/{:0>6d}.jpg".format(video_save_dir, index)

                    ret, frame = cap.read()
                    if ret:
                        if not cv2.imwrite(filename, frame):
                            print(f'Failed to save frame {index} for {video_id}')
                    else:
                        print(f'Failed to capture frame {index} for {video_id}')
        finally:
            # Always free the capture, including on early exit or an exception
            cap.release()


Processing L14:   0%|          | 0/27 [00:00<?, ?it/s]

Processing scenes for V006: 100%|██████████| 408/408 [03:07<00:00,  2.18it/s]
Processing scenes for V007: 100%|██████████| 353/353 [02:22<00:00,  2.48it/s]
Processing scenes for V008: 100%|██████████| 368/368 [02:11<00:00,  2.79it/s]
Processing scenes for V009: 100%|██████████| 385/385 [02:59<00:00,  2.15it/s]
Processing scenes for V011: 100%|██████████| 372/372 [06:04<00:00,  1.02it/s]
Processing scenes for V012: 100%|██████████| 388/388 [02:17<00:00,  2.82it/s]
Processing scenes for V013: 100%|██████████| 305/305 [01:49<00:00,  2.78it/s]
Processing scenes for V014: 100%|██████████| 373/373 [01:56<00:00,  3.21it/s]
Processing scenes for V015: 100%|██████████| 311/311 [01:51<00:00,  2.80it/s]
Processing scenes for V016: 100%|██████████| 307/307 [01:40<00:00,  3.05it/s]
Processing scenes for V017: 100%|██████████| 333/333 [01:47<00:00,  3.09it/s]
Processing scenes for V018: 100%|██████████| 278/278 [01:47<00:00,  2.60it/s]
Processing scenes for V019: 100%|██████████| 391/391 [02:17<00:0