In [None]:
cd /kaggle

In [None]:
from pathlib import Path
# Scratch directory, relative to the current working directory; created if it
# does not exist yet (no error when it is already there).
TMP_DIR = Path('temp')
TMP_DIR.mkdir(exist_ok=True)

In [None]:
import os
import requests
import zipfile
import json
import shutil

In [None]:
def zip_and_remove_folder(folder_path):
    """Zip the contents of a folder into a sibling ``<folder>.zip`` and delete the folder.

    The archive is rooted at the folder itself, so entries are stored relative
    to it (the folder name is not a top-level entry inside the zip).

    :param folder_path: Directory to archive (``str`` or path-like). A trailing
        path separator is tolerated.
    :return: Path of the created zip file.
    :raises FileNotFoundError: If ``folder_path`` is not an existing directory.
    """
    # Normalise first: with a trailing separator ("frames/") the archive would
    # be written *inside* the folder as "frames/.zip" and then be deleted
    # together with it by rmtree below.
    folder_path = os.path.normpath(os.fspath(folder_path))
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(
            f"Folder to zip does not exist or is not a directory: {folder_path!r}"
        )

    # Name of the zip file derived from the folder name
    zip_filename = f"{folder_path}.zip"

    # Create the zip file from the folder (make_archive appends '.zip' itself)
    shutil.make_archive(folder_path, 'zip', folder_path)

    # Delete the folder only after the archive has been written
    shutil.rmtree(folder_path)

    print(f"Folder '{folder_path}' đã được zip thành '{zip_filename}' và folder đã bị xóa.")
    return zip_filename

In [None]:
import cv2
from PIL import Image
import os

def save_image(image, path):
    """Write ``image`` to ``path`` by delegating to the image object's own ``save`` method.

    :param image: Object exposing ``save(path)``; the caller in this notebook
        passes a PIL ``Image``.
    :param path: Destination file path.
    """
    image.save(path)

def resize_or_pad(image, new_size):
    """
    Fit an image into a canvas of exactly ``new_size``.

    Landscape images (width > height) are resized straight to ``new_size``;
    their aspect ratio is NOT preserved. Every other image is scaled with its
    aspect ratio preserved and centred on a black canvas of ``new_size``.

    :param image: Input image as a NumPy array whose first two axes are (height, width).
    :param new_size: Tuple (new_width, new_height) giving the size of the returned image.
    :return: Image of size ``new_size``.
    :raises ValueError: If the input image has zero height or width.
    """
    h, w = image.shape[:2]
    target_w, target_h = new_size

    if h == 0 or w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")

    if w > h:
        # Plain stretch to the target size (may distort the picture).
        return cv2.resize(image, (target_w, target_h))

    # Default: scale so the height matches the target, keeping the aspect ratio.
    scale = target_h / h
    resized_h = target_h
    # Clamp to 1 px so extremely narrow inputs do not request a zero-width resize.
    resized_w = max(1, int(w * scale))

    if resized_w > target_w:
        # The height-fitted image would overflow the canvas (negative padding),
        # so fit to the width instead and pad above and below.
        scale = target_w / w
        resized_w = target_w
        resized_h = max(1, min(target_h, int(h * scale)))

    resized_image = cv2.resize(image, (resized_w, resized_h))

    # Split the leftover space evenly; the odd pixel goes to the right/bottom.
    padding_left = (target_w - resized_w) // 2
    padding_right = target_w - resized_w - padding_left
    padding_top = (target_h - resized_h) // 2
    padding_bottom = target_h - resized_h - padding_top

    return cv2.copyMakeBorder(
        resized_image,
        padding_top, padding_bottom, padding_left, padding_right,
        cv2.BORDER_CONSTANT, value=[0, 0, 0],
    )
    

def extract_video_frame(video_path, frame_folder, width=640, height=480, frame_step=7):
    """Sample frames from a video, save them as JPEGs, then zip the frame folder.

    Every ``frame_step``-th frame (frame indices 0, frame_step, 2*frame_step, ...)
    is converted to RGB, passed through ``resize_or_pad`` and written to
    ``<frame_folder>/<video_name>/<frame_index>.jpg``. When the video is
    exhausted the folder is handed to ``zip_and_remove_folder``.

    A video that cannot be opened is skipped with a message; no folder or
    archive is produced for it.

    :param video_path: Path of the video file to read.
    :param frame_folder: Directory under which the per-video frame folder is created.
    :param width: Target frame width passed to ``resize_or_pad``.
    :param height: Target frame height passed to ``resize_or_pad``.
    :param frame_step: Keep one frame out of every ``frame_step`` frames.
    :raises ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError(f"frame_step must be >= 1, got {frame_step}")

    print(f'Extracting frames from {video_path}...', end='\n')
    # NOTE(review): the name is the text before the FIRST dot, so "a.v1.mp4" and
    # "a.v2.mp4" both map to "a". Kept as-is because other cells derive names
    # the same way; confirm input file names contain a single dot.
    video_name = os.path.basename(video_path).split('.')[0]
    video_name = video_name.replace(' ', '_')
    frame_out_dir = os.path.join(frame_folder, video_name)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f'Could not open video {video_path}; skipping.')
        return

    # exist_ok avoids a check-then-create race when several workers run at once.
    os.makedirs(frame_out_dir, exist_ok=True)

    # Report progress once every 100 saved frames (700 source frames at the default step).
    progress_every = frame_step * 100
    frame_cout = 0
    saved_frames = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_cout % frame_step == 0:
                if frame_cout % progress_every == 0:
                    print(f'Extracted {frame_cout} frames from {video_name}')

                # Convert the frame from BGR (OpenCV) to RGB (PIL)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Resize or pad the frame using the OpenCV-based function
                frame = resize_or_pad(frame, (width, height))

                # Convert to a PIL Image for saving; the file is named after the frame index
                frame = Image.fromarray(frame)
                frame_path = os.path.join(frame_out_dir, f'{frame_cout}.jpg')
                save_image(frame, frame_path)
                saved_frames += 1
            frame_cout += 1
    finally:
        # Always free the decoder, even if saving a frame fails.
        cap.release()

    zip_and_remove_folder(frame_out_dir)
    print(f'Extracted {saved_frames} frames from {video_name}')


In [None]:
def wfile(folder, end='.mp4'):
    """Recursively collect the paths of all files under ``folder`` whose name ends with ``end``.

    :param folder: Root directory to walk.
    :param end: Required file-name suffix (case-sensitive).
    :return: List of matching paths, in ``os.walk`` order.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith(end)
    ]

In [None]:
# Size (in pixels) passed to the frame extractor for every saved frame.
FRAME_WIDTH = 640
FRAME_HEIGHT = 480

# Videos are searched under ROOT_DIR; frame archives, the FPS CSV and the
# dataset metadata are written to ROOT_OUTPUT_DIR.
ROOT_DIR = '/kaggle/input'
ROOT_OUTPUT_DIR = '/kaggle/temp'
CSV_OUTPUT = '/kaggle/temp'  # not referenced by the visible cells

dataset_name="frames-l2-b2"

# SECURITY: never commit the Kaggle API key in the notebook. It is read from
# the KAGGLE_KEY environment variable instead (e.g. export it, or load it from
# your secret store into os.environ, before running this cell). The key that
# used to be embedded here must be considered leaked and should be revoked.
_kaggle_key = os.environ.get("KAGGLE_KEY")
if not _kaggle_key:
    raise RuntimeError(
        "KAGGLE_KEY is not set. Provide your Kaggle API key through the "
        "KAGGLE_KEY environment variable before running this notebook."
    )
API={"username": os.environ.get("KAGGLE_USERNAME", "hkhnhduy"), "key": _kaggle_key}

In [None]:
# Export the Kaggle credentials through the process environment.
os.environ.update(
    KAGGLE_USERNAME=API["username"],
    KAGGLE_KEY=API["key"],
)

In [None]:
from multiprocessing import Pool
import multiprocessing as m

def extract(list_video, output_dir):
    """Run ``extract_video_frame`` on every video, one worker process per CPU core.

    :param list_video: Iterable of video paths.
    :param output_dir: Directory handed to ``extract_video_frame`` as its frame folder.
    """
    worker_count = m.cpu_count()
    with Pool(worker_count) as pool:
        # One argument tuple per video; frame size comes from the notebook constants.
        jobs = []
        for video in list_video:
            jobs.append((video, output_dir, FRAME_WIDTH, FRAME_HEIGHT))
        pool.starmap(extract_video_frame, jobs)

In [None]:
# Collect every .mp4 under ROOT_DIR and extract its frames into ROOT_OUTPUT_DIR
# (one zip archive per video, processed in parallel).
list_video = wfile(ROOT_DIR, '.mp4')
extract(list_video, ROOT_OUTPUT_DIR)

In [None]:
import os 
def wfile(folder, end='.mp4'):
    """Walk ``folder`` recursively and return the paths of files whose name ends with ``end``.

    :param folder: Root directory to search.
    :param end: File-name suffix to match (case-sensitive).
    :return: List of matching file paths.
    """
    # NOTE(review): this re-defines the `wfile` helper already declared in an
    # earlier cell with identical behaviour; consider keeping a single definition.
    matches = []
    for current_dir, _subdirs, names in os.walk(folder):
        matches.extend(
            os.path.join(current_dir, name) for name in names if name.endswith(end)
        )
    return matches

In [None]:
import cv2
import pandas as pd
import os

def get_fps(video_path):
    """Return the frame rate OpenCV reports for a video file.

    :param video_path: Path of the video file.
    :return: Value of ``cv2.CAP_PROP_FPS`` for the opened capture.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        return vidcap.get(cv2.CAP_PROP_FPS)
    finally:
        # Release the capture even if querying the property fails.
        vidcap.release()

def videos_fps(videos_folder):
    """Build a table with the frame rate of every ``.mp4`` video under a folder.

    :param videos_folder: Root directory searched recursively via ``wfile``.
    :return: ``pandas.DataFrame`` with the columns ``video_name`` and ``fps``.
    """
    videos_path = wfile(videos_folder)
    print(videos_path)
    # Derive names exactly like extract_video_frame does (text before the first
    # dot, spaces replaced by underscores) so they match the frame archive names.
    videos_name = [
        os.path.basename(video).split('.')[0].replace(' ', '_')
        for video in videos_path
    ]
    # Local renamed so it no longer shadows this function's own name.
    fps_values = [get_fps(video) for video in videos_path]

    fps_table = {
        'video_name':videos_name,
        'fps':fps_values
    }

    df = pd.DataFrame(fps_table)
    return df

# Scan the same input root as the frame extraction (ROOT_DIR) instead of
# repeating the literal path, then store the FPS table next to the archives.
df = videos_fps(ROOT_DIR)
df.to_csv(os.path.join(ROOT_OUTPUT_DIR, f'{dataset_name}.csv'), index=False)

In [None]:
# Metadata for the dataset: title, "<username>/<dataset_name>" id and licence.
data = {
    "title": dataset_name,
    "id": f"{os.environ['KAGGLE_USERNAME']}/{dataset_name}",
    "licenses": [{"name": "CC0-1.0"}],
}

# Written as dataset-metadata.json into the output folder.
with open(os.path.join(ROOT_OUTPUT_DIR, 'dataset-metadata.json'), 'w') as f:
    f.write(json.dumps(data))

In [None]:
pip install kaggle

In [None]:
!kaggle datasets create -p /kaggle/temp --dir-mode skip