In [None]:
import json
import os
import zipfile

import requests

In [None]:
# URLs processed by the driver loop further down: each one is passed to
# download_and_extract, which saves it as a zip archive and unpacks it.
list_of_urls = [
    "https://atm249497-s3user.vcos.cloudstorage.com.vn/aic24-b7/Videos_L07.zip",
]

In [None]:
def download_file(url, filename, output_dir, chunk_size=8192, timeout=60):
    """Stream ``url`` to ``<output_dir>/<filename>`` while printing progress.

    Args:
        url: Address of the file to download.
        filename: Name of the file to create inside ``output_dir``.
        output_dir: Destination directory; created if it does not exist.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The path of the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Join with a separator: plain concatenation would turn
    # "/data/video" + "data.zip" into "/data/videodata.zip".
    target_path = os.path.join(output_dir, filename)

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail early instead of saving an HTML error page as the archive.
        response.raise_for_status()

        total_length = response.headers.get('content-length')
        # A missing or zero content-length means the size is unknown.
        total_bytes = int(total_length) if total_length else 0

        dl = 0
        with open(target_path, 'wb') as file:
            # Always stream, so large files never have to fit in memory.
            for data in response.iter_content(chunk_size=chunk_size):
                if not data:
                    continue
                dl += len(data)
                file.write(data)
                # Show the number of megabytes downloaded so far.
                megabytes = round(dl / 1000000, 2)
                if total_bytes > 0:
                    # Clamp: decoded content can exceed the advertised length.
                    done = min(50, int(50 * dl / total_bytes))
                    print("\r[%s%s] %s mb" % ('=' * done, ' ' * (50 - done), megabytes), end='')
                else:
                    print("\r%s mb" % megabytes, end='')
    print("\nDownload completed")
    return target_path

def extract_zip(zip_file, output_dir):
    """Unpack ``zip_file`` into ``output_dir`` and then delete the archive.

    Args:
        zip_file: Path of the zip archive to unpack.
        output_dir: Directory the archive members are extracted into.
    """
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(output_dir)
    print("Extract completed")

    os.remove(zip_file)
    print("Remove zip file completed")

def download_and_extract(url, output_dir, filename = "data.zip"):
    """Download a zip archive into ``output_dir``, unpack it there and delete it.

    Args:
        url: Address of the zip archive.
        output_dir: Directory that receives the archive and its contents.
        filename: Temporary name used for the downloaded archive.
    """
    # Use the path reported by download_file so both steps agree on the location.
    zip_path = download_file(url, filename, output_dir)
    extract_zip(zip_path, output_dir)

In [None]:
import cv2
from PIL import Image

def save_image(image, path):
    """Write ``image`` to ``path`` by delegating to the image's ``save`` method.

    The caller in this notebook passes a PIL image and a ``.jpg`` path.
    """
    image.save(path)

def extract_video_frame(video_path, frame_folder, width=640, height=480, frame_step=7):
    """Save every ``frame_step``-th frame of a video as a resized JPEG.

    Frames are written to ``<frame_folder>/<video name>/<frame index>.jpg``.
    The video name is the file name without its extension, with spaces
    replaced by underscores; the frame index is the zero-based position of
    the frame in the video.

    Args:
        video_path: Path of the video file to read.
        frame_folder: Root directory that receives one sub-folder per video.
        width: Width in pixels of the saved frames.
        height: Height in pixels of the saved frames.
        frame_step: Keep one frame out of every ``frame_step`` (default 7).

    Raises:
        ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError('frame_step must be a positive integer')

    print(f'Extracting frames from {video_path}...', end='\n')
    # splitext strips only the extension, so "a.b.mp4" and "a.c.mp4" do not
    # collapse into the same output folder.
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    video_name = video_name.replace(' ', '_')
    frame_out_dir = os.path.join(frame_folder, video_name)
    # exist_ok avoids a check-then-create race when several workers run at once.
    os.makedirs(frame_out_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    saved_count = 0
    try:
        if not cap.isOpened():
            # Best effort: report the problem; the loop below then ends on the
            # first failed read and the summary shows 0 frames.
            print(f'Could not open {video_path}')
        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_index % frame_step == 0:
                if frame_index % 1000 == 0:
                    print(f'Extracted {frame_index} frames from {video_name}')
                # OpenCV decodes to BGR; PIL expects RGB.
                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize((width, height))
                frame_path = os.path.join(frame_out_dir, f'{frame_index}.jpg')
                save_image(image, frame_path)
                saved_count += 1
            frame_index += 1
    finally:
        # Release the capture even if decoding or saving raised.
        cap.release()
    print(f'Extracted {saved_count} frames from {video_name}')

In [None]:
def wfile(folder, end='.mp4'):
    """List the entries directly inside ``folder`` whose name ends with ``end``.

    Args:
        folder: Directory to list (sub-directories are not descended into).
        end: Suffix an entry name must end with to be returned.

    Returns:
        A list of paths, each being ``folder`` joined with a matching name,
        in the order reported by ``os.listdir``.
    """
    matches = []
    for entry in os.listdir(folder):
        if entry.endswith(end):
            matches.append(os.path.join(folder, entry))
    return matches

In [None]:
# Size in pixels that extracted frames are resized to.
FRAME_WIDTH = 640
FRAME_HEIGHT = 480

# Directory the videos are downloaded under, and directory receiving the output.
ROOT_DIR = '/kaggle/input'
ROOT_OUTPUT_DIR = '/kaggle/temp'

dataset_name="people"
# Credentials must not live in the notebook source: read them from the
# environment (for example from a secret configured outside the notebook).
# NOTE(review): the API key that used to be hard-coded here has been exposed
# and should be revoked.
API = {
    "username": os.environ.get("KAGGLE_USERNAME", "tareksherif"),
    "key": os.environ.get("KAGGLE_KEY", ""),
}
if not API["key"]:
    print("Warning: KAGGLE_KEY is not set; set it before using the Kaggle API.")

In [None]:
# Export the credentials as environment variables; KAGGLE_USERNAME is read
# back later when the dataset id is built.
os.environ.update({
    'KAGGLE_USERNAME': API["username"],
    'KAGGLE_KEY': API["key"],
})

In [1]:
from multiprocessing import Pool
import multiprocessing as m

def extract(list_video, output_dir):
    """Run ``extract_video_frame`` on every video using a process pool.

    The pool is sized with ``multiprocessing.cpu_count()`` and each task
    receives ``FRAME_WIDTH`` and ``FRAME_HEIGHT`` as the target frame size.
    The call blocks until every video has been processed.

    Args:
        list_video: Paths of the videos to process.
        output_dir: Root directory handed to ``extract_video_frame``.
    """
    jobs = []
    for video in list_video:
        jobs.append((video, output_dir, FRAME_WIDTH, FRAME_HEIGHT))
    worker_count = m.cpu_count()
    with Pool(worker_count) as pool:
        pool.starmap(extract_video_frame, jobs)

In [None]:
# Archives are unpacked below this directory. It is computed once so that the
# download target and the video search root cannot drift apart.
video_dir = os.path.join(ROOT_DIR, 'video')
# Videos already handed to extract(), so a later URL does not redo earlier ones.
processed_videos = set()

for url in list_of_urls:
    print(f"Downloading and extracting {url}")
    download_and_extract(url, video_dir)
    # The archive contents land under video_dir, possibly inside sub-folders,
    # so search that tree recursively. Listing only the top level of ROOT_DIR
    # can never see the files that were just extracted.
    list_video = sorted(
        video
        for folder, _subdirs, _files in os.walk(video_dir)
        for video in wfile(folder, '.mp4')
    )
    new_videos = [video for video in list_video if video not in processed_videos]
    extract(new_videos, ROOT_OUTPUT_DIR)
    processed_videos.update(new_videos)
    print(f"Completed {url}")
    print("---------------------------------------------------")

In [None]:
# Metadata written next to the extracted frames: the dataset title, its
# "<username>/<dataset_name>" id and its license.
data = {
  "title": dataset_name,
  "id": os.environ['KAGGLE_USERNAME']+"/"+dataset_name,
  "licenses": [
    {
      "name": "CC0-1.0"
    }
  ]
}

# Nothing earlier is guaranteed to have created the output directory (it only
# appears once a video has been processed), so create it before writing.
os.makedirs(ROOT_OUTPUT_DIR, exist_ok=True)
with open(os.path.join(ROOT_OUTPUT_DIR, 'dataset-metadata.json'), 'w') as f:
    json.dump(data, f)

In [None]:
# Collect the entries of ROOT_OUTPUT_DIR whose name ends with '1'.
# NOTE(review): the '1' suffix looks like a manual batch selector — confirm
# that zipping only this subset is intended.
output_entries = os.listdir(ROOT_OUTPUT_DIR)
list_file = [os.path.join(ROOT_OUTPUT_DIR, entry) for entry in output_entries if entry.endswith('1')]
def zip_folder(folder, zip_file):
    """Compress every file under ``folder`` into the archive ``zip_file``.

    Member names are stored relative to the parent of ``folder``, so each
    entry in the archive starts with the folder's own name. Directories that
    contain no files are not recorded.

    Args:
        folder: Directory whose files are added, walking sub-directories.
        zip_file: Path of the zip archive to create (deflate compression).
    """
    parent_dir = os.path.join(folder, '..')
    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, names in os.walk(folder):
            for name in names:
                source_path = os.path.join(current_dir, name)
                member_name = os.path.relpath(source_path, parent_dir)
                archive.write(source_path, member_name)

# Zip every selected folder into "<folder>.zip", one pool task per folder.
zip_jobs = [(folder, folder + '.zip') for folder in list_file]
with Pool(m.cpu_count()) as pool:
    pool.starmap(zip_folder, zip_jobs)