In [1]:
import subprocess
import json
import os
import math
import csv

def get_video_duration(input_file):
    """Return the container duration of ``input_file`` in seconds, as reported by ffprobe.

    Args:
        input_file: Path to the media file handed to ffprobe.

    Returns:
        float: The ``format.duration`` field of ffprobe's JSON output.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status, or if its output
            does not contain a numeric ``format.duration``.
    """
    # '-v error' (rather than 'quiet') still keeps stdout as pure JSON, but lets
    # ffprobe explain failures on stderr so the error raised below is actionable.
    # Only the 'format' section is read, so '-show_streams' is not requested.
    cmd = ['ffprobe', '-v', 'error', '-print_format', 'json', '-show_format', input_file]
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        details = (result.stderr or '').strip()
        raise RuntimeError(
            f'ffprobe failed for {input_file!r} (exit code {result.returncode}): {details}'
        )

    try:
        return float(json.loads(result.stdout)['format']['duration'])
    except (KeyError, TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError, so malformed output lands here too.
        raise RuntimeError(
            f'ffprobe did not report a usable duration for {input_file!r}'
        ) from exc

def split_video(input_file, save_dir, chunk_path='video_chunk', num_chunks=10, overlap=5, data_window=27):
    """Cut ``input_file`` into ``num_chunks`` overlapping clips with ffmpeg and index them in a CSV.

    Clips are written with stream copy to
    ``<save_dir>/<input file stem>/<chunk_path>/chunk_<k>.mp4`` (``k`` starts at 1).
    A CSV named ``<chunk_path>_<num_chunks>_scenes.csv`` is written to the same
    directory with one row per clip: ``[i, data_start, data_end]`` where ``i`` is
    the zero-based clip index.

    Timing, with ``c = floor((duration - overlap * (num_chunks - 1)) / num_chunks)``:
    clip 0 spans ``[0, c + overlap]`` and clip ``i > 0`` spans
    ``[i * c - overlap, (i + 1) * c]``, so every clip is ``c + overlap`` seconds long.

    NOTE(review): with this scheme clips 0 and 1 overlap by ``2 * overlap`` while
    later neighbours overlap by ``overlap``, and footage after ``num_chunks * c``
    seconds is not written to any clip. Timing is kept as-is; confirm it is intended.

    Args:
        input_file: Path to the source video.
        save_dir: Root directory under which the per-video folder is created.
        chunk_path: Name of the sub-folder (and CSV prefix) for the clips.
        num_chunks: Number of clips to produce; must be at least 1.
        overlap: Seconds by which each clip after the first starts early.
        data_window: Seconds added to ``data_start`` to obtain the CSV ``data_end``
            column. NOTE(review): this is independent of the clip's own end time;
            the default of 27 preserves the previously hard-coded value.

    Raises:
        ValueError: If ``num_chunks`` is below 1, or the video is too short for
            the requested ``num_chunks``/``overlap`` combination.
        RuntimeError: If ffmpeg exits with a non-zero status for a clip.
    """
    if num_chunks < 1:
        raise ValueError(f'num_chunks must be at least 1, got {num_chunks}')

    duration = get_video_duration(input_file)
    chunk_duration = math.floor((duration - overlap * (num_chunks - 1)) / num_chunks)

    # A non-positive chunk length, or one shorter than the overlap, would hand
    # ffmpeg negative or inverted timestamps; fail before touching the disk.
    if chunk_duration <= 0 or (num_chunks > 1 and chunk_duration < overlap):
        raise ValueError(
            f'video of {duration} s is too short for num_chunks={num_chunks} '
            f'with overlap={overlap}'
        )

    # Use only the file's base name without extension for the per-video folder:
    # an absolute input path would otherwise make os.path.join discard save_dir,
    # and a dot in a directory name would truncate the path.
    video_name = os.path.splitext(os.path.basename(input_file))[0]
    save_path = os.path.join(save_dir, video_name, chunk_path)
    os.makedirs(save_path, exist_ok=True)

    csv_file_path = os.path.join(save_path, f"{chunk_path}_{num_chunks}_scenes.csv")

    with open(csv_file_path, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Video Segment', 'data_start', 'data_end'])

        for i in range(num_chunks):
            # The first clip starts at 0; later clips start `overlap` seconds early.
            start_time = 0 if i == 0 else i * chunk_duration - overlap
            end_time = start_time + chunk_duration + overlap
            start_sec = math.floor(start_time)
            end_sec = math.floor(end_time)
            output_file = f'{save_path}/chunk_{i+1}.mp4'

            # NOTE(review): '-r 30' is combined with stream copy ('-c copy');
            # verify that the frame-rate option has the intended effect here.
            cmd = [
                'ffmpeg',
                '-y',
                '-loglevel', 'error',  # keep captured stderr limited to real errors
                '-i', input_file,
                '-ss', str(start_sec),
                '-to', str(end_sec),
                '-r', '30',
                '-c', 'copy',
                output_file
            ]

            completed = subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
            )
            # Do not report or index a clip that ffmpeg failed to produce.
            if completed.returncode != 0:
                details = (completed.stderr or '').strip()
                raise RuntimeError(
                    f'ffmpeg failed for {output_file} '
                    f'(exit code {completed.returncode}): {details}'
                )

            print(f'Created {output_file} with start time {start_sec} and end time {end_sec}')
            csv_writer.writerow([i, start_sec, start_sec + data_window])
# Example run: split the sample video into the default number of overlapping
# chunks, writing the results under the 'data' directory.
input_file = 'sample_tv360.mp4'
save_dir = 'data'
split_video(input_file=input_file, save_dir=save_dir)


Created data/sample_tv360/video_chunk/chunk_1.mp4 with start time 0 and end time 81
Created data/sample_tv360/video_chunk/chunk_2.mp4 with start time 71 and end time 152
Created data/sample_tv360/video_chunk/chunk_3.mp4 with start time 147 and end time 228
Created data/sample_tv360/video_chunk/chunk_4.mp4 with start time 223 and end time 304
Created data/sample_tv360/video_chunk/chunk_5.mp4 with start time 299 and end time 380
Created data/sample_tv360/video_chunk/chunk_6.mp4 with start time 375 and end time 456
Created data/sample_tv360/video_chunk/chunk_7.mp4 with start time 451 and end time 532
Created data/sample_tv360/video_chunk/chunk_8.mp4 with start time 527 and end time 608
Created data/sample_tv360/video_chunk/chunk_9.mp4 with start time 603 and end time 684
Created data/sample_tv360/video_chunk/chunk_10.mp4 with start time 679 and end time 760


In [2]:
import subprocess
import json

# Path to the video to inspect.
# NOTE(review): this is an absolute, machine-specific path.
video_path = '/home/thiendc/projects/video_summarization/data/sample_tv360/video_raw/video_raw_1.mp4'

# ffprobe command that reports the video's format and stream info as JSON
ffprobe_cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json']
ffprobe_cmd += ['-show_format', '-show_streams', video_path]

# Run the command, capturing stdout/stderr as text, and parse the JSON report
result = subprocess.run(
    ffprobe_cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
info = json.loads(result.stdout)

# Read the video duration from the 'format' section and display it
duration = float(info['format']['duration'])
print(f"Video length: {duration:.3f} seconds")


Video length: 81.000 seconds
