In [None]:
import json
import os
from moviepy.editor import VideoFileClip
import time
# Directory prefix under which the source videos (one sub-folder per video) live
video_prefix = "/data/hypertext/kangheng/howto100m/download/videos/Howto-Interlink7M_subset_w_all_clips_train/"
# Input TSV; each data row names a video and carries its clip list
tsv_input="/data/hypertext/kangheng/howto100m/Interlink7M_tsv/Howto-Interlink7M_subset_w_all_clips_train.tsv"

# Timestamp of this run, used to give every run its own error log
current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
# Error log file path. The timestamp must be interpolated here: a plain
# "log/current_time.txt" literal would make all runs share one file.
error_log_path = f"log/{current_time}.txt"

# Convert a time string to seconds
def time_str_to_seconds(time_str):
    """Return the number of seconds represented by an ``H:M:S`` string.

    The string must contain exactly three colon-separated integer fields;
    anything else raises ``ValueError``.
    """
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    return (hours * 60 + minutes) * 60 + seconds

def log_error(message):
    """Append ``message`` as a single line to the file at ``error_log_path``.

    The log's parent directory is created on demand: without it, opening the
    file would raise ``FileNotFoundError`` and the very first logged error
    would abort the run instead of being recorded.
    """
    log_dir = os.path.dirname(error_log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    # Explicit encoding so messages with non-ASCII characters are written
    # the same way regardless of the machine's locale.
    with open(error_log_path, 'a', encoding='utf-8') as error_file:
        error_file.write(message + '\n')

def _parse_tsv_row(line):
    """Parse one TSV line into ``(video_file, clips_json)``.

    Returns ``None`` for blank lines and for the header row (the row whose
    first column is ``video``).

    Raises:
        ValueError: if the line does not have exactly three tab-separated
            columns or its clips column is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    stripped = line.strip()
    if not stripped:
        return None
    fields = stripped.split("\t")
    # Only the last path component of the first column is used.
    video_file = fields[0].split("/")[-1]
    if video_file == "video":
        return None
    if len(fields) != 3:
        raise ValueError(f"expected 3 tab-separated columns, got {len(fields)}")
    # The clips column holds JSON wrapped in one pair of quotes with inner
    # quotes doubled: collapse the doubled quotes, then drop the outer pair.
    clips_data = fields[1].replace('""', '"')[1:-1]
    return video_file, json.loads(clips_data)


def _write_clips(full_video_path, clip_directory, clips_json):
    """Cut every clip listed in ``clips_json`` out of one video file.

    Each entry must provide ``'clip'`` (``"H:M:S - H:M:S"``) and ``'clip_id'``.
    Clips are written to ``clip_directory`` as ``clip_<clip_id>.mp4``. The
    source video is always closed, even when a clip fails.
    """
    video = VideoFileClip(full_video_path)
    try:
        video_duration = video.duration
        for clip_info in clips_json:
            start_time, end_time = clip_info['clip'].split(' - ')
            start_seconds = time_str_to_seconds(start_time)
            # Clamp the end time to the video duration.
            end_seconds = min(time_str_to_seconds(end_time), video_duration)

            # An empty or inverted range cannot be cut; skip just this clip
            # so the remaining clips of the video are still produced.
            if start_seconds >= end_seconds:
                log_error(
                    f"Skipping clip {clip_info['clip_id']} of {full_video_path}: "
                    f"start {start_seconds}s is not before end {end_seconds}s"
                )
                continue

            # Cut the segment and save it as a new file.
            clip = video.subclip(start_seconds, end_seconds)
            clip_filename = "clip_" + clip_info['clip_id'] + ".mp4"
            target_path = os.path.join(clip_directory, clip_filename)
            clip.write_videofile(target_path, codec="libx264", audio_codec="aac")
    finally:
        # Release the video file resources on success and on failure alike.
        video.close()


# Read the TSV file and cut the listed clips out of every video
with open(tsv_input, 'r', encoding='utf-8') as file:
    for line_number, line in enumerate(file, start=1):
        try:
            row = _parse_tsv_row(line)
        except ValueError as e:
            # A single bad row must not abort the whole run.
            log_error(f"Malformed TSV line {line_number}: {e}")
            continue
        if row is None:
            continue
        video_file, clips_json = row

        # One folder per video, named after the file name minus its last
        # four characters; clips are written next to the source video.
        # NOTE(review): the [:-4] slice assumes a 3-letter extension such as
        # ".mp4" -- confirm no other extensions occur in the dataset.
        clip_directory = os.path.join(video_prefix, video_file[:-4])
        full_video_path = os.path.join(clip_directory, video_file)

        # If the video file does not exist, log it and move on.
        if not os.path.exists(full_video_path):
            log_error(f"Video file not found: {full_video_path}")
            continue

        try:
            _write_clips(full_video_path, clip_directory, clips_json)
        except Exception as e:
            # Top-level boundary: record the failure and continue with the
            # next video.
            log_error(f"Error processing file {video_file}: {e}")

