In [1]:
# Install the ffmpeg-python bindings (imported below as `ffmpeg`). %pip targets
# the running kernel's environment, and the pin keeps re-runs on one version.
%pip install ffmpeg-python==0.2.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [2]:
from google.colab import drive
# Make Google Drive available under /content/drive for the cells that follow.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from pathlib import Path
import ffmpeg
from multiprocessing import Pool


def trim(in_file, out_file, start, end):
    """Cut a frame range out of a video and save it as a WebM clip.

    The video stream is cut by frame index and, when the input has an audio
    stream, the audio is cut at the equivalent timestamps (``frame / fps``).
    Both are re-based to start at zero and written to ``out_file``. An existing
    ``out_file`` is deleted first. The input duration and frame rate, and then
    the output duration, are printed as reported by ``ffmpeg.probe``.

    Args:
        in_file: Path (``str`` or ``pathlib.Path``) of the source video.
        out_file: Path (``str`` or ``pathlib.Path``) of the clip to write.
        start: Index of the first frame to keep.
        end: Frame index handed to ffmpeg's ``trim`` filter as ``end_frame``,
            i.e. the first frame that is dropped (exclusive bound).

    Raises:
        ValueError: If the input has no video stream, or its ``r_frame_rate``
            cannot be turned into a positive frame rate.
    """
    # Accept plain strings as well as Path objects for the destination.
    out_path = Path(out_file)
    # Remove any previous output so the encode below writes a fresh file.
    if out_path.exists():
        out_path.unlink()

    in_file = str(in_file)
    out_file = str(out_path)

    in_file_probe_result = ffmpeg.probe(in_file)
    in_file_duration = in_file_probe_result.get(
        "format", {}).get("duration", None)
    streams = in_file_probe_result.get('streams', [])

    video_info = next(
        (s for s in streams if s.get('codec_type') == 'video'), None)
    if video_info is None:
        raise ValueError(f"No video stream found in {in_file}")

    # The rate may be a fraction such as "30000/1001" or a bare number such as
    # "25"; both forms must end up as a float usable for frame -> seconds.
    rate = str(video_info.get('r_frame_rate', ''))
    numerator, _, denominator = rate.partition('/')
    try:
        if denominator:
            fps = int(numerator) / int(denominator)
        else:
            fps = float(numerator)
    except (ValueError, ZeroDivisionError) as exc:
        raise ValueError(
            f"Cannot derive a frame rate from r_frame_rate={rate!r} "
            f"for {in_file}") from exc
    if not fps > 0:
        raise ValueError(
            f"Non-positive frame rate {fps!r} (r_frame_rate={rate!r}) "
            f"for {in_file}")
    print(in_file_duration, fps)

    input_stream = ffmpeg.input(in_file)

    # Re-base timestamps so the clip starts at t=0 instead of its original offset.
    pts = "PTS-STARTPTS"
    video = input_stream.trim(start_frame=start, end_frame=end).setpts(pts)

    has_audio = any(s.get('codec_type') == 'audio' for s in streams)
    if has_audio:
        # Audio is trimmed by time, so convert the frame bounds to seconds.
        audio_start = start / fps
        audio_end = end / fps
        audio = (input_stream
                 .filter_("atrim", start=audio_start, end=audio_end)
                 .filter_("asetpts", pts))
        video_and_audio = ffmpeg.concat(video, audio, v=1, a=1).node
        output = ffmpeg.output(
            video_and_audio[0], video_and_audio[1], out_file, format="webm")
    else:
        # No audio stream to cut: write a video-only clip rather than
        # building an audio filter chain for a stream that is not there.
        output = ffmpeg.output(video, out_file, format="webm")
    output.run()

    out_file_probe_result = ffmpeg.probe(out_file)
    out_file_duration = out_file_probe_result.get(
        "format", {}).get("duration", None)
    print(out_file_duration)

In [7]:
def main(
    transnetv2_result="/content/drive/MyDrive/KLTN2022_ThanhHa/source/TransNetV2/inference/transnetv2_msum",
    scene_dir="/content/drive/MyDrive/KLTN2022_ThanhHa/dataset/TRECVID_MSUM_2022/scenes",
    save_dir="/content/drive/MyDrive/KLTN2022_ThanhHa/dataset/TRECVID_MSUM_2022/shots",
    max_workers=16,
):
    """Split every scene video into per-shot clips using ``*.scenes.txt`` files.

    For each movie directory under ``transnetv2_result``, every
    ``<scene>.scenes.txt`` file found (recursively) is read as one
    ``start end`` frame pair per line. Shot ``idx`` of a scene is written to
    ``save_dir/<movie>/<scene>/<scene>-shot_<idx>.webm`` by calling ``trim`` on
    ``scene_dir/<movie>/<scene file name>``. Clips that already exist are
    reported with a "Skipping ..." line and left untouched, so the function can
    be re-run to resume an interrupted job.

    Args:
        transnetv2_result: Root directory holding one sub-directory per movie
            with the ``*.scenes.txt`` files.
        scene_dir: Root directory of the source scene videos.
        save_dir: Root directory where shot clips are written.
        max_workers: Upper bound on the number of worker processes used per
            scene file.
    """
    transnetv2_result = Path(transnetv2_result)
    scene_dir = Path(scene_dir)
    save_dir = Path(save_dir)

    for p1 in sorted(transnetv2_result.iterdir()):
        # Only directories represent movies; ignore stray files at the root.
        if not p1.is_dir():
            continue
        movie_name = p1.name

        for p2 in sorted(p1.glob("**/*.scenes.txt")):
            scene_name = p2.name.replace(".scenes.txt", "")
            scene_path = scene_dir / movie_name / scene_name
            scene_stem = scene_name.replace(".webm", "")

            # Blank lines (e.g. a trailing newline) carry no frame pair.
            with p2.open("r") as f:
                segments = [line.split() for line in f if line.strip()]

            pending = []
            for idx, segment in enumerate(segments):
                save_path = save_dir / movie_name / scene_stem / \
                    f"{scene_stem}-shot_{idx}.webm"
                if save_path.exists():
                    print(f"Skipping {save_path.name}")
                    continue
                save_path.parent.mkdir(parents=True, exist_ok=True)

                pending.append(
                    (scene_path, save_path, int(segment[0]), int(segment[1])))

            # Nothing left to cut for this scene: do not spin up a pool at all.
            if not pending:
                continue

            # Size the pool by the number of queued clips (at least one
            # process, at most ``max_workers``).
            workers = min(max(1, max_workers), len(pending))
            with Pool(workers) as p:
                p.starmap(trim, pending)


# Entry point: run the shot-splitting job when this cell/script is executed
# as the main module (which is how the recorded "Skipping ..." output below
# was produced).
if __name__ == "__main__":
    main()

Skipping Calloused_Hands-1-shot_0.webm
Skipping Calloused_Hands-1-shot_1.webm
Skipping Calloused_Hands-1-shot_2.webm
Skipping Calloused_Hands-1-shot_3.webm
Skipping Calloused_Hands-1-shot_4.webm
Skipping Calloused_Hands-1-shot_5.webm
Skipping Calloused_Hands-1-shot_6.webm
Skipping Calloused_Hands-1-shot_7.webm
Skipping Calloused_Hands-1-shot_8.webm
Skipping Calloused_Hands-1-shot_9.webm
Skipping Calloused_Hands-1-shot_10.webm
Skipping Calloused_Hands-1-shot_11.webm
Skipping Calloused_Hands-1-shot_12.webm
Skipping Calloused_Hands-1-shot_13.webm
Skipping Calloused_Hands-1-shot_14.webm
Skipping Calloused_Hands-1-shot_15.webm
Skipping Calloused_Hands-1-shot_16.webm
Skipping Calloused_Hands-1-shot_17.webm
Skipping Calloused_Hands-1-shot_18.webm
Skipping Calloused_Hands-1-shot_19.webm
Skipping Calloused_Hands-1-shot_20.webm
Skipping Calloused_Hands-1-shot_21.webm
Skipping Calloused_Hands-1-shot_22.webm
Skipping Calloused_Hands-1-shot_23.webm
Skipping Calloused_Hands-10-shot_0.webm
Skipping C