In [None]:
!pip install tensorflow==2.1
!pip install ffmpeg
!pip install ffmpeg-python 
!pip install pillow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.1
  Downloading tensorflow-2.1.0-cp37-cp37m-manylinux2010_x86_64.whl (421.8 MB)
[K     |████████████████████████████████| 421.8 MB 16 kB/s 
[?25hCollecting tensorflow-estimator<2.2.0,>=2.1.0rc0
  Downloading tensorflow_estimator-2.1.0-py2.py3-none-any.whl (448 kB)
[K     |████████████████████████████████| 448 kB 57.1 MB/s 
[?25hCollecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Collecting scipy==1.4.1
  Downloading scipy-1.4.1-cp37-cp37m-manylinux1_x86_64.whl (26.1 MB)
[K     |████████████████████████████████| 26.1 MB 1.5 MB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 5.0 MB/s 
[?25hCollecting tensorboard<2.2.0,>=2.1.0
  Downloading tensorboard-2.1.1-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 41.5 MB/s 
Col

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6084 sha256=8a474bdaba5778588d7a581b7fe957f57f54ba4743072d7dfbecd98079b7cd70
  Stored in directory: /root/.cache/pip/wheels/64/80/6e/caa3e16deb0267c3cbfd36862058a724144e19fdb9eb03af0f
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!cd /content/drive/MyDrive/KLTN2022_ThanhHa/source/TransNetV2
!git lfs checkout

Error: Failed to call git rev-parse --git-dir --show-toplevel: "fatal: not a git repository (or any of the parent directories): .git\n"
Not in a git repository.


In [None]:
# Attach Google Drive so that paths below /content/drive become readable and writable.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os
import numpy as np
import tensorflow as tf

In [None]:
class TransNetV2:
    """Inference wrapper around a TransNet V2 SavedModel.

    The wrapper feeds 48x27 RGB frames to the model in overlapping windows of 100 frames
    and exposes helpers to turn per-frame predictions into scenes and into a PNG overview.
    """

    def __init__(self, model_dir=None):
        """Load the SavedModel.

        Args:
            model_dir: Directory holding the SavedModel. When ``None``, a
                ``transnetv2-weights/`` directory is looked up next to this file, or in
                the current working directory when ``__file__`` is not defined.

        Raises:
            FileNotFoundError: The inferred default directory does not exist.
            IOError: ``tf.saved_model.load`` failed with an ``OSError``.
        """
        if model_dir is None:
            try:
                base_dir = os.path.dirname(__file__)
            except NameError:
                # Interactive/notebook namespaces do not define __file__; without this
                # fallback the default lookup dies with a NameError.
                base_dir = os.getcwd()
            model_dir = os.path.join(base_dir, "transnetv2-weights/")
            if not os.path.isdir(model_dir):
                raise FileNotFoundError(
                    f"[TransNetV2] ERROR: {model_dir} is not a directory.")
            else:
                print(f"[TransNetV2] Using weights from {model_dir}.")

        # (height, width, channels) expected for every single frame.
        self._input_size = (27, 48, 3)
        try:
            self._model = tf.saved_model.load(model_dir)
        except OSError as exc:
            raise IOError(f"[TransNetV2] It seems that files in {model_dir} are corrupted or missing. "
                          f"Re-download them manually and retry. For more info, see: "
                          f"https://github.com/soCzech/TransNetV2/issues/1#issuecomment-647357796") from exc

    def predict_raw(self, frames: np.ndarray):
        """Run the model on a batch of frame windows.

        Args:
            frames: Array of rank 5 whose trailing dimensions equal ``(27, 48, 3)``,
                i.e. ``[batch, frames, height, width, 3]``.

        Returns:
            Tuple ``(single_frame_pred, all_frames_pred)``: the sigmoid of the model's
            first output and the sigmoid of the ``"many_hot"`` entry of its second output.
        """
        assert len(frames.shape) == 5 and frames.shape[2:] == self._input_size, \
            "[TransNetV2] Input shape must be [batch, frames, height, width, 3]."
        frames = tf.cast(frames, tf.float32)

        logits, dict_ = self._model(frames)
        single_frame_pred = tf.sigmoid(logits)
        all_frames_pred = tf.sigmoid(dict_["many_hot"])

        return single_frame_pred, all_frames_pred

    def predict_frames(self, frames: np.ndarray):
        """Predict per-frame values for a whole video.

        Args:
            frames: Array ``[frames, 27, 48, 3]`` with at least one frame.

        Returns:
            Tuple ``(single_frame_pred, all_frames_pred)`` of 1-D arrays, each of length
            ``len(frames)``.
        """
        assert len(frames.shape) == 4 and frames.shape[1:] == self._input_size, \
            "[TransNetV2] Input shape must be [frames, height, width, 3]."

        def input_iterator():
            # return windows of size 100 where the first/last 25 frames are from the previous/next batch
            # the first and last window must be padded by copies of the first and last frame of the video
            no_padded_frames_start = 25
            no_padded_frames_end = 25 + 50 - \
                (len(frames) % 50 if len(frames) % 50 != 0 else 50)  # 25 - 74

            start_frame = np.expand_dims(frames[0], 0)
            end_frame = np.expand_dims(frames[-1], 0)
            padded_inputs = np.concatenate(
                [start_frame] * no_padded_frames_start +
                [frames] + [end_frame] * no_padded_frames_end, 0
            )

            # Slide by 50 so consecutive windows overlap by half their length.
            ptr = 0
            while ptr + 100 <= len(padded_inputs):
                out = padded_inputs[ptr:ptr + 100]
                ptr += 50
                yield out[np.newaxis]

        predictions = []

        for inp in input_iterator():
            single_frame_pred, all_frames_pred = self.predict_raw(inp)
            # Keep only the central 50 frames of each window; the outer 25 on either
            # side are context shared with the neighbouring windows.
            predictions.append((single_frame_pred.numpy()[0, 25:75, 0],
                                all_frames_pred.numpy()[0, 25:75, 0]))

            print("\r[TransNetV2] Processing video frames {}/{}".format(
                min(len(predictions) * 50, len(frames)), len(frames)
            ), end="")
        print("")

        single_frame_pred = np.concatenate(
            [single_ for single_, all_ in predictions])
        all_frames_pred = np.concatenate(
            [all_ for single_, all_ in predictions])

        # remove extra padded frames
        return single_frame_pred[:len(frames)], all_frames_pred[:len(frames)]

    def predict_video(self, video_fn: str):
        """Decode a video with ffmpeg and predict per-frame values for it.

        Args:
            video_fn: Path of the video file handed to ``ffmpeg.input``.

        Returns:
            Tuple ``(video, single_frame_pred, all_frames_pred)`` where ``video`` is the
            decoded ``uint8`` array reshaped to ``[-1, 27, 48, 3]`` and the other two
            items are the results of :meth:`predict_frames`.

        Raises:
            ModuleNotFoundError: The ``ffmpeg`` Python module cannot be imported.
        """
        try:
            import ffmpeg
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError("For `predict_video` function `ffmpeg` needs to be installed in order to extract "
                                      "individual frames from video file. Install `ffmpeg` command line tool and then "
                                      "install python wrapper by `pip install ffmpeg-python`.") from exc

        print("[TransNetV2] Extracting frames from {}".format(video_fn))
        # Raw RGB frames already scaled to 48x27 are read straight from ffmpeg's stdout.
        video_stream, _ = ffmpeg.input(video_fn).output(
            "pipe:", format="rawvideo", pix_fmt="rgb24", s="48x27"
        ).run(capture_stdout=True, capture_stderr=True)

        video = np.frombuffer(video_stream, np.uint8).reshape([-1, 27, 48, 3])
        return (video, *self.predict_frames(video))

    @staticmethod
    def predictions_to_scenes(predictions: np.ndarray, threshold: float = 0.5):
        """Convert per-frame predictions into ``[start, end]`` frame index pairs.

        Args:
            predictions: 1-D array of per-frame values.
            threshold: Values strictly above it are treated as transition frames.

        Returns:
            ``int32`` array of shape ``[n_scenes, 2]``. Empty input yields an empty
            ``(0, 2)`` array; input that is entirely above the threshold yields the
            single scene ``[0, len(predictions) - 1]``.
        """
        predictions = (predictions > threshold).astype(np.uint8)

        # Nothing to segment: return no scenes instead of the invalid pair [0, -1].
        if len(predictions) == 0:
            return np.zeros((0, 2), dtype=np.int32)

        scenes = []
        t_prev, start = 0, 0
        for i, t in enumerate(predictions):
            # 1 -> 0: a transition has ended, a new scene starts here.
            if t_prev == 1 and t == 0:
                start = i
            # 0 -> 1: a transition begins, close the running scene.
            if t_prev == 0 and t == 1 and i != 0:
                scenes.append([start, i])
            t_prev = t
        # The video ended inside a scene: close it at the last frame.
        if t == 0:
            scenes.append([start, i])

        # just fix if all predictions are 1
        if len(scenes) == 0:
            return np.array([[0, len(predictions) - 1]], dtype=np.int32)

        return np.array(scenes, dtype=np.int32)

    @staticmethod
    def visualize_predictions(frames: np.ndarray, predictions):
        """Render all frames in a grid, 25 per row, with prediction bars beside each frame.

        Args:
            frames: Array ``[frames, height, width, channels]``.
            predictions: One array of per-frame values, or a sequence of such arrays;
                each gets its own one-pixel-wide column next to every frame.

        Returns:
            The ``PIL.Image.Image`` holding the grid.
        """
        from PIL import Image, ImageDraw

        if isinstance(predictions, np.ndarray):
            predictions = [predictions]

        ih, iw, ic = frames.shape[1:]
        width = 25

        # pad frames so that length of the video is divisible by width
        # pad frames also by len(predictions) pixels in width in order to show predictions
        pad_with = width - \
            len(frames) % width if len(frames) % width != 0 else 0
        frames = np.pad(frames, [(0, pad_with), (0, 1),
                        (0, len(predictions)), (0, 0)])

        predictions = [np.pad(x, (0, pad_with)) for x in predictions]
        height = len(frames) // width

        # Arrange the padded frames into `height` rows of `width` tiles and stitch them
        # into a single image, dropping the final padding row of pixels.
        img = frames.reshape(
            [height, width, ih + 1, iw + len(predictions), ic])
        img = np.concatenate(np.split(
            np.concatenate(np.split(img, height), axis=2)[0], width
        ), axis=2)[0, :-1]

        img = Image.fromarray(img)
        draw = ImageDraw.Draw(img)

        # iterate over all frames
        for i, pred in enumerate(zip(*predictions)):
            x, y = i % width, i // width
            x, y = x * (iw + len(predictions)) + iw, y * (ih + 1) + ih - 1

            # we can visualize multiple predictions per single frame
            for j, p in enumerate(pred):
                color = [0, 0, 0]
                color[(j + 1) % 3] = 255

                # Bar length is proportional to the prediction value.
                value = round(p * (ih - 1))
                if value != 0:
                    draw.line((x + j, y, x + j, y - value),
                              fill=tuple(color), width=1)
        return img


In [None]:
def main(argv=None):
    """Run TransNet V2 over every video found one level below the input directories.

    For each sub-folder of every ``--files`` directory, each video with a supported
    extension gets a ``<video>.predictions.txt`` and a ``<video>.scenes.txt`` file
    (plus ``<video>.vis.png`` with ``--visualize``). Videos whose prediction or scene
    file already exists are skipped.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default) makes
            argparse read ``sys.argv`` as before.
    """
    import sys
    import argparse

    parser = argparse.ArgumentParser()
    # NOTE(review): presumably present to swallow the "-f <connection file>" argument a
    # notebook kernel is started with; the parsed value is never used.
    parser.add_argument("-f")
    # The default is a list so that it has the same type as values given on the command line.
    parser.add_argument("--files", type=str, nargs="+",
                        default=["/content/drive/MyDrive/KLTN2022_ThanhHa/dataset/TRECVID_MSUM_2022/scenes"],
                        help="directories whose sub-folders contain the video files to process")
    parser.add_argument("--weights", type=str, default= "/content/drive/MyDrive/KLTN2022_ThanhHa/source/TransNetV2/inference/transnetv2-weights",
                        help="path to TransNet V2 weights, tries to infer the location if not specified")
    parser.add_argument("--save_dir", type=str, default= "/content/drive/MyDrive/KLTN2022_ThanhHa/source/TransNetV2/inference/transnetv2_msum",
                        help="Where to store output files (one sub-folder per input folder). "
                             "If empty, outputs are written next to the input videos")
    parser.add_argument('--visualize', action="store_true",
                        help="save a png file with prediction visualization for each extracted video")

    args = parser.parse_args(argv)

    supported_exts = (".mp4", ".avi", ".webm")

    # Loaded on first use: when every video is skipped the weights are never needed.
    model = None

    for root in args.files:
        print(root)

        # Only directories can hold videos; stray files in the root are ignored.
        folders = sorted(name for name in os.listdir(root)
                         if os.path.isdir(os.path.join(root, name)))
        print(f"Found {len(folders)} folders")

        for folder in folders:
            folder_path = os.path.join(root, folder)
            print(f"Processing in folder {folder}")

            videos = sorted(p for p in os.listdir(folder_path)
                            if os.path.splitext(p)[-1].lower() in supported_exts)

            print(f"Found {len(videos)} videos inside {folder}")

            # Without --save_dir the results live beside the input videos.
            out_dir = os.path.join(args.save_dir, folder) if args.save_dir else folder_path

            for file in videos:
                print(f"Processing video {file}")

                pred_path = os.path.join(out_dir, file + ".predictions.txt")
                scene_path = os.path.join(out_dir, file + ".scenes.txt")
                vis_path = os.path.join(out_dir, file + ".vis.png")

                file_path = os.path.join(folder_path, file)

                if os.path.exists(pred_path) or os.path.exists(scene_path):
                    print(f"[TransNetV2] {file}.predictions.txt or {file}.scenes.txt already exists. "
                          f"Skipping video {file}.", file=sys.stderr)
                    continue

                if model is None:
                    model = TransNetV2(args.weights)
                os.makedirs(out_dir, exist_ok=True)

                video_frames, single_frame_predictions, all_frame_predictions = \
                    model.predict_video(file_path)

                # One row per frame: column 0 single-frame, column 1 all-frames prediction.
                predictions = np.stack(
                    [single_frame_predictions, all_frame_predictions], 1)
                np.savetxt(pred_path, predictions, fmt="%.6f")

                scenes = model.predictions_to_scenes(single_frame_predictions)
                np.savetxt(scene_path, scenes, fmt="%d")

                if args.visualize:
                    if os.path.exists(vis_path):
                        print(f"[TransNetV2] {file}.vis.png already exists. "
                              f"Skipping visualization of video {file}.", file=sys.stderr)
                    else:
                        pil_image = model.visualize_predictions(
                            video_frames, predictions=(single_frame_predictions, all_frame_predictions))
                        pil_image.save(vis_path)


# Start the batch run only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

/content/drive/MyDrive/KLTN2022_ThanhHa/dataset/TRECVID_MSUM_2022/scenes
/
Found 5 folders
Processing in folder Calloused_Hands
Found 58 videos inside Calloused_Hands
Processing video Calloused_Hands-1.webm
Processing video Calloused_Hands-10.webm
Processing video Calloused_Hands-11.webm
Processing video Calloused_Hands-12.webm
Processing video Calloused_Hands-13.webm
Processing video Calloused_Hands-14.webm
Processing video Calloused_Hands-15.webm
Processing video Calloused_Hands-16.webm
Processing video Calloused_Hands-17.webm
Processing video Calloused_Hands-18.webm
Processing video Calloused_Hands-19.webm
Processing video Calloused_Hands-2.webm
Processing video Calloused_Hands-20.webm
Processing video Calloused_Hands-21.webm
Processing video Calloused_Hands-22.webm
Processing video Calloused_Hands-23.webm
Processing video Calloused_Hands-24.webm
Processing video Calloused_Hands-25.webm
Processing video Calloused_Hands-26.webm
Processing video Calloused_Hands-27.webm
Processing vide

[TransNetV2] Calloused_Hands-1.webm.predictions.txt or Calloused_Hands-1.webm.scenes.txt already exists. Skipping video Calloused_Hands-1.webm.
[TransNetV2] Calloused_Hands-10.webm.predictions.txt or Calloused_Hands-10.webm.scenes.txt already exists. Skipping video Calloused_Hands-10.webm.
[TransNetV2] Calloused_Hands-11.webm.predictions.txt or Calloused_Hands-11.webm.scenes.txt already exists. Skipping video Calloused_Hands-11.webm.
[TransNetV2] Calloused_Hands-12.webm.predictions.txt or Calloused_Hands-12.webm.scenes.txt already exists. Skipping video Calloused_Hands-12.webm.
[TransNetV2] Calloused_Hands-13.webm.predictions.txt or Calloused_Hands-13.webm.scenes.txt already exists. Skipping video Calloused_Hands-13.webm.
[TransNetV2] Calloused_Hands-14.webm.predictions.txt or Calloused_Hands-14.webm.scenes.txt already exists. Skipping video Calloused_Hands-14.webm.
[TransNetV2] Calloused_Hands-15.webm.predictions.txt or Calloused_Hands-15.webm.scenes.txt already exists. Skipping video

Processing video Calloused_Hands-55.webm
Processing video Calloused_Hands-56.webm
Processing video Calloused_Hands-57.webm
Processing video Calloused_Hands-58.webm
Processing video Calloused_Hands-6.webm
Processing video Calloused_Hands-7.webm
Processing video Calloused_Hands-8.webm
Processing video Calloused_Hands-9.webm
Processing in folder Liberty_Kid
Found 56 videos inside Liberty_Kid
Processing video Liberty_Kid-1.webm
Processing video Liberty_Kid-10.webm
Processing video Liberty_Kid-11.webm
Processing video Liberty_Kid-12.webm
Processing video Liberty_Kid-13.webm
Processing video Liberty_Kid-14.webm
Processing video Liberty_Kid-15.webm
Processing video Liberty_Kid-16.webm
Processing video Liberty_Kid-17.webm
Processing video Liberty_Kid-18.webm
Processing video Liberty_Kid-19.webm
Processing video Liberty_Kid-2.webm
Processing video Liberty_Kid-20.webm
Processing video Liberty_Kid-21.webm
Processing video Liberty_Kid-22.webm
Processing video Liberty_Kid-23.webm
Processing video L

[TransNetV2] Liberty_Kid-13.webm.predictions.txt or Liberty_Kid-13.webm.scenes.txt already exists. Skipping video Liberty_Kid-13.webm.
[TransNetV2] Liberty_Kid-14.webm.predictions.txt or Liberty_Kid-14.webm.scenes.txt already exists. Skipping video Liberty_Kid-14.webm.
[TransNetV2] Liberty_Kid-15.webm.predictions.txt or Liberty_Kid-15.webm.scenes.txt already exists. Skipping video Liberty_Kid-15.webm.
[TransNetV2] Liberty_Kid-16.webm.predictions.txt or Liberty_Kid-16.webm.scenes.txt already exists. Skipping video Liberty_Kid-16.webm.
[TransNetV2] Liberty_Kid-17.webm.predictions.txt or Liberty_Kid-17.webm.scenes.txt already exists. Skipping video Liberty_Kid-17.webm.
[TransNetV2] Liberty_Kid-18.webm.predictions.txt or Liberty_Kid-18.webm.scenes.txt already exists. Skipping video Liberty_Kid-18.webm.
[TransNetV2] Liberty_Kid-19.webm.predictions.txt or Liberty_Kid-19.webm.scenes.txt already exists. Skipping video Liberty_Kid-19.webm.
[TransNetV2] Liberty_Kid-2.webm.predictions.txt or Libe

Processing video like_me-15.webm
Processing video like_me-16.webm
Processing video like_me-17.webm
Processing video like_me-18.webm
Processing video like_me-19.webm
Processing video like_me-2.webm
Processing video like_me-20.webm
Processing video like_me-21.webm
Processing video like_me-22.webm
Processing video like_me-23.webm
Processing video like_me-24.webm
Processing video like_me-25.webm
Processing video like_me-26.webm
Processing video like_me-27.webm
Processing video like_me-28.webm
Processing video like_me-3.webm
Processing video like_me-4.webm
Processing video like_me-5.webm
Processing video like_me-6.webm
Processing video like_me-7.webm
Processing video like_me-8.webm
Processing video like_me-9.webm
Processing in folder Memphis
Found 47 videos inside Memphis
Processing video Memphis-1.webm
Processing video Memphis-10.webm
Processing video Memphis-11.webm
Processing video Memphis-12.webm
Processing video Memphis-13.webm
Processing video Memphis-14.webm
Processing video Memphis-

[TransNetV2] like_me-24.webm.predictions.txt or like_me-24.webm.scenes.txt already exists. Skipping video like_me-24.webm.
[TransNetV2] like_me-25.webm.predictions.txt or like_me-25.webm.scenes.txt already exists. Skipping video like_me-25.webm.
[TransNetV2] like_me-26.webm.predictions.txt or like_me-26.webm.scenes.txt already exists. Skipping video like_me-26.webm.
[TransNetV2] like_me-27.webm.predictions.txt or like_me-27.webm.scenes.txt already exists. Skipping video like_me-27.webm.
[TransNetV2] like_me-28.webm.predictions.txt or like_me-28.webm.scenes.txt already exists. Skipping video like_me-28.webm.
[TransNetV2] like_me-3.webm.predictions.txt or like_me-3.webm.scenes.txt already exists. Skipping video like_me-3.webm.
[TransNetV2] like_me-4.webm.predictions.txt or like_me-4.webm.scenes.txt already exists. Skipping video like_me-4.webm.
[TransNetV2] like_me-5.webm.predictions.txt or like_me-5.webm.scenes.txt already exists. Skipping video like_me-5.webm.
[TransNetV2] like_me-6.we

Processing video losing_ground-17.webm
Processing video losing_ground-18.webm
Processing video losing_ground-19.webm
Processing video losing_ground-2.webm
Processing video losing_ground-20.webm
Processing video losing_ground-21.webm
Processing video losing_ground-22.webm
Processing video losing_ground-23.webm
Processing video losing_ground-24.webm
Processing video losing_ground-25.webm
Processing video losing_ground-26.webm
Processing video losing_ground-27.webm
Processing video losing_ground-28.webm
Processing video losing_ground-29.webm
Processing video losing_ground-3.webm
Processing video losing_ground-30.webm
Processing video losing_ground-31.webm
Processing video losing_ground-32.webm
Processing video losing_ground-33.webm
Processing video losing_ground-34.webm
Processing video losing_ground-35.webm
Processing video losing_ground-36.webm
Processing video losing_ground-37.webm
Processing video losing_ground-38.webm
Processing video losing_ground-39.webm
Processing video losing_gro

[TransNetV2] losing_ground-22.webm.predictions.txt or losing_ground-22.webm.scenes.txt already exists. Skipping video losing_ground-22.webm.
[TransNetV2] losing_ground-23.webm.predictions.txt or losing_ground-23.webm.scenes.txt already exists. Skipping video losing_ground-23.webm.
[TransNetV2] losing_ground-24.webm.predictions.txt or losing_ground-24.webm.scenes.txt already exists. Skipping video losing_ground-24.webm.
[TransNetV2] losing_ground-25.webm.predictions.txt or losing_ground-25.webm.scenes.txt already exists. Skipping video losing_ground-25.webm.
[TransNetV2] losing_ground-26.webm.predictions.txt or losing_ground-26.webm.scenes.txt already exists. Skipping video losing_ground-26.webm.
[TransNetV2] losing_ground-27.webm.predictions.txt or losing_ground-27.webm.scenes.txt already exists. Skipping video losing_ground-27.webm.
[TransNetV2] losing_ground-28.webm.predictions.txt or losing_ground-28.webm.scenes.txt already exists. Skipping video losing_ground-28.webm.
[TransNetV2] 