# Video to Frames

> Module to extract frame tensors from the annotated clips of the raw videos

In [None]:
# | default_exp preprocess.video_to_frames

In [None]:
# | hide
# Notebook-only setup: (re)load the formatter and auto-reload extensions.
%reload_ext nb_black
%reload_ext autoreload
%autoreload 2
# Sets CUDA_VISIBLE_DEVICES to an empty value for this kernel
# (presumably to keep the notebook off the GPUs - TODO confirm).
%env CUDA_VISIBLE_DEVICES=

from nbdev.showdoc import *
import sys

# Add the directory two levels up to the import path
# (presumably the repository root, so the package imports resolve - TODO confirm).
__root = "../../"
sys.path.append(__root)

In [None]:
# | export
from clip_video_classifier.cli import cli
from torch_snippets import *
from pytorchvideo.data.encoded_video import EncodedVideo
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo,
)

In [None]:
# | export


def get_transform(num_frames, side_size, crop_size):
    """Build the preprocessing transform applied to the ``"video"`` entry of a clip dict.

    The composed steps run in this order:

    1. ``UniformTemporalSubsample(num_frames)``
    2. division of every value by 255
    3. ``ShortSideScale(size=side_size)``
    4. ``CenterCropVideo`` with a square ``crop_size`` x ``crop_size`` window

    Args:
        num_frames: number of frames handed to the temporal subsampler.
        side_size: target size handed to ``ShortSideScale``.
        crop_size: edge length of the square centre crop.

    Returns:
        An ``ApplyTransformToKey`` that transforms only the ``"video"`` key.
    """

    def _divide_by_255(x):
        return x / 255.0

    steps = [
        UniformTemporalSubsample(num_frames),
        Lambda(_divide_by_255),
        ShortSideScale(size=side_size),
        CenterCropVideo(crop_size=(crop_size, crop_size)),
    ]
    return ApplyTransformToKey(key="video", transform=Compose(steps))


def video_to_frames(
    video_path,
    frames_path,
    start_sec: int,
    clip_duration: float,
    num_frames_per_sec: int = 10,
    side_size: int = 256,
    crop_size: int = 256,
):
    """Extract one clip from a video and dump its preprocessed frames to disk.

    The clip ``[start_sec, start_sec + clip_duration]`` is decoded, passed
    through ``get_transform`` and written to ``frames_path`` with ``dumpdill``.
    If ``frames_path`` already exists the extraction is skipped.

    Args:
        video_path: path of the encoded video file to read.
        frames_path: destination file for the frame tensor; its parent
            folder is created when missing.
        start_sec: clip start, in seconds.
        clip_duration: clip length, in seconds (may be fractional).
        num_frames_per_sec: frames to keep per second of clip.
        side_size: forwarded to ``get_transform``.
        crop_size: forwarded to ``get_transform``.

    Returns:
        None. The result is the file written at ``frames_path``.
    """
    if exists(frames_path):
        Info(f"Skipping extraction for {frames_path}")
        return
    makedir(parent(frames_path))
    end_sec = start_sec + clip_duration
    # A fractional `clip_duration` would otherwise yield a float frame count,
    # but the temporal subsampler needs a whole number of frames.
    num_frames = int(round(clip_duration * num_frames_per_sec))
    video = EncodedVideo.from_path(video_path)
    try:
        video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)
    finally:
        # Release the decoder/file handle even when decoding fails.
        video.close()
    video_data = get_transform(num_frames, side_size, crop_size)(video_data)
    # Swap the first two axes before saving
    # (presumably (C, T, H, W) -> (T, C, H, W) - TODO confirm against the decoder output).
    tensor_data = video_data["video"].permute(1, 0, 2, 3)
    dumpdill(tensor_data, frames_path)


@cli.command()
def extract_frames_for_annotations(
    annotations_path,
    videos_folder,
    frames_folder,
    num_frames_per_sec: int = 5,
    side_size: int = 256,
    crop_size: int = 256,
    exclude_others: bool = True,
    row_index: int = None,
):
    """Extract frame tensors for the clips listed in an annotations CSV.

    Each annotation row is turned into one ``<row index>.frames.tensor`` file
    inside ``frames_folder``, read from ``<video>.mp4`` inside ``videos_folder``.
    The CSV column ``class`` is renamed to ``activity``; rows whose activity is
    "others" are dropped when ``exclude_others`` is set. When ``row_index`` is
    given, only the row with that index label is processed.
    """
    videos_dir, frames_dir = P(videos_folder), P(frames_folder)

    table = pd.read_csv(annotations_path).rename({"class": "activity"}, axis=1)
    if exclude_others:
        table = table.query('activity != "others"')

    # Options shared by every extraction call.
    extraction_options = dict(
        num_frames_per_sec=num_frames_per_sec,
        side_size=side_size,
        crop_size=crop_size,
    )

    progress = track2(table.sort_values("video").iterrows(), total=len(table))
    for _, row in progress:
        selected = row_index is None or row.name == row_index
        if not selected:
            continue
        source_video = videos_dir / f"{row.video}.mp4"
        target = frames_dir / f"{row.name}.frames.tensor"
        # Push the current target into the progress display before the slow work starts.
        progress.send(f"Processing {target}")
        video_to_frames(
            source_video,
            target,
            row.start,
            row.clip_duration,
            **extraction_options,
        )

In [None]:
# Scratch data for the usage examples below; the dataset location is machine-specific.
root = P("/mnt/347832F37832B388/ml-datasets/ssbd/")
annotations_path = root / "annotations.csv"
annotations = pd.read_csv(annotations_path).rename(columns={"class": "activity"})

Usage for a single video
```python
root = P("/mnt/347832F37832B388/ml-datasets/ssbd/")
videos_folder = root / "ssbd-raw-videos"
frames_folder = root / "ssbd-frames-5fps"

annotations = (
    pd.read_csv(root / "annotations.csv")
    .rename({"class": "activity"}, axis=1)
    .query('activity != "others"')
)
row = annotations.iloc[0]  # pick any row of the annotations table
show(row.to_frame().T)

video = videos_folder / f"{row.video}.mp4"
frames = frames_folder / f"{row.name}.frames.tensor"
video_to_frames(video, frames, row.start, row.clip_duration, num_frames_per_sec=5)
```
---
Usage for all rows in annotations

```python
root = P("/mnt/347832F37832B388/ml-datasets/ssbd/")
annotations_path = root / "annotations.csv"
videos_folder = root / "ssbd-raw-videos"
frames_folder = root / "ssbd-frames-5fps"
num_frames_per_sec = 5
exclude_others = True

extract_frames_for_annotations(
    annotations_path,
    videos_folder,
    frames_folder,
    num_frames_per_sec=num_frames_per_sec,
    exclude_others=exclude_others,
)
```

In [None]:
import nbdev

# Regenerate the library modules from the notebooks' exported cells.
nbdev.nbdev_export()