## Experiment 1

### Imports

Import relevant modules

In [60]:
import importlib
import json
import urllib
import urllib.request
from typing import Dict

import torch
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformCropVideo,
    UniformTemporalSubsample,
)
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)

import configs

In [61]:
# Re-execute configs.py so edits made after the first import take effect
# in this kernel without a restart.
importlib.reload(configs)

<module 'configs' from '/Users/tanushreebanerjee/Desktop/COS429/cos429-final-project/utils/configs.py'>

### Setup

Load model

In [62]:
# Fetch the pretrained network named by configs.MODEL_NAME from the
# configs.MODEL_REPO torch hub repository
# (presumably `slowfast_r50` — confirm in configs.py).
model = torch.hub.load(
    configs.MODEL_REPO,
    configs.MODEL_NAME,
    pretrained=True,
)

Using cache found in /Users/tanushreebanerjee/.cache/torch/hub/facebookresearch_pytorchvideo_main


Set the model to eval mode and move to desired device.

In [63]:
# The target device (GPU or CPU) is chosen in configs.
device = configs.DEVICE
# Switch to inference mode, then move the module to the device; both calls
# return the module itself, so they can be chained.
model = model.eval().to(device)

Download the id-to-label mapping for the Kinetics-400 dataset, on which the torch hub models were trained.  
It is used to translate predicted class ids into category label names.

In [64]:
# Download the Kinetics-400 label mapping file to a local path.
# The previous `try: urllib.URLopener() ... except:` form relied on an
# attribute that does not exist in Python 3: it always raised, and the bare
# `except` then hid that error (and would hide Ctrl-C too). Calling
# `urlretrieve` directly lets real download failures surface.
json_url = configs.KINETICS_400_DATASET_URL
json_filename = configs.KINETICS_400_DATASET_FILENAME
# Trailing `;` keeps the returned (path, headers) tuple out of the cell output.
urllib.request.urlretrieve(json_url, json_filename);

In [65]:
# Parse the downloaded label file (keys are class names, values are ids).
with open(json_filename, "r") as label_file:
    kinetics_classnames = json.load(label_file)

# Invert it into an id -> class-name lookup, dropping any double quotes
# embedded in the names.
kinetics_id_to_classname = {
    class_id: str(class_name).replace('"', "")
    for class_name, class_id in kinetics_classnames.items()
}

### Define input transform

In [66]:
class PackPathway(torch.nn.Module):
    """
    Split a clip tensor into the two-element list fed to a SlowFast model.

    The fast pathway is the input unchanged. The slow pathway keeps
    ``frames.shape[1] // configs.slowfast_alpha`` frames whose indices are
    spread evenly over dim 1 of the input.
    """
    def __init__(self):
        super().__init__()

    def forward(self, frames: torch.Tensor):
        # Dim 1 is the axis that is subsampled — presumably time, i.e. a
        # (C, T, H, W) clip; confirm against the preceding transforms.
        total = frames.shape[1]
        kept = total // configs.slowfast_alpha
        # Evenly spaced positions from the first to the last frame,
        # truncated to integer indices.
        slow_indices = torch.linspace(0, total - 1, kept).long()
        # Order is [slow, fast].
        return [frames.index_select(1, slow_indices), frames]

# Preprocessing applied to the "video" entry of a clip.
# Steps run in order: pick configs.num_frames frames, divide by 255
# (presumably mapping 8-bit pixel values into [0, 1]), normalise with the
# configured mean/std, resize the short side, centre-crop, and finally split
# into the [slow, fast] pathway list.
transform = ApplyTransformToKey(
    key="video",
    transform=Compose([
        UniformTemporalSubsample(configs.num_frames),
        Lambda(lambda clip: clip / 255.0),
        NormalizeVideo(configs.mean, configs.std),
        ShortSideScale(size=configs.side_size),
        CenterCropVideo(configs.crop_size),
        PackPathway(),
    ]),
)

# Clip length the model expects: frames sampled times the sampling stride,
# divided by the frame rate (so presumably in seconds).
clip_duration = (
    configs.num_frames * configs.sampling_rate / configs.frames_per_second
)


### Run Inference
Download an example video.

In [67]:
# Download the example video to a local path.
# The previous `try: urllib.URLopener() ... except:` form relied on an
# attribute that does not exist in Python 3: it always raised, and the bare
# `except` then hid that error (and would hide Ctrl-C too). Calling
# `urlretrieve` directly lets real download failures surface.
url_link = configs.SAMPLE_VIDEO_URL
video_path = configs.SAMPLE_VIDEO_PATH
# Trailing `;` keeps the returned (path, headers) tuple out of the cell output.
urllib.request.urlretrieve(url_link, video_path);