In [11]:
import json
import os
import urllib
import urllib.request
from typing import Dict

import torch
import pandas as pd
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
)

In [12]:
# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [14]:
# Fetch the pretrained slowfast_r50 entry point from the PyTorchVideo hub repo.
model = torch.hub.load(
    'facebookresearch/pytorchvideo',
    'slowfast_r50',
    pretrained=True,
)

# Switch to evaluation mode, then move the model onto the selected device.
model = model.eval()
model = model.to(device)

# URL of the JSON file that maps class names to class ids, and the local
# filename it is saved under.
json_url = "https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json"
json_filename = "kinetics_classnames.json"

# Download with the Python 3 API directly. The previous code first called
# urllib.URLopener(), which does not exist on Python 3, and relied on a bare
# `except:` to fall through to this call; that bare except would also have
# hidden any unrelated error. Failures now surface as normal exceptions.
urllib.request.urlretrieve(json_url, json_filename)

# Load class name mapping (class name -> class id)
with open(json_filename, "r") as f:
    kinetics_classnames = json.load(f)

# Invert it into a class id -> class name mapping, removing any literal
# double-quote characters from the names.
kinetics_id_to_classname = {
    class_id: str(class_name).replace('"', "")
    for class_name, class_id in kinetics_classnames.items()
}

# Settings for the input transform.
# Spatial: size passed to ShortSideScale and to CenterCropVideo respectively.
side_size = crop_size = 256
# Per-channel normalization statistics handed to NormalizeVideo.
mean, std = [0.45] * 3, [0.225] * 3
# Temporal: number of frames kept by UniformTemporalSubsample, plus the
# sampling rate and frame rate of the source clip.
num_frames, sampling_rate, frames_per_second = 32, 2, 30
# Ratio used by PackPathway to thin out the slow pathway.
slowfast_alpha = 4
# NOTE(review): num_clips and num_crops are not referenced anywhere in the
# visible code — confirm whether they are still needed.
num_clips, num_crops = 10, 3

class PackPathway(torch.nn.Module):
    """
    Transform that converts a clip tensor into the list
    ``[slow_pathway, fast_pathway]``.

    Args:
        alpha: Ratio between the number of fast-pathway and slow-pathway
            entries along dimension 1. When left as ``None`` (the default) the
            module-level ``slowfast_alpha`` is read each time ``forward`` runs,
            which is the original behaviour.
    """
    def __init__(self, alpha=None):
        super().__init__()
        self.alpha = alpha

    def forward(self, frames: torch.Tensor):
        """
        Split ``frames`` into the two pathway tensors.

        Args:
            frames: Clip tensor; dimension 1 is the one that gets subsampled
                (assumed to be the temporal axis — TODO confirm layout with
                the upstream transforms).

        Returns:
            ``[slow_pathway, fast_pathway]`` where ``fast_pathway`` is
            ``frames`` itself and ``slow_pathway`` keeps
            ``frames.shape[1] // alpha`` entries of dimension 1, taken at
            evenly spaced positions between the first and the last index.
        """
        alpha = slowfast_alpha if self.alpha is None else self.alpha
        fast_pathway = frames
        # Perform temporal sampling from the fast pathway.
        num_entries = frames.shape[1]
        slow_indices = torch.linspace(
            0, num_entries - 1, num_entries // alpha
        ).long()
        # index_select requires the index tensor on the same device as its
        # input; the indices are computed on the CPU (same values as before)
        # and then moved, so clips that already live on a GPU work too.
        slow_indices = slow_indices.to(frames.device)
        slow_pathway = torch.index_select(frames, 1, slow_indices)
        return [slow_pathway, fast_pathway]

# Steps applied, in order, to the tensor stored under the "video" key:
# subsample to num_frames frames, divide values by 255, normalize with
# mean/std, rescale the short side to side_size, center-crop to crop_size,
# and finally pack the result into the [slow, fast] pathway list.
video_pipeline = Compose(
    [
        UniformTemporalSubsample(num_frames),
        Lambda(lambda clip: clip / 255.0),
        NormalizeVideo(mean, std),
        ShortSideScale(size=side_size),
        CenterCropVideo(crop_size),
        PackPathway(),
    ]
)

# Wrap the pipeline so it only touches the "video" entry of a clip dict.
transform = ApplyTransformToKey(key="video", transform=video_pipeline)

# Load video files from a folder (replace with your video folder path)
# NOTE(review): this is an absolute, machine-specific path — consider moving it
# to a configurable data directory.
video_folder = "C:/Users/andre/OneDrive/Documents/GitHub/Brainvision_Project/Motion_files/videos_processed"
# Sorted so the rows of the output CSV come out in a deterministic order;
# os.listdir returns entries in arbitrary order.
video_files = sorted(f for f in os.listdir(video_folder) if f.endswith(".mp4"))

# kinetics_id_to_classname (class id -> label name) is already built earlier in
# this cell, so the class-name JSON is not downloaded and parsed a second time.

# Duration, in seconds, of the clip taken from the start of each video.
# num_frames frames are kept from a clip read at one frame every sampling_rate
# frames, so the clip has to span num_frames * sampling_rate source frames
# (previously sampling_rate was ignored, giving a clip half as long).
clip_duration = (num_frames * sampling_rate) / frames_per_second

# Softmax over dimension 1 of the model output; created once instead of once
# per video.
post_act = torch.nn.Softmax(dim=1)

# One {"Video", "Top1_Label"} record per processed video. The DataFrame is
# built from this list in a single step after the loop, rather than being
# re-created with pd.concat on every iteration.
records = []

# Iterate through video files and get top-1 labels
for video_file in video_files:
    video_id = os.path.splitext(video_file)[0]  # Extract video ID from filename

    # Initialize an EncodedVideo helper class and load the video
    video_path = os.path.join(video_folder, video_file)
    video = EncodedVideo.from_path(video_path)

    # Load the desired clip
    video_data = video.get_clip(start_sec=0, end_sec=clip_duration)

    # Apply the transformation and move inputs to the device;
    # [None, ...] adds a leading dimension of size 1 to each pathway tensor.
    video_data = transform(video_data)
    inputs = video_data["video"]
    inputs = [i.to(device)[None, ...] for i in inputs]

    # Pass the input clip through the model. Gradients are not needed for
    # inference, so autograd tracking is disabled to save memory.
    with torch.no_grad():
        preds = model(inputs)
        preds = post_act(preds)

    # Get the predicted class
    pred_class = preds.argmax(dim=1).item()

    # Map the predicted class to the label name
    top1_label = kinetics_id_to_classname[pred_class]

    records.append({"Video": video_id, "Top1_Label": top1_label})

# Build the results table; passing columns keeps the header even when no
# videos were found.
labels_df = pd.DataFrame(records, columns=["Video", "Top1_Label"])

# Save the results to a CSV file
labels_df.to_csv("video_labels.csv", index=False)

print("Labels saved to video_labels.csv")

Using cache found in C:\Users\andre/.cache\torch\hub\facebookresearch_pytorchvideo_main


Labels saved to video_labels.csv
