In [1]:
import sys
import os

In [2]:
# Put the project's `app` directory on the import path (the later
# `from core...` imports presumably resolve from there — confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same directory to sys.path again.
app_dir = os.path.abspath('../../app')
if app_dir not in sys.path:
    sys.path.append(app_dir)

In [3]:
!pip install huggingface-hub
!pip install matplotlib
!pip install datasets



In [4]:
from huggingface_hub import hf_hub_download
from core.feluda import Feluda
from core.models.media_factory import VideoFactory
import cv2
import tarfile
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

  from .autonotebook import tqdm as notebook_tqdm



### Dataset Structure Breakdown

- **UCF101_subset/**: The root directory containing the dataset.
  - **train/**: Contains training samples.
    - Each subdirectory under `train/` corresponds to a specific action class:
      - `BenchPress`
      - `BasketballDunk`
      - `BalanceBeam`
      - `ApplyLipstick`
      - `BabyCrawling`
      - `ApplyEyeMakeup`
      - `Archery`
      - `BandMarching`
      - `BaseballPitch`
      - `Basketball`
  - **test/**: Same as train.
  - **val/**: Same as train.

We'll use the `train` subset as our example here.

In [5]:
# Download the UCF101 subset archive from the Hugging Face Hub and unpack it
# into the current working directory.

dataset_name = "UCF101_subset/train"
hf_dataset_identifier = "sayakpaul/ucf101-subset"
filename = "UCF101_subset.tar.gz"
# The value returned here is passed straight to tarfile.open below.
file_path = hf_hub_download(repo_id=hf_dataset_identifier, filename=filename, repo_type="dataset")


with tarfile.open(file_path) as t:
    # The archive comes from the network, so extract it with the "data" filter,
    # which rejects members that would be written outside the destination
    # (absolute paths, `..` components, unsafe links). It also avoids the
    # DeprecationWarning Python 3.12+ emits when no filter is given.
    if hasattr(tarfile, "data_filter"):
        t.extractall(".", filter="data")
    else:
        # Older Python without extraction filters: fall back to the previous behavior.
        t.extractall(".")


### Initializing Feluda class with config file

We'll use a single operator for this example: a zero-shot video classifier (`classify_video_zero_shot`), configured in `classify-video.yml`.

In [6]:
# Build Feluda from the YAML config and run its setup step.
feluda = Feluda("classify-video.yml")
feluda.setup()

# Pick the operator whose type is named by the first entry of the
# config's operator parameters.
available_operators = feluda.operators.get()
operator_type = feluda.config.operators.parameters[0].type
classify_video_operator = available_operators[operator_type]

Installing packages for classify_video_zero_shot


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]


In [7]:
# Sample clip used for the demo
# (notebooks/UCF101_subset/train/Archery/v_Archery_g01_c04.avi in the repository).
sample_video_file = os.path.join(dataset_name, 'Archery', 'v_Archery_g01_c04.avi')
video_path = VideoFactory.make_from_file_on_disk(sample_video_file)

In [9]:
# Candidate labels are the class sub-directories of the dataset split.
# Only directories are kept, so stray files (e.g. `.DS_Store`) can never be
# offered to the classifier as labels. The os.listdir order is retained.
labels = [
    entry
    for entry in os.listdir(dataset_name)
    if os.path.isdir(os.path.join(dataset_name, entry))
]

In [11]:
# Run the classification operator on the sample clip against the candidate labels.
temp = classify_video_operator.run(video_path, labels)

In [12]:
# Display the operator's result: the predicted label ('prediction') and the
# list of probabilities ('probs').
temp

{'prediction': 'Archery',
 'probs': [2.565195427450817e-05,
  0.00012317295477259904,
  0.00017170717183034867,
  1.4688222336189938e-06,
  6.08291884418577e-07,
  2.1681134967366233e-06,
  0.9996010661125183,
  3.12440242851153e-05,
  1.676418469287455e-05,
  2.6119405447389e-05]}

In [13]:
# Display the candidate labels for comparison with the `probs` list shown earlier
# (in the recorded run the highest probability sits at the index of 'Archery').
labels

['BenchPress',
 'BasketballDunk',
 'BalanceBeam',
 'ApplyLipstick',
 'BabyCrawling',
 'ApplyEyeMakeup',
 'Archery',
 'BandMarching',
 'BaseballPitch',
 'Basketball']