In [1]:
!pip install huggingface_hub



In [15]:
# 1. Install the dotenv library
!pip install python-dotenv huggingface_hub

import os
from dotenv import load_dotenv
from huggingface_hub import login

# 2. Load the .env file
# If the file is in /content/vision/my.env, use:
load_dotenv('/content/my.env')

# 3. Retrieve and use the token
hf_token = os.getenv('HF_TOKEN')

if hf_token:
    login(token=hf_token)
    print("Successfully logged in to Hugging Face!")
else:
    print("Error: HF_TOKEN not found in .env file.")



Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Successfully logged in to Hugging Face!


In [3]:
from __future__ import annotations

import argparse
from pathlib import Path
import zipfile

from huggingface_hub import snapshot_download


def parse_args(argv=None) -> argparse.Namespace:
    """Parse the downloader's command-line options.

    Args:
        argv: Sequence of argument strings to parse. ``None`` makes argparse
            read the process arguments instead.

    Returns:
        Namespace with ``output_dir`` (Path), ``repo_id``, ``revision``,
        ``allow_patterns`` (raw comma-separated string or None) and
        ``unzip_videos`` (bool).

    Arguments that are not declared below are ignored rather than rejected,
    because only the "known" half of ``parse_known_args`` is returned.
    """
    cli = argparse.ArgumentParser(
        description="Download the OpenGVLab/MVBench dataset from Hugging Face."
    )

    # (flag, add_argument keyword settings), registered in display order.
    option_table = (
        (
            "--output-dir",
            {
                "type": Path,
                "default": Path("mvbench_data"),
                "help": "Where to store the downloaded dataset files.",
            },
        ),
        (
            "--repo-id",
            {
                "default": "OpenGVLab/MVBench",
                "help": "Hugging Face dataset repo ID.",
            },
        ),
        (
            "--revision",
            {
                "default": None,
                "help": "Optional git revision (branch/tag/commit).",
            },
        ),
        (
            "--allow-patterns",
            {
                "default": None,
                "help": (
                    "Comma-separated list of glob patterns to include. "
                    "Leave empty to download everything."
                ),
            },
        ),
        (
            "--unzip-videos",
            {
                "action": "store_true",
                "help": "Unzip all .zip files under the 'video' folder after download.",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    known, _ignored = cli.parse_known_args(argv)
    return known


def _extract_zip_archives(folder: Path) -> None:
    """Extract every ``.zip`` found under ``folder`` next to the archive itself."""
    for archive_path in sorted(folder.rglob("*.zip")):
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(archive_path.parent)


def main(argv=None) -> None:
    """Download the dataset snapshot and optionally unpack its video archives.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``.
            ``None`` (the default) keeps the previous behaviour of reading the
            process arguments. Passing a list makes the options usable from a
            notebook cell, e.g. ``main(["--unzip-videos"])``.

    Side effects:
        Creates the output directory, downloads the repository files into it,
        extracts ``.zip`` files under ``<output_dir>/video`` when
        ``--unzip-videos`` is set, and prints the final location.
    """
    args = parse_args(argv)
    output_dir = args.output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Turn "a, b ,c" into ["a", "b", "c"]; None means "download everything".
    allow_patterns = None
    if args.allow_patterns:
        allow_patterns = [p.strip() for p in args.allow_patterns.split(",") if p.strip()]

    # Download all files in the dataset repository (full snapshot).
    # NOTE(review): local_dir_use_symlinks appears to be deprecated in recent
    # huggingface_hub releases - confirm against the installed version.
    snapshot_download(
        repo_id=args.repo_id,
        repo_type="dataset",
        revision=args.revision,
        local_dir=str(output_dir),
        local_dir_use_symlinks=False,
        allow_patterns=allow_patterns,
    )

    if args.unzip_videos:
        video_dir = output_dir / "video"
        if video_dir.exists():
            _extract_zip_archives(video_dir)
        else:
            print(f"Video folder not found at: {video_dir}")

    print(f"MVBench dataset downloaded to: {output_dir}")


# Entry point: runs the download when this code executes as the main module.
# The captured cell output shows this also fires when the cell is run in the notebook.
if __name__ == "__main__":
    main()




Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 41 files:   0%|          | 0/41 [00:00<?, ?it/s]

MVBench dataset downloaded to: /content/mvbench_data


In [4]:
# Change directory to the video folder
%cd /content/mvbench_data/video

# Unzip all files quietly (-q) so the output doesn't lag your browser
# and delete the zip files afterward (-j can be used if you want to flatten structure)
!unzip -q "*.zip"

# Optional: Remove the zip files to save space once extracted
!rm *.zip

/content/mvbench_data/video

11 archives were successfully processed.


In [23]:
!git clone https://github.com/s973343/vision.git

Cloning into 'vision'...
remote: Enumerating objects: 23, done.[K
remote: Counting objects:   4% (1/23)[Kremote: Counting objects:   8% (2/23)[Kremote: Counting objects:  13% (3/23)[Kremote: Counting objects:  17% (4/23)[Kremote: Counting objects:  21% (5/23)[Kremote: Counting objects:  26% (6/23)[Kremote: Counting objects:  30% (7/23)[Kremote: Counting objects:  34% (8/23)[Kremote: Counting objects:  39% (9/23)[Kremote: Counting objects:  43% (10/23)[Kremote: Counting objects:  47% (11/23)[Kremote: Counting objects:  52% (12/23)[Kremote: Counting objects:  56% (13/23)[Kremote: Counting objects:  60% (14/23)[Kremote: Counting objects:  65% (15/23)[Kremote: Counting objects:  69% (16/23)[Kremote: Counting objects:  73% (17/23)[Kremote: Counting objects:  78% (18/23)[Kremote: Counting objects:  82% (19/23)[Kremote: Counting objects:  86% (20/23)[Kremote: Counting objects:  91% (21/23)[Kremote: Counting objects:  95% (22/23)[Kremote: Counting o

In [20]:
# Debug check: print the current working directory.
!pwd

/content/mvbench_data/video


In [24]:
# Make /content/vision the kernel's working directory; the next cell installs
# from a relative requirements.txt path.
%cd /content/vision

/content/vision


In [25]:
!pip install -r requirements.txt

Collecting git+https://github.com/openai/CLIP.git (from -r requirements.txt (line 32))
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-de_nsonc
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-de_nsonc
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting chromadb (from -r requirements.txt (line 7))
  Downloading chromadb-1.4.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting uuid (from -r requirements.txt (line 18))
  Downloading uuid-1.30.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting groq (from -r requirements.txt (line 20))
  Downloading groq-1.0.0-py3-none-any.whl.metadata (16 kB)
Collecting scenedetect (from -r requirements.txt (line 22))
  Downloading scenedetect-0.6.7.1-py3-none-any.whl.metadata (3.8 kB)
Collecting av (from -r

In [None]:
# Run the batch script over the downloaded annotation (json) and video folders.
# NOTE(review): the captured log shows output going to the relative path
# data/audio/mvbench/, so results presumably land under the current working
# directory - confirm the kernel is in the intended folder before running.
# NOTE(review): the log contains full ffmpeg banners for each item and was
# truncated by the notebook; consider clearing this output before sharing.
!python /content/vision/batch_mvbench.py --json-dir /content/mvbench_data/json --video-dir /content/mvbench_data/video

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    title           : (C) 2017 Twenty Billion Neurons GmbH, 20BN-Someting-Something-Dataset V2
    encoder         : Lavf56.40.101
  Duration: 00:00:02.00, start: 0.000000, bitrate: 165 kb/s
  Stream #0:0: Video: vp9 (Profile 0), yuv420p(tv), 360x240, SAR 1:1 DAR 3:2, 12 fps, 12 tbr, 1k tbn, 1k tbc (default)
Output #0, mp3, to 'data/audio/mvbench/action_antonym_171804.mp3':
[1;31mOutput file #0 does not contain any stream
[0mError: ffmpeg error (see stderr output for detail)
Elapsed: 0.10s

[22] action_antonym | 204524
--- Starting Phase 1: Ingestion & Pre-processing ---
-> Extracting and Transcribing Audio...
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl 

In [13]:
# Switch the kernel's working directory to /content/vision.
%cd /content/vision

/content/vision


In [14]:
# Debug check: print the current working directory.
!pwd

/content/vision
