In [1]:
!pip install huggingface_hub



In [15]:
# 1. Install the dotenv library
!pip install python-dotenv huggingface_hub

import os
from dotenv import load_dotenv
from huggingface_hub import login

# 2. Load the .env file
# If the file is in /content/vision/my.env, use:
load_dotenv('/content/my.env')

# 3. Retrieve and use the token
hf_token = os.getenv('HF_TOKEN')

if hf_token:
    login(token=hf_token)
    print("Successfully logged in to Hugging Face!")
else:
    print("Error: HF_TOKEN not found in .env file.")



Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Successfully logged in to Hugging Face!


In [3]:
from __future__ import annotations

import argparse
from pathlib import Path
import zipfile

from huggingface_hub import snapshot_download


def parse_args(argv=None) -> argparse.Namespace:
    """Build the downloader's option parser and parse ``argv``.

    Args:
        argv: Sequence of argument strings to parse. When ``None``, argparse
            falls back to the process command line.

    Returns:
        The namespace of recognised options (``output_dir``, ``repo_id``,
        ``revision``, ``allow_patterns``, ``unzip_videos``). Arguments the
        parser does not recognise are discarded rather than treated as errors.
    """
    cli = argparse.ArgumentParser(
        description="Download the OpenGVLab/MVBench dataset from Hugging Face."
    )

    # One (flag, add_argument keyword arguments) entry per supported option.
    option_table = (
        (
            "--output-dir",
            {
                "type": Path,
                "default": Path("mvbench_data"),
                "help": "Where to store the downloaded dataset files.",
            },
        ),
        (
            "--repo-id",
            {
                "default": "OpenGVLab/MVBench",
                "help": "Hugging Face dataset repo ID.",
            },
        ),
        (
            "--revision",
            {
                "default": None,
                "help": "Optional git revision (branch/tag/commit).",
            },
        ),
        (
            "--allow-patterns",
            {
                "default": None,
                "help": (
                    "Comma-separated list of glob patterns to include. "
                    "Leave empty to download everything."
                ),
            },
        ),
        (
            "--unzip-videos",
            {
                "action": "store_true",
                "help": "Unzip all .zip files under the 'video' folder after download.",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    # parse_known_args returns (namespace, leftovers); only the namespace is
    # handed back, so unrecognised arguments are silently ignored.
    recognised, _leftovers = cli.parse_known_args(argv)
    return recognised


def main(argv=None) -> None:
    """Download a Hugging Face dataset snapshot and optionally unzip its videos.

    Args:
        argv: Optional list of command-line style argument strings forwarded
            to ``parse_args``. ``None`` (the default) preserves the previous
            behaviour of parsing the process command line. Passing a list,
            e.g. ``main(["--unzip-videos"])``, lets a notebook cell choose
            options without touching ``sys.argv``.

    Side effects:
        Creates the output directory if needed, downloads the repository files
        into it, optionally extracts every ``.zip`` found under its ``video``
        sub-folder next to the archive, and prints a summary line.
    """
    args = parse_args(argv)
    output_dir = args.output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Turn the comma-separated --allow-patterns string into a clean list;
    # None means "no filter".
    allow_patterns = None
    if args.allow_patterns:
        allow_patterns = [p.strip() for p in args.allow_patterns.split(",") if p.strip()]

    # Download the dataset repository into output_dir: every file when
    # allow_patterns is None, otherwise only the files matching the patterns.
    snapshot_download(
        repo_id=args.repo_id,
        repo_type="dataset",
        revision=args.revision,
        local_dir=str(output_dir),
        # NOTE(review): recent huggingface_hub releases appear to treat this
        # flag as deprecated/ignored - confirm against the installed version
        # before removing it.
        local_dir_use_symlinks=False,
        allow_patterns=allow_patterns,
    )

    if args.unzip_videos:
        video_dir = output_dir / "video"
        if video_dir.exists():
            # Sorted so archives are processed in a stable order.
            zip_files = sorted(video_dir.rglob("*.zip"))
            for zip_path in zip_files:
                # Extract each archive alongside itself.
                extract_to = zip_path.parent
                with zipfile.ZipFile(zip_path, "r") as zf:
                    zf.extractall(extract_to)
        else:
            print(f"Video folder not found at: {video_dir}")

    print(f"MVBench dataset downloaded to: {output_dir}")


# Entry point: run the download when this code is executed directly (the
# recorded cell output shows it running inside the notebook). main() is called
# without arguments, so options come from whatever parse_args() reads by default.
if __name__ == "__main__":
    main()




Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 41 files:   0%|          | 0/41 [00:00<?, ?it/s]

MVBench dataset downloaded to: /content/mvbench_data


In [4]:
# Change directory to the video folder
%cd /content/mvbench_data/video

# Unzip all files quietly (-q) so the output doesn't lag your browser
# and delete the zip files afterward (-j can be used if you want to flatten structure)
!unzip -q "*.zip"

# Optional: Remove the zip files to save space once extracted
!rm *.zip

/content/mvbench_data/video

11 archives were successfully processed.
