In [1]:
import os
!pip -q --upgrade install huggingface_hub


Usage:   
  pip <command> [options]

no such option: --upgrade


In [2]:
from typing import Optional, Union, List
from huggingface_hub import HfApi, hf_hub_url, snapshot_download
from huggingface_hub.utils import filter_repo_objects
from huggingface_hub.constants import REPO_TYPES
from os.path import join, split, exists
import os
from deep_utils import AsyncDownloadUtils


async def get_urls(
        repo_id: str,
        *,
        revision: Optional[str] = None,
        repo_type: Optional[str] = None,
        token: Optional[Union[bool, str]] = None,
        allow_patterns: Optional[Union[List[str], str]] = None,
        ignore_patterns: Optional[Union[List[str], str]] = None,
        download: bool = False,
        download_path: str = ".",
) -> List[str]:
    """Build the download URLs of the files in a Hugging Face Hub repo.

    The repo's file list is fetched with ``HfApi.repo_info``, filtered with
    ``allow_patterns`` / ``ignore_patterns``, and turned into one URL per file
    with ``hf_hub_url``. When ``download`` is true, the URLs are also handed to
    ``AsyncDownloadUtils.download_urls``.

    Args:
        repo_id: Repo identifier, e.g. ``"owner/name"``.
        revision: Revision to list and to build URLs for; ``None`` lets the
            Hub libraries pick their default.
        repo_type: One of ``REPO_TYPES``; ``None`` is treated as ``"model"``.
        token: Forwarded to ``HfApi.repo_info``.
        allow_patterns: Forwarded to ``filter_repo_objects``.
        ignore_patterns: Forwarded to ``filter_repo_objects``.
        download: If true, download the files after building the URLs.
        download_path: Parent directory; files go into a sub-directory named
            after the last ``/``-separated component of ``repo_id``.

    Returns:
        The list of download URLs (also returned when ``download`` is true).

    Raises:
        ValueError: If ``repo_type`` is not in ``REPO_TYPES``.
    """
    if repo_type is None:
        repo_type = "model"
    if repo_type not in REPO_TYPES:
        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")

    _api = HfApi()
    repo_info = _api.repo_info(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        token=token,
    )
    assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
    filtered_repo_files = list(
        filter_repo_objects(
            # `siblings` may be None; treat that as an empty file list.
            items=[f.rfilename for f in (repo_info.siblings or [])],
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
        )
    )
    # Let hf_hub_url add the repo-type prefix ("datasets/", "spaces/") and the
    # revision itself, so the URLs match the revision whose files were listed.
    dl_urls = [
        hf_hub_url(repo_id, f, repo_type=repo_type, revision=revision)
        for f in filtered_repo_files
    ]
    if download:
        # Repo ids are "owner/name" (single slash); use the last component as
        # the local folder name. This also works for ids without a namespace.
        repo_name = repo_id.split("/")[-1]
        download_path = join(download_path, repo_name)
        os.makedirs(download_path, exist_ok=True)
        # Common URL prefix of every file in this repo/revision, built by the
        # same helper as the URLs above (empty filename -> ".../resolve/<rev>/").
        # NOTE(review): presumably download_urls strips this prefix from each URL
        # to get the relative local path - confirm against deep_utils.
        url_prefix = hf_hub_url(repo_id, "", repo_type=repo_type, revision=revision)
        await AsyncDownloadUtils.download_urls(
            dl_urls,
            download_path,
            remove_to_get_local_file_path=url_prefix,
        )
    return dl_urls

In [3]:
from dotenv import load_dotenv
import os


load_dotenv()
login_token = os.getenv("login_token")
urls = get_urls("wanglab/CT_DeepLesion-MedSAM2", token=login_token, repo_type="dataset")
print(urls)

<coroutine object get_urls at 0x7ae9a8891900>


Each of the links can be downloaded individually. 