In [1]:
import os


# Hub repository to download, e.g. "hf_org/model_id"; supplied via the
# PARAM_NAME environment variable and mandatory for the rest of the notebook.
model_id = os.environ.get("PARAM_NAME")
if not model_id:
    # The closing backtick now ends right after the YAML snippet, so the hint
    # can be copied verbatim into the model spec.
    raise ValueError(
        "Missing required environment variable PARAM_NAME. "
        "Set `params: {name: hf_org/model_id}` in the model spec."
    )

# Directory the files are written to; overridable through OUTPUT_DIR.
output_dir = os.environ.get("OUTPUT_DIR", "/content/model")

# snapshot_download(repo_id=model_id, local_dir=output_dir, local_dir_use_symlinks=False, revision="main")

In [2]:
from huggingface_hub.hf_api import model_info
from utils import filter_files

# Look up the repository metadata on the Hub; its `siblings` entries are
# iterated below to obtain the repo's file names.
model = model_info(model_id)

# PARAM_FILES is an optional comma-separated list of files to download.
files = os.environ.get("PARAM_FILES", "")

# Strip whitespace and drop empty entries so that a trailing comma, a doubled
# comma, or a whitespace-only value never produces an empty "" filename.
filenames = [name for name in (part.strip() for part in files.split(",")) if name]
if not filenames:
    # Nothing was explicitly requested: start from every file in the repo and
    # let the project helper `filter_files` narrow the list (its criteria are
    # defined in `utils`, not shown here).
    filenames = [f.rfilename for f in model.siblings]
    filenames = filter_files(filenames)
filenames

  from .autonotebook import tqdm as notebook_tqdm


['.gitattributes',
 'README.md',
 'config.json',
 'configuration_RW.py',
 'coreml/text-generation/falcon-7b-64-float32.mlpackage/Data/com.apple.CoreML/model.mlmodel',
 'coreml/text-generation/falcon-7b-64-float32.mlpackage/Data/com.apple.CoreML/weights/weight.bin',
 'coreml/text-generation/falcon-7b-64-float32.mlpackage/Manifest.json',
 'generation_config.json',
 'handler.py',
 'modelling_RW.py',
 'pytorch_model-00001-of-00002.bin',
 'pytorch_model-00002-of-00002.bin',
 'pytorch_model.bin.index.json',
 'special_tokens_map.json',
 'tokenizer.json',
 'tokenizer_config.json']

In [6]:
import urllib.request
from pathlib import Path
from huggingface_hub import hf_hub_url
from concurrent.futures import ThreadPoolExecutor, as_completed

# Optional authentication: when HUGGING_FACE_HUB_TOKEN is set, install a
# default urllib opener that attaches the bearer token to outgoing requests.
token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if token:
    auth_header = ('authorization', f"Bearer {token}")
    opener = urllib.request.build_opener()
    opener.addheaders = [auth_header]
    urllib.request.install_opener(opener)

def download_file(filename: str) -> str:
    """Download one file of the model repository into ``output_dir``.

    The payload is first written to a ``<name>.part`` sibling and only renamed
    to its final name once the transfer finished, so an interrupted download
    never leaves a truncated file that looks complete.

    Args:
        filename: Path of the file relative to the repository root.

    Returns:
        The destination path (``output_dir``/``filename``) as a string.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on a failed transfer;
        the partial file is removed before the error propagates.
    """
    destination = f"{output_dir}/{filename}"
    print(f"Downloading {filename} to {destination}")
    url = hf_hub_url(model_id, filename)

    target = Path(destination)
    # Repository files may live in sub-folders; create them on demand.
    target.parent.mkdir(exist_ok=True, parents=True)

    partial = target.with_name(target.name + ".part")
    try:
        urllib.request.urlretrieve(url, str(partial))
        # Rename within the same directory: replaces any stale previous copy.
        partial.replace(target)
    except BaseException:
        # Clean up the incomplete download (also on KeyboardInterrupt).
        if partial.exists():
            partial.unlink()
        raise
    return destination

# Download every selected file concurrently, one worker thread per file.
processes = []
if filenames:
    with ThreadPoolExecutor(max_workers=len(filenames)) as executor:
        for filename in filenames:
            processes.append(executor.submit(download_file, filename))

        # Report inside the `with` block so each message appears as soon as
        # its download completes, instead of only after the pool has shut
        # down. `result()` re-raises any exception from the worker.
        for task in as_completed(processes):
            print(f"Finished downloading {task.result()}")
else:
    # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely.
    print("No files selected for download")

Downloading .gitattributes to /content/model/.gitattributes
Downloading README.md to /content/model/README.md
Downloading config.json to /content/model/config.json
Downloading configuration_RW.py to /content/model/configuration_RW.py
Downloading generation_config.json to /content/model/generation_config.json
Downloading handler.py to /content/model/handler.py
Downloading modelling_RW.py to /content/model/modelling_RW.py
Downloading pytorch_model-00001-of-00002.bin to /content/model/pytorch_model-00001-of-00002.bin
Downloading pytorch_model-00002-of-00002.bin to /content/model/pytorch_model-00002-of-00002.bin
Downloading pytorch_model.bin.index.json to /content/model/pytorch_model.bin.index.json
Downloading special_tokens_map.json to /content/model/special_tokens_map.json
Downloading tokenizer.json to /content/model/tokenizer.json
Downloading tokenizer_config.json to /content/model/tokenizer_config.json
Finished downloading /content/model/pytorch_model-00002-of-00002.bin
Finished downlo

In [7]:
! ls -lash /content/model

total 14G
4.0K drwxr-xr-x 2 root root 4.0K Aug  4 04:41 .
8.0K drwxr-xr-x 1 root root 4.0K Aug  4 04:41 ..
4.0K -rw-r--r-- 1 root root 1.5K Aug  4 04:41 .gitattributes
 12K -rw-r--r-- 1 root root 9.6K Aug  4 04:41 README.md
4.0K -rw-r--r-- 1 root root  667 Aug  4 04:41 config.json
4.0K -rw-r--r-- 1 root root 2.6K Aug  4 04:41 configuration_RW.py
4.0K -rw-r--r-- 1 root root  111 Aug  4 04:41 generation_config.json
4.0K -rw-r--r-- 1 root root 1.2K Aug  4 04:41 handler.py
 48K -rw-r--r-- 1 root root  47K Aug  4 04:41 modelling_RW.py
9.3G -rw-r--r-- 1 root root 9.3G Aug  4 04:43 pytorch_model-00001-of-00002.bin
4.2G -rw-r--r-- 1 root root 4.2G Aug  4 04:42 pytorch_model-00002-of-00002.bin
 20K -rw-r--r-- 1 root root  17K Aug  4 04:41 pytorch_model.bin.index.json
4.0K -rw-r--r-- 1 root root  281 Aug  4 04:41 special_tokens_map.json
2.7M -rw-r--r-- 1 root root 2.7M Aug  4 04:41 tokenizer.json
4.0K -rw-r--r-- 1 root root  220 Aug  4 04:41 tokenizer_config.json
