# Environment Setup
Sets up the environment variable for Hugging Face file transfer protocol. 

This ensures efficient model downloads using the newer transfer protocol.

In [1]:
# Environment Setup

# Import the os module to interact with the operating system
import os

from dotenv import load_dotenv

load_dotenv()

os.environ["HF_HUB_OFFLINE"] = "0"
os.environ["HF_DATASETS_OFFLINE"] = "0"

# Model Download

Imports and uses the custom utility function to load model configurations from settings. 

This determines which models need to be downloaded.

In [2]:
# Model Configuration Loading

# Import the custom utility function to load model configurations
from frames.utils.settings import load_models

# Load the model configurations
model_configs = load_models()

# Iterate through the model IDs and print the download message for each model
for model_id in model_configs["id"]:
    print(f"Running download for: {model_id}")
    # Use the Hugging Face CLI to download the model, excluding files with the .gguf extension
    os.system(f"huggingface-cli download {model_id} --exclude=*.gguf")

Running download for: neuralmagic/Llama-3.2-11B-Vision-Instruct-FP8-dynamic


Downloading '.gitattributes' to '/home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/blobs/52373fe24473b1aa44333d318f578ae6bf04b49b.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/blobs/52373fe24473b1aa44333d318f578ae6bf04b49b
Downloading 'README.md' to '/home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/blobs/c31cc5c079beb73dd369410a6a372be8d2e838ad.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/blobs/c31cc5c079beb73dd369410a6a372be8d2e838ad
Downloading 'recipe.yaml' to '/home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/blobs/c0d6d548bbc21bcb28a2d32381a8d9424967408e.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--neu

/home/pedro/.cache/huggingface/hub/models--neuralmagic--Llama-3.2-11B-Vision-Instruct-FP8-dynamic/snapshots/95d23e17186bdadbbd97dd8e7271caa610636b5a
Running download for: Qwen/Qwen2-VL-72B-Instruct-AWQ


Downloading '.gitattributes' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b
Downloading 'LICENSE' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/b547357c30b670122b149c11a1a898db8ecfe4c8.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/b547357c30b670122b149c11a1a898db8ecfe4c8
Downloading 'README.md' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/4d619b4c111fc746568f3d4d031da34f1a0b1876.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/blobs/4d619b4c111fc746568f3d4d031da34f1a0b1876
Downloading 'added_tokens.json' to '/home/p

/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-72B-Instruct-AWQ/snapshots/712d5a5a210e7f0af603d2a949b577c68de2c6ef
Running download for: Qwen/Qwen2-VL-7B-Instruct-AWQ


Downloading '.gitattributes' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b
Downloading 'LICENSE' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/6634c8cc3133b3848ec74b9f275acaaa1ea618ab.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/6634c8cc3133b3848ec74b9f275acaaa1ea618ab
Downloading 'README.md' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/1f2f1097933969af82631da07d42b2d270873a98.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/blobs/1f2f1097933969af82631da07d42b2d270873a98


/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-7B-Instruct-AWQ/snapshots/6ec2560b0afc3a618d4acc9b8e2967d1642f463d
Running download for: Qwen/Qwen2-VL-2B-Instruct-AWQ


Downloading '.gitattributes' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/a6344aac8c09253b3b630fb776ae94478aa0275b
Downloading 'LICENSE' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/6634c8cc3133b3848ec74b9f275acaaa1ea618ab.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/6634c8cc3133b3848ec74b9f275acaaa1ea618ab
Downloading 'README.md' to '/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/b4a2e4ba31ea6d3bcb152fe701e9d0834f329841.incomplete'
Download complete. Moving file to /home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/blobs/b4a2e4ba31ea6d3bcb152fe701e9d0834f329841


/home/pedro/.cache/huggingface/hub/models--Qwen--Qwen2-VL-2B-Instruct-AWQ/snapshots/4f6ea6d22fcf0f8c1ed64d1d2a3d722d4d7bbcea


## NLTK Download

Downloads all of NLTK for offline usage

In [3]:
!python -m nltk.downloader all

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /home/pedro/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /home/pedro/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /home/pedro/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /home/pedro/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_eng is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /home/pedro/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]   

## TikToken tokenizer offline issue fix

In [4]:
import hashlib
from pathlib import Path

import requests

# Define the blob URL and blob path
blob_url = "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"

# Compute the SHA-1 hash of the blob path
hash_filename = hashlib.sha1(blob_url.encode()).hexdigest()

# Define the target directory using pathlib
target_dir = Path(os.environ["TIKTOKEN_CACHE_DIR"])
target_dir.mkdir(parents=True, exist_ok=True)

# Full path to save the file
file_path = target_dir / hash_filename

# Download the file
response = requests.get(blob_url)
response.raise_for_status()  # Ensure the download was successful

# Save the file
file_path.write_bytes(response.content)

print(f"File downloaded and saved to {file_path}")

KeyError: 'TIKTOKEN_CACHE_DIR'

In [None]:
import tiktoken

tiktoken.get_encoding("cl100k_base")