# Download Hugging Face Models for Offline Use

This notebook downloads Hugging Face models to a configurable location for offline use in production environments.

## Configuration
Set your desired cache directory and models to download below.

In [1]:
import os
import sys
from pathlib import Path

import torch
from transformers import (
    AutoModel,
    AutoTokenizer,
    LayoutLMForTokenClassification,
    LayoutLMTokenizer,
    LayoutLMv2ForTokenClassification,
    LayoutLMv2Tokenizer,
)

print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"Transformers available: {True}")

Python version: 3.11.12 | packaged by conda-forge | (main, Apr 10 2025, 22:18:52) [Clang 18.1.8 ]
PyTorch version: 2.1.0
Transformers available: True


## Configuration Settings

Configure the download location and models to download.

In [4]:
# Configuration - Modify these as needed
CACHE_DIR = os.getenv("HF_HOME", "/Users/tod/PretrainedLLM")
FORCE_REDOWNLOAD = False  # Set to True to re-download existing models

# Models to download - Add or remove as needed
MODELS_TO_DOWNLOAD = [
    {
        "name": "microsoft/layoutlm-base-uncased",
        "type": "layoutlm",
        "description": "LayoutLM base model for document understanding",
    }
]

print(f"üìÅ Cache directory: {CACHE_DIR}")
print(f"üîÑ Force redownload: {FORCE_REDOWNLOAD}")
print(f"üì¶ Models to download: {len(MODELS_TO_DOWNLOAD)}")

üìÅ Cache directory: /Users/tod/PretrainedLLM
üîÑ Force redownload: False
üì¶ Models to download: 1


## Setup Cache Directory

Create the cache directory and set environment variables.

In [5]:
# Create cache directory
cache_path = Path(CACHE_DIR)
cache_path.mkdir(parents=True, exist_ok=True)

# Set environment variables for Hugging Face
os.environ["HF_HOME"] = str(cache_path)
os.environ["TRANSFORMERS_CACHE"] = str(cache_path)
os.environ["HF_DATASETS_CACHE"] = str(cache_path / "datasets")

print(f"‚úÖ Cache directory created: {cache_path}")
print("üåç Environment variables set:")
print(f"  HF_HOME: {os.environ['HF_HOME']}")
print(f"  TRANSFORMERS_CACHE: {os.environ['TRANSFORMERS_CACHE']}")
print(f"  HF_DATASETS_CACHE: {os.environ['HF_DATASETS_CACHE']}")

‚úÖ Cache directory created: /Users/tod/PretrainedLLM
üåç Environment variables set:
  HF_HOME: /Users/tod/PretrainedLLM
  TRANSFORMERS_CACHE: /Users/tod/PretrainedLLM
  HF_DATASETS_CACHE: /Users/tod/PretrainedLLM/datasets


## Download Helper Functions

Functions to download different types of models.

In [None]:
def download_layoutlm_model(model_name, cache_dir, force_redownload=False):
    """Download LayoutLM model and tokenizer."""
    print(f"üì• Downloading LayoutLM: {model_name}")

    try:
        # Download tokenizer
        print("  üìù Downloading tokenizer...")
        _tokenizer = LayoutLMTokenizer.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        # Download model
        print("  üß† Downloading model...")
        _model = LayoutLMForTokenClassification.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        print("  ‚úÖ LayoutLM downloaded successfully")
        return True

    except Exception as e:
        print(f"  ‚ùå Error downloading LayoutLM: {e}")
        return False


def download_layoutlmv2_model(model_name, cache_dir, force_redownload=False):
    """Download LayoutLMv2 model and tokenizer."""
    print(f"üì• Downloading LayoutLMv2: {model_name}")

    try:
        # Download tokenizer
        print("  üìù Downloading tokenizer...")
        _tokenizer = LayoutLMv2Tokenizer.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        # Download model
        print("  üß† Downloading model...")
        _model = LayoutLMv2ForTokenClassification.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        print("  ‚úÖ LayoutLMv2 downloaded successfully")
        return True

    except Exception as e:
        print(f"  ‚ùå Error downloading LayoutLMv2: {e}")
        return False


def download_auto_model(model_name, cache_dir, force_redownload=False):
    """Download any model using AutoModel and AutoTokenizer."""
    print(f"üì• Downloading Auto model: {model_name}")

    try:
        # Download tokenizer
        print("  üìù Downloading tokenizer...")
        _tokenizer = AutoTokenizer.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        # Download model
        print("  üß† Downloading model...")
        _model = AutoModel.from_pretrained(
            model_name, cache_dir=cache_dir, force_download=force_redownload
        )

        print("  ‚úÖ Auto model downloaded successfully")
        return True

    except Exception as e:
        print(f"  ‚ùå Error downloading Auto model: {e}")
        return False


def get_model_downloader(model_type):
    """Get the appropriate download function for model type."""
    downloaders = {
        "layoutlm": download_layoutlm_model,
        "layoutlmv2": download_layoutlmv2_model,
        "bert": download_auto_model,
        "auto": download_auto_model,
    }
    return downloaders.get(model_type, download_auto_model)

## Download Models

Download all configured models to the cache directory.

In [7]:
# Download all models
print(f"üöÄ Starting download of {len(MODELS_TO_DOWNLOAD)} models...\n")

downloaded_successfully = []
failed_downloads = []

for i, model_config in enumerate(MODELS_TO_DOWNLOAD, 1):
    model_name = model_config["name"]
    model_type = model_config["type"]
    description = model_config["description"]

    print(f"üì¶ [{i}/{len(MODELS_TO_DOWNLOAD)}] {model_name}")
    print(f"   üìã {description}")
    print(f"   üè∑Ô∏è  Type: {model_type}")

    # Get the appropriate downloader
    downloader = get_model_downloader(model_type)

    # Download the model
    success = downloader(model_name, CACHE_DIR, FORCE_REDOWNLOAD)

    if success:
        downloaded_successfully.append(model_name)
    else:
        failed_downloads.append(model_name)

    print()  # Empty line for readability

# Summary
print("‚ïê" * 60)
print("üìä DOWNLOAD SUMMARY")
print("‚ïê" * 60)
print(f"‚úÖ Successfully downloaded: {len(downloaded_successfully)}")
for model in downloaded_successfully:
    print(f"   ‚úì {model}")

if failed_downloads:
    print(f"\n‚ùå Failed downloads: {len(failed_downloads)}")
    for model in failed_downloads:
        print(f"   ‚úó {model}")
else:
    print("\nüéâ All models downloaded successfully!")

print(f"\nüìÅ Models cached in: {CACHE_DIR}")

üöÄ Starting download of 1 models...

üì¶ [1/1] microsoft/layoutlm-base-uncased
   üìã LayoutLM base model for document understanding
   üè∑Ô∏è  Type: layoutlm
üì• Downloading LayoutLM: microsoft/layoutlm-base-uncased
  üìù Downloading tokenizer...


tokenizer_config.json:   0%|          | 0.00/170 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/606 [00:00<?, ?B/s]

  üß† Downloading model...


model.safetensors:   0%|          | 0.00/451M [00:00<?, ?B/s]

Some weights of LayoutLMForTokenClassification were not initialized from the model checkpoint at microsoft/layoutlm-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  ‚úÖ LayoutLM downloaded successfully

‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
üìä DOWNLOAD SUMMARY
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
‚úÖ Successfully downloaded: 1
   ‚úì microsoft/layoutlm-base-uncased

üéâ All models downloaded successfully!

üìÅ Models cached in: /Users/tod/PretrainedLLM


## Verify Downloads

Check what was actually downloaded to the cache directory.

In [8]:
# List contents of cache directory
print("üìÇ Cache Directory Contents:")
print("‚ïê" * 40)

cache_path = Path(CACHE_DIR)
if cache_path.exists():
    for item in sorted(cache_path.iterdir()):
        if item.is_dir():
            # Count files in subdirectory
            try:
                file_count = len(list(item.rglob("*")))
                print(f"üìÅ {item.name}/ ({file_count} files)")
            except PermissionError:
                print(f"üìÅ {item.name}/ (permission denied)")
        else:
            file_size = item.stat().st_size / (1024 * 1024)  # MB
            print(f"üìÑ {item.name} ({file_size:.1f} MB)")
else:
    print("‚ùå Cache directory does not exist")

# Show total cache size
try:
    total_size = sum(f.stat().st_size for f in cache_path.rglob("*") if f.is_file())
    total_size_mb = total_size / (1024 * 1024)
    total_size_gb = total_size_mb / 1024

    print("\nüìä Cache Statistics:")
    print(f"üíæ Total size: {total_size_mb:.1f} MB ({total_size_gb:.2f} GB)")
    print(f"üìÅ Cache location: {cache_path.absolute()}")
except Exception as e:
    print(f"\n‚ö†Ô∏è  Could not calculate cache size: {e}")

üìÇ Cache Directory Contents:
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
üìÑ .DS_Store (0.0 MB)
üìÅ .locks/ (7 files)
üìÅ InternVL2_5-1B/ (74 files)
üìÅ InternVL3-1B/ (67 files)
üìÅ Llama-3.2-1B/ (10 files)
üìÅ ModernBERT-base/ (5 files)
üìÅ all-MiniLM-L12-v2/ (18 files)
üìÅ all-MiniLM-L6-v2/ (6 files)
üìÅ all-mpnet-base-v2/ (14 files)
üìÅ gte-base-en-v1.5/ (12 files)
üìÅ gte-small/ (14 files)
üìÑ hf_llama_token.txt (0.0 MB)
üìÅ models--microsoft--layoutlm-base-uncased/ (21 files)
üìÅ paraphrase-MiniLM-L3-v2/ (12 files)
üìÅ paraphrase-MiniLM-L6-v2/ (12 files)
üìÅ paraphrase-multilingual-MiniLM-L12-v2/ (95 files)
üìÅ roberta_model/ (7 files)
üìÅ swin_large/ (3 files)

üìä Cache Statistics:
üíæ Total size: 7796.9 MB (7.61 GB)
üìÅ Cache location: /Users/tod/PretrainedLLM


## Test Offline Loading

Test that downloaded models can be loaded offline.

In [9]:
# Test offline loading of downloaded models
print("üß™ Testing Offline Model Loading")
print("‚ïê" * 40)

# Simulate offline environment
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"

test_results = []

for model_config in MODELS_TO_DOWNLOAD:
    model_name = model_config["name"]
    model_type = model_config["type"]

    print(f"üîç Testing: {model_name}")

    try:
        if model_type == "layoutlm":
            tokenizer = LayoutLMTokenizer.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )
            model = LayoutLMForTokenClassification.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )
        elif model_type == "layoutlmv2":
            tokenizer = LayoutLMv2Tokenizer.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )
            model = LayoutLMv2ForTokenClassification.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )
        else:
            tokenizer = AutoTokenizer.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )
            model = AutoModel.from_pretrained(
                model_name, local_files_only=True, cache_dir=CACHE_DIR
            )

        print("  ‚úÖ Loaded successfully offline")
        test_results.append((model_name, True))

    except Exception as e:
        print(f"  ‚ùå Failed to load offline: {e}")
        test_results.append((model_name, False))

# Summary of offline tests
print("\nüìã Offline Loading Test Results:")
successful_offline = sum(1 for _, success in test_results if success)
print(f"‚úÖ Successfully loaded offline: {successful_offline}/{len(test_results)}")

for model_name, success in test_results:
    status = "‚úÖ" if success else "‚ùå"
    print(f"  {status} {model_name}")

if successful_offline == len(test_results):
    print("\nüéâ All models ready for offline use!")
else:
    print("\n‚ö†Ô∏è  Some models may need to be re-downloaded")

Some weights of LayoutLMForTokenClassification were not initialized from the model checkpoint at microsoft/layoutlm-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


üß™ Testing Offline Model Loading
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
üîç Testing: microsoft/layoutlm-base-uncased
  ‚úÖ Loaded successfully offline

üìã Offline Loading Test Results:
‚úÖ Successfully loaded offline: 1/1
  ‚úÖ microsoft/layoutlm-base-uncased

üéâ All models ready for offline use!


## Environment Configuration

Generate environment variable settings for your configuration.

In [10]:
# Generate environment configuration
print("‚öôÔ∏è  Environment Configuration")
print("‚ïê" * 40)
print("Add these to your .env file or export them in your shell:\n")

env_config = f"""
# Hugging Face Cache Configuration
export HF_HOME="{CACHE_DIR}"
export TRANSFORMERS_CACHE="{CACHE_DIR}"
export HF_DATASETS_CACHE="{CACHE_DIR}/datasets"

# Offline mode (uncomment for production)
# export HF_HUB_OFFLINE=1
# export TRANSFORMERS_OFFLINE=1
"""

print(env_config)

# Also show .env file format
print("\nüìÑ .env file format:")
print("‚îÄ" * 20)
print(f"HF_HOME={CACHE_DIR}")
print(f"TRANSFORMERS_CACHE={CACHE_DIR}")
print(f"HF_DATASETS_CACHE={CACHE_DIR}/datasets")
print("# HF_HUB_OFFLINE=1")
print("# TRANSFORMERS_OFFLINE=1")

print(f"\n‚úÖ Models are ready for offline use from: {CACHE_DIR}")

‚öôÔ∏è  Environment Configuration
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
Add these to your .env file or export them in your shell:


# Hugging Face Cache Configuration
export HF_HOME="/Users/tod/PretrainedLLM"
export TRANSFORMERS_CACHE="/Users/tod/PretrainedLLM"
export HF_DATASETS_CACHE="/Users/tod/PretrainedLLM/datasets"

# Offline mode (uncomment for production)
# export HF_HUB_OFFLINE=1
# export TRANSFORMERS_OFFLINE=1


üìÑ .env file format:
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
HF_HOME=/Users/tod/PretrainedLLM
TRANSFORMERS_CACHE=/Users/tod/PretrainedLLM
HF_DATASETS_CACHE=/Users/tod/PretrainedLLM/datasets
# HF_HUB_OFFLINE=1
# TRANSFORMERS_OFFLINE=1

‚úÖ Models are ready for offline use from: /Users/tod/PretrainedLLM
