# Manage Next HF Caches

In [60]:
import os
# Set hf_cache path.
# This assignment is deliberately placed before the huggingface_hub /
# transformers imports below: huggingface_hub resolves its cache location from
# HF_HOME when it is first imported, so changing the value in an
# already-running kernel presumably has no effect until the kernel restarts.
# NOTE(review): the recorded outputs in this notebook show paths under
# /ssd4/hf_cache rather than /next_share/hf_cache — confirm whether that is a
# symlink or whether the outputs predate this setting.
os.environ['HF_HOME'] = '/next_share/hf_cache'
from huggingface_hub import HfApi, scan_cache_dir, try_to_load_from_cache
from transformers import AutoConfig
import json
import importlib

# Re-import the local helper module so edits to it are picked up without
# restarting the kernel, then bind the converter used by the cells below.
import utilities
importlib.reload(utilities)
from utilities import cvt_cache_info

In [62]:
# Scan the cache in this cell so it does not rely on a `cache_info` variable
# left over from a cell that appears further down the notebook; on a fresh
# "Restart & Run All" the original cell stopped here with a NameError.
cache_info = scan_cache_dir()

# Convert the scan result with the local helper and pretty-print it as JSON.
cache_dt = cvt_cache_info(cache_info)
print(json.dumps(cache_dt, indent=4))

{
    "size_on_disk": 299875052733,
    "repos": [
        {
            "repo_id": "bert-base-uncased",
            "repo_type": "model",
            "repo_path": "/ssd4/hf_cache/hub/models--bert-base-uncased",
            "size_on_disk": 3454102158,
            "nb_files": 16,
            "revisions": [
                {
                    "commit_hash": "86b5e0934494bd15c9632b12f734a8a67f723594",
                    "snapshot_path": "/ssd4/hf_cache/hub/models--bert-base-uncased/snapshots/86b5e0934494bd15c9632b12f734a8a67f723594",
                    "size_on_disk": 3454102158,
                    "files": 16,
                    "refs": [
                        "main"
                    ],
                    "last_modified": 1710158238.069546
                }
            ],
            "last_accessed": 1710158211.348558,
            "last_modified": 1710158238.069546
        },
        {
            "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
            "repo_type": "mode

In [68]:
# Identifier of every repository present in the converted cache listing.
[repo['repo_id'] for repo in cache_dt['repos']]

['bert-base-uncased',
 'mistralai/Mistral-7B-Instruct-v0.1',
 'meta-llama/Meta-Llama-3-8B',
 'meta-llama/Llama-2-13b-chat-hf',
 'meta-llama/Llama-2-13b-hf',
 'google/flan-t5-xl',
 'Qwen/Qwen2.5-1.5B-Instruct',
 'mistralai/Mistral-7B-v0.1',
 'meta-llama/Meta-Llama-3-8B-Instruct',
 'gpt2',
 'mistralai/Mistral-7B-Instruct-v0.2',
 'google/t5-v1_1-large',
 'google/flan-t5-large',
 'meta-llama/Llama-2-7b-chat-hf',
 'meta-llama/Llama-2-7b-hf']

In [67]:
# On-disk location of the first repository in the converted listing.
first_repo = cache_dt['repos'][0]
first_repo['repo_path']

'/ssd4/hf_cache/hub/models--bert-base-uncased'

In [64]:
# Load the configuration for Meta-Llama-3-8B; the returned config object is
# the cell's displayed output.
llama3_repo_id = "meta-llama/Meta-Llama-3-8B"
AutoConfig.from_pretrained(llama3_repo_id)

LlamaConfig {
  "_name_or_path": "meta-llama/Meta-Llama-3-8B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.0",
  "use_cache": true,
  "vocab_size": 128256
}

In [3]:
# Scan the Hugging Face cache directory; the resulting object is inspected by
# the cells that follow.
cache_info = scan_cache_dir()

In [4]:
# List the attribute names available on the scan result.
print(dir(cache_info))



In [5]:
# Total cache size scaled down by 10^9 (presumably bytes -> decimal gigabytes).
cache_info.size_on_disk / 10**9

299.875052733

In [10]:
# `cache_info.repos` is a frozenset, so `list(...)[0]` picks an arbitrary
# element whose identity is not guaranteed to be the same on every run.
# Select the repo with the smallest case-insensitive repo_id instead, so that
# re-running the notebook inspects the same repository each time.
repo_info = min(cache_info.repos, key=lambda repo: repo.repo_id.lower())
# Show which attributes a cached-repo entry exposes.
print(dir(repo_info))

['__annotations__', '__class__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'last_accessed', 'last_accessed_str', 'last_modified', 'last_modified_str', 'nb_files', 'refs', 'repo_id', 'repo_path', 'repo_type', 'revisions', 'size_on_disk', 'size_on_disk_str']


In [29]:
# Print every public instance field of the repo entry except the nested
# collections, then summarise the refs by their names only.
skip_fields = ('refs', 'revisions')
kv = [
    (field, value)
    for field, value in vars(repo_info).items()
    if not field.startswith('_') and field not in skip_fields
]
for field, value in kv:
    print(f'{field}: {value}')
print(f'refs: {repo_info.refs.keys()}')

repo_id: bert-base-uncased
repo_type: model
repo_path: /ssd4/hf_cache/hub/models--bert-base-uncased
size_on_disk: 3454102158
nb_files: 16
last_accessed: 1710158211.348558
last_modified: 1710158238.069546
refs: dict_keys(['main'])


In [31]:
# Materialise the repo's revisions as a list, then report how many there are
# and confirm the container type (one value per output line).
rev_list = [*repo_info.revisions]
print(len(rev_list), type(rev_list), sep='\n')

1
<class 'list'>


In [33]:
# Take the first revision of the selected repo (kept under the name
# `ref_info`) and list the attribute names it exposes.
ref_info = [*repo_info.revisions][0]
print(dir(ref_info))

['__annotations__', '__class__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'commit_hash', 'files', 'last_modified', 'last_modified_str', 'nb_files', 'refs', 'size_on_disk', 'size_on_disk_str', 'snapshot_path']


In [37]:
# Print every public instance field of the revision except the file
# collection, which is summarised by its element count instead.
skip_fields = {'files'}
kv = [
    (field, value)
    for field, value in vars(ref_info).items()
    if not field.startswith('_') and field not in skip_fields
]
for field, value in kv:
    print(f'{field}: {value}')
print(f'files: {len(ref_info.files)}')

commit_hash: 86b5e0934494bd15c9632b12f734a8a67f723594
snapshot_path: /ssd4/hf_cache/hub/models--bert-base-uncased/snapshots/86b5e0934494bd15c9632b12f734a8a67f723594
size_on_disk: 3454102158
refs: frozenset({'main'})
last_modified: 1710158238.069546
files: 16


In [41]:
# Refs attached to the first revision of the selected repo.
tuple(repo_info.revisions)[0].refs

frozenset({'main'})