[source](https://zhuanlan.zhihu.com/p/641235650)

In [7]:
from accelerate.utils import get_balanced_memory, infer_auto_device_map, find_tied_parameters
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from accelerate import dispatch_model

from transformers import AutoTokenizer, AutoModel, AutoConfig

In [12]:
import torch
import torch.nn as nn
from collections import OrderedDict

In [3]:
# Build a toy model from two separately constructed 4x4 linear layers.
layers = OrderedDict()
layers["linear1"] = nn.Linear(4, 4)
layers["linear2"] = nn.Linear(4, 4)
model = nn.Sequential(layers)
# Element-wise comparison of the two weight matrices; the result is shown as the cell output.
model.linear2.weight == model.linear1.weight

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [4]:
# Ask accelerate which parameter names share a tensor; nothing has been tied yet at this point.
tied_groups = find_tied_parameters(model)
print(tied_groups)

[]


In [5]:
# Tie the two layers: linear2 is pointed at the very same weight Parameter object as linear1.
shared_weight = model.linear1.weight
model.linear2.weight = shared_weight

In [6]:
# Re-run the tied-parameter detection now that linear2.weight has been assigned linear1.weight.
tied_groups = find_tied_parameters(model)
print(tied_groups)

[['linear1.weight', 'linear2.weight']]


### 不加载权重，仅加载模型结构

In [8]:
# Path handed to `from_pretrained` in the next cell; presumably the local directory of the
# downloaded THUDM/chatglm2-6b-32k checkpoint.
# NOTE(review): absolute, machine-specific path — adjust it for your own environment.
model_path = '/root/.cache/huggingface/hub/THUDM/chatglm2-6b-32k'

In [9]:
# Read only the model configuration from `model_path`.
# NOTE(review): trust_remote_code=True lets transformers execute Python code shipped with the
# checkpoint — only use it with sources you trust.
config = AutoConfig.from_pretrained(
    pretrained_model_name_or_path=model_path,
    trust_remote_code=True,
)

In [10]:
# Instantiate the architecture described by `config` inside accelerate's empty-weights context,
# so the model structure is built without loading any checkpoint weights.
with init_empty_weights():
    model = AutoModel.from_config(
        config=config,
        trust_remote_code=True,
    )

优点：<br>
1. <font color=blue>使用 init_empty_weights() 时，参数创建在 meta 设备上，几乎不消耗显存和内存，也没有加载权重的耗时，方便查看模型结构和参数形状。</font>

2. <font color=blue>方便之后动态计算 device_map 和 max_memory</font>

In [13]:
# accelerate's documentation recommends tying weights before sizing the model / inferring a device map.
model.tie_weights()

# Use the same sizing assumptions for both calls so the budget and the placement agree.
weights_dtype = torch.int8
no_split_classes = model._no_split_modules

# Compute the per-device memory budget first ...
max_memory = get_balanced_memory(
    model,
    dtype=weights_dtype,
    no_split_module_classes=no_split_classes,
)
# ... then let the module-to-device placement respect that budget instead of ignoring it.
device_map = infer_auto_device_map(
    model,
    max_memory=max_memory,
    dtype=weights_dtype,
    no_split_module_classes=no_split_classes,
)

In [14]:
# Display the inferred placement (module name -> device id).
device_map

{'transformer': 0}

In [15]:
# Display the per-device memory budget (keyed by GPU index and 'cpu'; values presumably in bytes).
max_memory

{0: 11240747827.2, 'cpu': 64275992576}

### 正常加载权重

In [16]:
# Reference only (disabled): loading the real weights in a single `from_pretrained` call.
# NOTE(review): `AutoModelForCausalLM` is not among the imports visible in this notebook and
# `load_in_8bit` is not defined in the visible cells — provide both before uncommenting.
# model = AutoModelForCausalLM.from_pretrained(
#     pretrained_model_name_or_path=model_path,
#     load_in_8bit=load_in_8bit,
#     torch_dtype=torch.float16,
#     device_map='auto',
#     trust_remote_code=True)