In [1]:
import torch
from accelerate import Accelerator
from functions import get_model

# Build the model; the other two values returned by get_model() are not used here.
model, _, _ = get_model()

# Save the model parameters as sharded files (each shard capped at 500MB).
accelerator = Accelerator()
accelerator.save_model(
    model,
    save_directory='model/accelerator.save_model',
    max_shard_size='500MB',
    safe_serialization=True,
)

# Show the classifier weights so they can be compared after reloading.
model.classifier.weight

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Parameter containing:
tensor([[ 0.0083, -0.0032,  0.0090,  ..., -0.0094, -0.0195, -0.0106],
        [ 0.0328,  0.0008, -0.0473,  ..., -0.0041,  0.0306, -0.0190],
        [-0.0062, -0.0369, -0.0352,  ..., -0.0250, -0.0268,  0.0037],
        ...,
        [-0.0247,  0.0047, -0.0030,  ...,  0.0281,  0.0045, -0.0040],
        [ 0.0285,  0.0244,  0.0216,  ...,  0.0085,  0.0094, -0.0250],
        [ 0.0051, -0.0237,  0.0188,  ...,  0.0097,  0.0214,  0.0038]],
       requires_grad=True)

In [2]:
from accelerate import load_checkpoint_in_model

# Build the model again via get_model() before loading the saved parameters.
model, _, _ = get_model()

# Load the saved model parameters directly into the model.
load_checkpoint_in_model(model, checkpoint='model/accelerator.save_model')

# Show the classifier weights for comparison with the values displayed at save time.
model.classifier.weight

Parameter containing:
tensor([[ 0.0083, -0.0032,  0.0090,  ..., -0.0094, -0.0195, -0.0106],
        [ 0.0328,  0.0008, -0.0473,  ..., -0.0041,  0.0306, -0.0190],
        [-0.0062, -0.0369, -0.0352,  ..., -0.0250, -0.0268,  0.0037],
        ...,
        [-0.0247,  0.0047, -0.0030,  ...,  0.0281,  0.0045, -0.0040],
        [ 0.0285,  0.0244,  0.0216,  ...,  0.0085,  0.0094, -0.0250],
        [ 0.0051, -0.0237,  0.0188,  ...,  0.0097,  0.0214,  0.0038]],
       requires_grad=True)

In [3]:
from accelerate import init_empty_weights

# Build the model without initializing its parameters, which saves memory.
# get_model() must run inside the init_empty_weights() context for this to apply.
with init_empty_weights():
    model, _, _ = get_model()

# Inspect the classifier weight of the model built under init_empty_weights().
model.classifier.weight

Parameter containing:
tensor(..., device='meta', size=(10, 768), requires_grad=True)

In [4]:
from accelerate import load_checkpoint_and_dispatch

# Load the parameters.
# With device_map='auto' the devices are assigned automatically: GPU memory is
# used first, then CPU memory, and finally disk.
# Here the placement is assigned manually instead.
manual_device_map = {
    'bert': 0,
    'dropout': 'cpu',
    'classifier': 'disk',
}
model = load_checkpoint_and_dispatch(
    model,
    checkpoint='model/accelerator.save_model',
    device_map=manual_device_map,
    offload_folder='offload_folder',
)

# Show the classifier weight together with the device map recorded on the model.
model.classifier.weight, model.hf_device_map

  0%|          | 0/235 [00:00<?, ?w/s]

  0%|          | 0/278 [00:00<?, ?w/s]

  0%|          | 0/8 [00:00<?, ?w/s]



(Parameter containing:
 tensor(..., device='meta', size=(10, 768), requires_grad=True),
 {'bert': 0, 'dropout': 'cpu', 'classifier': 'disk'})

In [5]:
# The forward computation also switches between devices automatically.
input_ids = torch.randint(low=0, high=15, size=(2, 15))

outputs = model(input_ids)
outputs.logits

tensor([[ 0.2504,  0.3525, -0.2611, -0.2667, -0.5293,  0.4584,  0.6846, -0.3762,
          0.1253,  0.3386],
        [ 0.2114,  0.4296, -0.3503, -0.1658, -0.3216,  0.0624,  0.6829, -0.1385,
         -0.0096,  0.0238]], grad_fn=<ToCopyBackward0>)