1、SAVE AND LOAD THE MODEL

In [2]:
import torch
import torchvision.models as models

# Build VGG16 with its pretrained weights and persist them.
model = models.vgg16(pretrained=True)
torch.save(model.state_dict(), 'model_weights.pth') # saves only the model's weights (its state_dict)

model = models.vgg16() # we do not specify pretrained=True, i.e. do not load default weights
model.load_state_dict(torch.load('model_weights.pth')) # load the weights: the state dict holds every parameter and buffer of the model
model.eval() # switch the model to inference mode

2、Saving and Loading Models with Shapes

In [None]:
# Pickle the whole model object (class structure plus weights), then read it back.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True, which
# rejects a fully pickled model -- confirm the installed version before relying on this.
torch.save(model, 'model.pth')  # save
model = torch.load('model.pth') # load

3、标准保存做法

In [None]:
# Metadata stored alongside the weights so that training can be resumed later.
EPOCH = 5
PATH = "model.pt"
LOSS = 0.4

# Bundle everything needed to resume training into a single dictionary.
# `net` and `optimizer` are expected to be defined before this cell runs.
torch.save({
    'epoch': EPOCH,
    'model_state_dict': net.state_dict(),  # parameters and buffers only, as in section 1
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': LOSS,
}, PATH)  # PATH is torch.save's second argument; the original closed the call before it

4、标准加载做法

In [None]:
# Re-create the model and optimizer first: load_state_dict() only fills in values,
# it does not build the objects. The optimizer must be constructed over the
# parameters of the model that is being restored.
model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Read the checkpoint dictionary back and restore each piece.
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']  # epoch at which the checkpoint was written
loss = checkpoint['loss']

# Pick the mode you need: eval() for inference, train() to resume training.
# Run as written, the last call wins and the model ends up in training mode.
model.eval()
# -or-
model.train()

5、源码讲解

module.py

In [3]:
# Demo class used to inspect nn.Module internals.
class Test(torch.nn.Module):
    """Toy module holding two linear layers and one batch-norm layer.

    No forward() is defined; the class exists only so that its registered
    sub-modules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        # Assigning a Module to an attribute registers it in self._modules,
        # in assignment order.
        self.linear1 = torch.nn.Linear(2, 3)
        self.linear2 = torch.nn.Linear(3, 4)
        self.batch_norm = torch.nn.BatchNorm2d(4)


# Instantiate the module.
test_module = Test()
# Show the ordered dict of directly registered sub-modules.
test_module._modules

OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)),
             ('linear2', Linear(in_features=3, out_features=4, bias=True)),
             ('batch_norm',
              BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])

In [4]:
# Look up the linear1 sub-module by name
test_module._modules['linear1']

Linear(in_features=2, out_features=3, bias=True)

In [5]:
# Get the weight tensor of linear1
test_module._modules['linear1'].weight

Parameter containing:
tensor([[-0.6162,  0.1601],
        [-0.4765, -0.4863],
        [ 0.7029,  0.0386]], requires_grad=True)

In [6]:
# dtype of linear1's weight before any cast is applied
test_module._modules['linear1'].weight.dtype

torch.float32

In [8]:
# Convert the module from float to double precision
test_module.to(torch.double)  # to() casts every floating-point tensor of the module to float64
test_module._modules['linear1'].weight.dtype

torch.float64

In [9]:
# Returns the ordered dict holding the 3 sub-modules
test_module._modules

OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)),
             ('linear2', Linear(in_features=3, out_features=4, bias=True)),
             ('batch_norm',
              BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])

In [10]:
# _parameters lists only the parameters registered directly on this module and does
# not descend into sub-modules. Test registers none itself (they all live inside its
# layers), so the dict is empty.
test_module._parameters

OrderedDict()

In [11]:
# _buffers likewise holds only buffers registered directly on this module;
# Test registers none itself, so the dict is empty.
test_module._buffers

OrderedDict()

_save_to_state_dict(self, destination, prefix, keep_vars)，把当前模型所有的参数和buffer遍历，放到字典中，这个字典就为destination，在state_dict中被调用

**state_dict(self, destination=None, prefix='', keep_vars=False)**

test_module.state_dict()

_load_from_state_dict( ) 从一个state_dict中得到参数和buffer的值，赋予给当前module

load_state_dict( ) 导入当前模块的参数和buffer，进行遍历操作，用法如state_dict()

_named_members() 查找函数，返回一个迭代器

In [13]:
# Direct lookup on the top-level module: empty, since its parameters live in the sub-modules
test_module._parameters

OrderedDict()

In [16]:
# parameters() walks the module and its sub-modules, yielding each parameter tensor.
for param in test_module.parameters():
    print(param)

Parameter containing:
tensor([[-0.6162,  0.1601],
        [-0.4765, -0.4863],
        [ 0.7029,  0.0386]], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([-0.4142,  0.0175, -0.0078], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([[-0.4881, -0.5177, -0.5248],
        [-0.1947,  0.1741, -0.3537],
        [-0.1273,  0.5249, -0.3507],
        [ 0.3154, -0.2953,  0.3269]], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([-0.2239, -0.1472, -0.1624,  0.3849], dtype=torch.float64,
       requires_grad=True)
Parameter containing:
tensor([1., 1., 1., 1.], dtype=torch.float64, requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0.], dtype=torch.float64, requires_grad=True)


In [18]:
# named_parameters() yields (qualified name, parameter) tuples, e.g. 'linear1.weight'.
for named_param in test_module.named_parameters():
    print(named_param)

('linear1.weight', Parameter containing:
tensor([[-0.6162,  0.1601],
        [-0.4765, -0.4863],
        [ 0.7029,  0.0386]], dtype=torch.float64, requires_grad=True))
('linear1.bias', Parameter containing:
tensor([-0.4142,  0.0175, -0.0078], dtype=torch.float64, requires_grad=True))
('linear2.weight', Parameter containing:
tensor([[-0.4881, -0.5177, -0.5248],
        [-0.1947,  0.1741, -0.3537],
        [-0.1273,  0.5249, -0.3507],
        [ 0.3154, -0.2953,  0.3269]], dtype=torch.float64, requires_grad=True))
('linear2.bias', Parameter containing:
tensor([-0.2239, -0.1472, -0.1624,  0.3849], dtype=torch.float64,
       requires_grad=True))
('batch_norm.weight', Parameter containing:
tensor([1., 1., 1., 1.], dtype=torch.float64, requires_grad=True))
('batch_norm.bias', Parameter containing:
tensor([0., 0., 0., 0.], dtype=torch.float64, requires_grad=True))


children()--named_children()，都返回一个迭代器，只遍历直接子模块：children() 返回子模块本身，named_children() 返回每个子模块的名称和本身

In [19]:
# named_children() yields a (name, module) tuple for each direct sub-module.
for named_child in test_module.named_children():
    print(named_child)

('linear1', Linear(in_features=2, out_features=3, bias=True))
('linear2', Linear(in_features=3, out_features=4, bias=True))
('batch_norm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))


modules()--named_modules()，也会返回自身

In [20]:
# named_modules() also yields the module itself (under the empty name '') before its sub-modules.
for named_module in test_module.named_modules():
    print(named_module)

('', Test(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batch_norm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
))
('linear1', Linear(in_features=2, out_features=3, bias=True))
('linear2', Linear(in_features=3, out_features=4, bias=True))
('batch_norm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))
