In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
class Net(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages, then three linear layers.

    ``fc1`` expects 16 * 5 * 5 = 400 flattened features per sample, which is
    what the two unpadded 5x5 convolutions and 2x2 poolings produce for
    3-channel 32x32 inputs. The network outputs 10 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)  # shared by both convolution stages
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the unnormalised class scores."""
        # The notebook never binds the conventional alias ``F``, so the
        # functional API is reached through the already-imported ``nn``.
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten each sample's feature maps into one 400-element vector.
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        # No activation on the last layer: raw scores are returned.
        x = self.fc3(x)
        return x

# Instantiate the network and print its layer-by-layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
# SGD with momentum over all of net's parameters; its state is checkpointed below.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [4]:
# Extra metadata stored next to the weights (fixed demo values).
EPOCH = 5
PATH = "model.pt"
LOSS = 0.4

# Write a single checkpoint file holding the metadata, the model weights
# and the optimizer state.
torch.save(
    obj={
        'epoch': EPOCH,
        'model_state_dict': net.state_dict(),
        # Save the optimizer state as well.
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': LOSS,
    },
    f=PATH,
)

In [5]:
# Rebuild a fresh model and an optimizer, then restore both from the
# checkpoint file at PATH.
model = Net()
# The optimizer must wrap the parameters of `model` (the network being
# restored), not those of the earlier `net`; otherwise optimizer steps
# would never update `model`.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Choose the mode that matches what happens next: eval() for inference,
# train() to resume training. Both are called here for demonstration, so the
# model ends up in training mode and the cell displays it.
model.eval()
# - or -
model.train()

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [6]:
class Test(nn.Module):
    """Toy module used to explore ``nn.Module`` introspection.

    It registers three submodules and defines no ``forward`` of its own.
    """

    def __init__(self):
        # Initialise the nn.Module machinery before registering submodules.
        super().__init__()
        # Linear(in_features, out_features)
        self.linear1 = nn.Linear(2, 3)  # first linear layer
        self.linear2 = nn.Linear(3, 4)  # second linear layer
        self.batch_norm = nn.BatchNorm2d(4)  # batch-norm layer over 4 features
        

In [7]:
test_module = Test()  # instantiate the new class

In [8]:
test_module._modules  # ordered dict of the registered submodules: linear1, linear2, batch_norm

OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)),
             ('linear2', Linear(in_features=3, out_features=4, bias=True)),
             ('batch_norm',
              BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])

In [9]:
test_module._modules['linear1']  # access linear1 on its own

Linear(in_features=2, out_features=3, bias=True)

In [10]:
test_module._modules['linear1'].weight  # fetch linear1's weight tensor (a Parameter)

Parameter containing:
tensor([[ 0.2130,  0.6700],
        [-0.4400,  0.5632],
        [ 0.5111,  0.1931]], requires_grad=True)

In [11]:
test_module._modules['linear1'].weight.dtype  # inspect its data type (float32 here)

torch.float32

In [12]:
test_module.to(torch.double)  # cast the module to float64 via to(); the module itself is returned and displayed

Test(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batch_norm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [13]:
test_module._modules['linear1'].weight.dtype  # the data type has changed to float64

torch.float64

In [14]:
# Same weight values as before the cast; only the dtype differs.
test_module._modules['linear1'].weight

Parameter containing:
tensor([[ 0.2130,  0.6700],
        [-0.4400,  0.5632],
        [ 0.5111,  0.1931]], dtype=torch.float64, requires_grad=True)

In [15]:
# Cast back to float32; as with the float64 cast, the change applies to test_module itself.
test_module.to(torch.float32)

Test(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batch_norm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [16]:
# Confirm the weights are float32 again.
test_module._modules['linear1'].weight.dtype

torch.float32

In [17]:
# The submodule registry is unchanged by the dtype round trip.
test_module._modules

OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)),
             ('linear2', Linear(in_features=3, out_features=4, bias=True)),
             ('batch_norm',
              BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])

In [18]:
test_module._parameters  # only parameters registered directly on this module (none here); it does not look into submodules

OrderedDict()

In [19]:
test_module._buffers    # likewise empty: Test registers no buffers directly on itself

OrderedDict()

In [20]:
test_module.state_dict()  # ordered dict of all parameters and buffers, submodules included (keys are prefixed with the submodule name)

OrderedDict([('linear1.weight',
              tensor([[ 0.2130,  0.6700],
                      [-0.4400,  0.5632],
                      [ 0.5111,  0.1931]])),
             ('linear1.bias', tensor([ 0.4125, -0.5220, -0.5521])),
             ('linear2.weight',
              tensor([[-0.0231, -0.0630, -0.1831],
                      [-0.3315, -0.1221,  0.3205],
                      [-0.3661, -0.2107, -0.4890],
                      [ 0.1272, -0.2465,  0.0465]])),
             ('linear2.bias', tensor([ 0.4207, -0.0765,  0.4154,  0.5267])),
             ('batch_norm.weight', tensor([1., 1., 1., 1.])),
             ('batch_norm.bias', tensor([0., 0., 0., 0.])),
             ('batch_norm.running_mean', tensor([0., 0., 0., 0.])),
             ('batch_norm.running_var', tensor([1., 1., 1., 1.])),
             ('batch_norm.num_batches_tracked', tensor(0))])

In [21]:
# Index the state dict by its dotted key to pull out a single tensor.
test_module.state_dict()['linear1.weight']

tensor([[ 0.2130,  0.6700],
        [-0.4400,  0.5632],
        [ 0.5111,  0.1931]])

In [27]:
for p in test_module.parameters():  # parameters() recurses into the submodules' parameters
    print(p)                         # whereas _parameters only covers the current module, not its submodules

Parameter containing:
tensor([[ 0.2130,  0.6700],
        [-0.4400,  0.5632],
        [ 0.5111,  0.1931]], requires_grad=True)
Parameter containing:
tensor([ 0.4125, -0.5220, -0.5521], requires_grad=True)
Parameter containing:
tensor([[-0.0231, -0.0630, -0.1831],
        [-0.3315, -0.1221,  0.3205],
        [-0.3661, -0.2107, -0.4890],
        [ 0.1272, -0.2465,  0.0465]], requires_grad=True)
Parameter containing:
tensor([ 0.4207, -0.0765,  0.4154,  0.5267], requires_grad=True)
Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)


In [29]:
for p in test_module.named_parameters():  # clearer than parameters(): each item is a (name, parameter) tuple
    print(p)

('linear1.weight', Parameter containing:
tensor([[ 0.2130,  0.6700],
        [-0.4400,  0.5632],
        [ 0.5111,  0.1931]], requires_grad=True))
('linear1.bias', Parameter containing:
tensor([ 0.4125, -0.5220, -0.5521], requires_grad=True))
('linear2.weight', Parameter containing:
tensor([[-0.0231, -0.0630, -0.1831],
        [-0.3315, -0.1221,  0.3205],
        [-0.3661, -0.2107, -0.4890],
        [ 0.1272, -0.2465,  0.0465]], requires_grad=True))
('linear2.bias', Parameter containing:
tensor([ 0.4207, -0.0765,  0.4154,  0.5267], requires_grad=True))
('batch_norm.weight', Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True))
('batch_norm.bias', Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True))


In [30]:
for p in test_module.named_children():  # each item is a (name, module) tuple
    print(p)

('linear1', Linear(in_features=2, out_features=3, bias=True))
('linear2', Linear(in_features=3, out_features=4, bias=True))
('batch_norm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))


In [32]:
test_module._modules  # the same submodules, but returned as a dict

OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)),
             ('linear2', Linear(in_features=3, out_features=4, bias=True)),
             ('batch_norm',
              BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])

In [34]:
for p in test_module.named_modules():  # unlike named_children(), named_modules() also yields the module itself (under the name '')
    print(p)

('', Test(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batch_norm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
))
('linear1', Linear(in_features=2, out_features=3, bias=True))
('linear2', Linear(in_features=3, out_features=4, bias=True))
('batch_norm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))


In [39]:
for p in test_module.modules():  # same modules as named_modules(), without the names
    print(p)
    print('\n')  # blank lines to separate one module's repr from the next

Test(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batch_norm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


Linear(in_features=2, out_features=3, bias=True)


Linear(in_features=3, out_features=4, bias=True)


BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


