* ### 快速搭建：(nn.Sequential自动forward)

In [3]:
import torch
import torch.nn as nn

# Quick build: nn.Sequential chains the layers and supplies forward() itself.
# Layout is 1 input feature -> 10 hidden units -> ReLU -> 1 output.
layers = [
    nn.Linear(in_features=1, out_features=10),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=1),
]
net2 = nn.Sequential(*layers)
print(net2)

Sequential(
  (0): Linear(in_features=1, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=1, bias=True)
)


* net2 把激励函数也一同纳入进去了; 而在 net1 中, 激励函数实际上是在 forward() 方法中才被调用的. 这也说明, 相比 net2, net1 的好处是可以根据个人需要更灵活地定制自己的前向传播过程 (例如 RNN).

* ### 保存 & 提取(save & restore)
训练好一个模型之后, 我们当然希望把它保存下来, 以便下次需要时直接提取使用.

In [4]:
# Seed PyTorch's default random number generator with a fixed value so that
# later random draws are reproducible from run to run.
torch.manual_seed(1)    # fixed seed -> reproducible randomness

<torch._C.Generator at 0x1e0e5880730>

In [8]:
# Toy regression data: 100 evenly spaced inputs in [-1, 1] laid out as a
# column, with targets x^2 plus uniform noise scaled by 0.2.
x = torch.linspace(-1, 1, 100).unsqueeze(dim=1)  # x data (tensor), shape=(100, 1)
y = x ** 2 + 0.2 * torch.rand(x.shape)  # noisy y data (tensor), shape=(100, 1)

# Small fully connected regressor: 1 -> 10 -> ReLU -> 1.
net1 = nn.Sequential(nn.Linear(1, 10), nn.ReLU(), nn.Linear(10, 1))
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)

# Training: 100 full-batch SGD steps on the mean squared error.
for t in range(100):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    prediction = net1(x)
    loss = loss_func(prediction, y)
    loss.backward()
    optimizer.step()

# Dump every entry of the optimizer's state_dict.
for var_name, value in optimizer.state_dict().items():
    print(var_name, ": ", value)

state:{}
param_groups:[{'lr': 0.5, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [2065478105848, 2065478105704, 2065478106136, 2065478106280]}]


In [10]:
# Persist the trained net1 to disk in two forms.
torch.save(net1, 'net.pkl')  # save the entire network object
torch.save(net1.state_dict(), 'net_params.pkl')   # save only the network's parameters (faster, uses less space)

In [11]:
# Restore the entire network
def restore_net():
    """Load the complete network stored in 'net.pkl' and return it.

    The restored network is run once on the module-level inputs ``x`` as a
    quick check that it is usable, and is then handed back so the caller can
    bind it, e.g. ``net2 = restore_net()``.

    Returns:
        The object that was saved to 'net.pkl' (the whole network).

    Raises:
        Whatever ``torch.load`` raises if the file is missing or cannot be
        unpickled, and whatever the forward pass raises on ``x``.
    """
    # NOTE(security): torch.load unpickles the file, and unpickling can run
    # arbitrary code -- only load files that come from a trusted source.
    # NOTE(review): newer PyTorch releases default torch.load to
    # weights_only=True, which refuses whole pickled modules; if that applies
    # to the installed version, pass weights_only=False for this trusted file.
    net2 = torch.load('net.pkl')
    net2(x)  # forward pass on the training inputs; result intentionally unused
    return net2

In [12]:
# NOTE(review): the result of this call is not bound to any name, so the
# module-level `net2` read by the following cell is left unchanged by it.
restore_net()

In [13]:
# List each entry of net2's state_dict alongside the size of its tensor.
for param_tensor, tensor in net2.state_dict().items():
    print(param_tensor, ": ", tensor.size())

0.weight:torch.Size([10, 1])
0.bias:torch.Size([10])
2.weight:torch.Size([1, 10])
2.bias:torch.Size([1])


In [14]:
# Fresh network with the same 1 -> 10 -> ReLU -> 1 layout as the saved one,
# so that the saved parameters can be loaded into it afterwards.
net3 = nn.Sequential(nn.Linear(1, 10), nn.ReLU(), nn.Linear(10, 1))

# List each entry of net3's state_dict alongside the size of its tensor.
for param_tensor, tensor in net3.state_dict().items():
    print(param_tensor, ": ", tensor.size())

0.weight:torch.Size([10, 1])
0.bias:torch.Size([10])
2.weight:torch.Size([1, 10])
2.bias:torch.Size([1])


In [17]:
# Restore only the parameters: read the saved state_dict from disk, copy it
# into the freshly built net3, then run the net on x.
saved_params = torch.load('net_params.pkl')
net3.load_state_dict(saved_params)
prediction = net3(x)