# module.nn 模块

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [27]:
# temperature data
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]    # Clesius
t_u = [35.7,55.9, 58.2, 81.9, 56.3, 48.9,33.9,21.8,48.4, 60.4, 68.4]    # unknown units
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices

train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

print(train_t_un)
print(train_t_c)
print(val_t_un)
print(val_t_c)

tensor([2.1800, 6.8400, 3.5700, 4.8400, 5.8200, 4.8900, 6.0400, 5.6300, 8.1900])
tensor([-4.0000, 21.0000,  0.5000,  6.0000, 15.0000,  8.0000, 13.0000, 11.0000,
        28.0000])
tensor([5.5900, 3.3900])
tensor([14.,  3.])


In [32]:
linear_model = nn.Linear(1, 1)
linear_model(val_t_un)

RuntimeError: size mismatch, m1: [1 x 2], m2: [1 x 1] at ..\aten\src\TH/generic/THTensorMath.cpp:41

没有搞清楚原文中的变量"t_un_val"到底是什么  
clarity - 明晰  
implementation - 履行、施行  
hook - 钩子、勾？  
**文中说不太建议使用`.forward()`函数，因为这个有可能导致很多的hooks没有被调用**？？

In [10]:
x = torch.ones(1)
linear_model(x)

tensor([-1.6716], grad_fn=<AddBackward0>)

PyTorch nn.module是为了多个样本同时处理而生的  
为了能够同时处理多个样本，modules的输入最好是第0维的【是说最好是一维的吗】

In [11]:
x = torch.ones(10, 1)
linear_model(x)

tensor([[-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716],
        [-1.6716]], grad_fn=<AddmmBackward>)

In [12]:
# temperature data
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]    # Clesius
t_u = [35.7,55.9, 58.2, 81.9, 56.3, 48.9,33.9,21.8,48.4, 60.4, 68.4]    # unknown units
t_c = torch.tensor(t_c).unsqueeze(1)
t_u = torch.tensor(t_u).unsqueeze(1)
t_u.shape

torch.Size([11, 1])

In [14]:
import torch.optim as optim

In [15]:
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(),
                     lr=1e-2)

In [16]:
linear_model.parameters()

<generator object Module.parameters at 0x0000015BC6082BF8>

In [17]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[0.5820]], requires_grad=True),
 Parameter containing:
 tensor([-0.6653], requires_grad=True)]

现在，不用再自己创建parameter变量了，可以直接进行调用  

In [23]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    for epoch in range(1, n_epochs+1):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)
        
        t_p_val = model(t_u_val)
        loss_val = loss_fn(t_p_val, t_c_val)
        
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        
        if epoch == 1 or epoch % 1000 == 0:
            print('Epoch {}, Training loss {}, Validation loss {}'.format(epoch, float(loss_train), float(loss_val)))

MSE - mean square error 是比较常用的loss，之前的代码中就是这个loss函数

In [24]:
training_loop(n_epochs=3000,
             optimizer=optimizer,
             model=linear_model,
             loss_fn=nn.MSELoss(),
             t_u_train=train_t_un,
             t_u_val=val_t_un,
             t_c_train=train_t_c,
             t_c_val=val_t_c)

print()
print(linear_model.weight)
print(linear_model.bias)

RuntimeError: size mismatch, m1: [1 x 9], m2: [1 x 1] at ..\aten\src\TH/generic/THTensorMath.cpp:41

In [33]:
seq_model = nn.Sequential(nn.Linear(1, 13),
                         nn.Tanh(),
                         nn.Linear(13, 1))
seq_model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

In [34]:
[param.shape for param in seq_model.parameters()]

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

往往很难分清这些变量都是哪里的  
可以使用`named_parameters`来给它们起名字

In [35]:
for name, param in seq_model.named_parameters():
    print(name, param.shape)

0.weight torch.Size([13, 1])
0.bias torch.Size([13])
2.weight torch.Size([1, 13])
2.bias torch.Size([1])


你也可以使用OrderedDict，这样就可以再 Sequential中给layer起名字了  
这里就不起了，感兴趣可以去看5.2小节，P126页/141   
当你给layer起了名字之后，就可以像下面这样直接地去调用了

In [36]:
seq_model.output_linear.bias

ModuleAttributeError: 'Sequential' object has no attribute 'output_linear'

In [37]:
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(n_epochs=5000,
             optimizer=optimizer,
             model=seq_model,
             loss_fn=nn.MSELoss(),
             t_u_train=train_t_un,
             t_u_val=val_t_un,
             t_c_train=train_t_c,
             t_c_val=val_t_c)

print("output", seq_model(val_t_un))
print("answer", val_t_c)

RuntimeError: size mismatch, m1: [1 x 9], m2: [1 x 13] at ..\aten\src\TH/generic/THTensorMath.cpp:41