In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Freshly constructed layer: its weight still holds PyTorch's default initialisation.
w = torch.nn.Conv2d(2, 2, 3, padding=1)
print("未人工初始化之前w的权重：", w.weight)

print("1. 使用另一个Conv层的权重初始化")
q = torch.nn.Conv2d(2, 2, 3, padding=1)
print("q的权重", q.weight)
# Copy q's values into w's own Parameter rather than rebinding
# `w.weight = q.weight`: rebinding makes both layers hold the *same* Parameter
# object, so a later update to one layer would silently change the other.
# no_grad() is needed because w.weight is a leaf tensor that requires grad,
# and in-place writes to such a tensor are rejected while autograd is recording.
with torch.no_grad():
    w.weight.copy_(q.weight)
print(w.weight)

print("2. 使用来自Tensor的权值")
# Build the float32 tensor explicitly (np.ones yields float64) instead of
# relying on the legacy torch.Tensor(ndarray) constructor's implicit cast.
ones = torch.tensor(np.ones([2, 2, 3, 3]), dtype=torch.float32)
# Wrapping in nn.Parameter is required: a Module rejects a plain tensor
# assigned to an attribute that is already registered as a parameter.
w.weight = torch.nn.Parameter(ones)
print(w.weight)

未人工初始化之前w的权重： Parameter containing:
tensor([[[[ 0.0938,  0.1979,  0.2250],
          [ 0.0160,  0.0163,  0.0819],
          [ 0.1533,  0.0057, -0.1118]],

         [[ 0.1532, -0.1358,  0.1617],
          [ 0.2159, -0.1377,  0.2163],
          [ 0.2129,  0.1197,  0.0804]]],


        [[[-0.0150, -0.0840,  0.0984],
          [ 0.1779, -0.0160,  0.0363],
          [ 0.1586, -0.1000,  0.2185]],

         [[-0.2212, -0.1584,  0.1141],
          [-0.1059,  0.0340,  0.1892],
          [-0.0202,  0.0322,  0.0530]]]], requires_grad=True)
1. 使用另一个Conv层的权重初始化
q的权重 Parameter containing:
tensor([[[[ 0.1820,  0.1627,  0.1445],
          [ 0.1745, -0.1787,  0.0869],
          [ 0.1917, -0.0720, -0.0945]],

         [[-0.1909, -0.0782, -0.1951],
          [ 0.2165,  0.1200, -0.2032],
          [-0.1952,  0.1195,  0.0949]]],


        [[[-0.2200, -0.0828, -0.1513],
          [-0.0629, -0.0266, -0.0422],
          [ 0.0726,  0.0742,  0.2218]],

         [[ 0.1315, -0.1648, -0.1675],
          [ 0.1944, 



## Variable和Parameter的区别
---

Parameter是Variable的一个子类，经常被用于Module的参数，比如weight权值和bias偏置。（注：自PyTorch 0.4起，Variable已与Tensor合并，Parameter实际上是Tensor的子类。）

Parameter和Module一起使用的时候会有一些特殊的属性，当Parameter赋值给module的属性的时候，
它会自动被加到Module的参数列表中，即会出现在parameters()的迭代器中。
而将Variable赋给Module的时候没有这样的属性。这样是为了保存模型的时候只保存权值和偏置参数。
`另外，Parameter的requires_grad默认为True，而Variable默认为False。`

Parameter.data -> 得到 tensor 数据
Parameter.requires_grad -> True

## 几种常用的初始化方法
 --- 
 ```python
     torch.nn.init.xavier_normal_()
     torch.nn.init.xavier_uniform_()
     torch.nn.init.normal_()
     torch.nn.init.constant_()
     layer.weight.data.normal_(mean,std)
     layer.bias.data.fill_(1)
 ```
这些方法既可以对tensor进行初始化，也可以对Variable以及Parameter进行初始化。
 

In [7]:
# Initialise a plain tensor in place.
w = torch.empty(1, 3, dtype=torch.float32)  # uninitialised storage: the first print shows arbitrary values
print(w)
torch.nn.init.constant_(w, 2)  # overwrite every element with the constant 2
print(w)

tensor([[3.4691e-37, 0.0000e+00, 1.4013e-45]])
tensor([[2., 2., 2.]])


In [20]:
# Re-initialise the parameters of a single layer.
import torch
w = torch.nn.Conv2d(in_channels=2, out_channels=2, kernel_size=3, padding=1)
print(w.weight)  # weight as produced by the layer's default initialisation
torch.nn.init.xavier_uniform_(w.weight)  # in-place Xavier (Glorot) uniform re-initialisation
print(w.weight)

Parameter containing:
tensor([[[[ 0.2038, -0.1263,  0.1419],
          [ 0.1426,  0.1871, -0.1218],
          [-0.0912, -0.0282, -0.1751]],

         [[-0.1216, -0.1523, -0.1610],
          [ 0.0008, -0.0028,  0.1226],
          [-0.2235, -0.1028, -0.0474]]],


        [[[ 0.0685,  0.2059, -0.0564],
          [ 0.1313, -0.0621, -0.0822],
          [-0.2014, -0.0239, -0.0645]],

         [[-0.0972, -0.0697,  0.0499],
          [ 0.1692,  0.1729,  0.2323],
          [-0.1839,  0.0887,  0.0602]]]], requires_grad=True)
Parameter containing:
tensor([[[[ 0.3845, -0.1396, -0.4007],
          [-0.1431, -0.1594,  0.1285],
          [-0.3629, -0.1895,  0.2286]],

         [[-0.1719,  0.3718, -0.2272],
          [ 0.1165, -0.3947, -0.3502],
          [-0.1379, -0.1323, -0.3598]]],


        [[[-0.1687, -0.1742,  0.3303],
          [-0.3114,  0.3304, -0.0697],
          [-0.1152,  0.2530, -0.0241]],

         [[ 0.0510, -0.1412,  0.0053],
          [-0.3865, -0.4049, -0.3436],
          [ 0.0507, 

## 加载模型
---

In [None]:
pretrained_dict = ...
model_dict = model.state_dict()

# Keep only the pretrained entries whose key also exists in the current model.
pretrained_dict = {
    name: tensor
    for name, tensor in pretrained_dict.items()
    if name in model_dict
}

# Overlay the surviving pretrained entries onto the model's own state dict.
model_dict.update(pretrained_dict)

# Load the merged state dict back into the model.
model.load_state_dict(model_dict)

## Module.modules()与Module.children()
---
modules()会以迭代器的形式返回网络中的所有模块（包括网络自身）

而children()只会返回所有直接子模块的迭代器（iterator），不包含网络自身，也不会递归到更深层的子模块

In [13]:
# modules(): yields the container itself followed by each nested module.
modules_demo = torch.nn.Sequential(nn.Linear(10, 20), nn.ReLU())
print(list(modules_demo.modules()))

# children(): yields only the direct sub-modules of the container.
children_demo = torch.nn.Sequential(nn.Linear(10, 20), nn.ReLU())
print(list(children_demo.children()))

[Sequential(
  (0): Linear(in_features=10, out_features=20, bias=True)
  (1): ReLU()
), Linear(in_features=10, out_features=20, bias=True), ReLU()]
[Linear(in_features=10, out_features=20, bias=True), ReLU()]


## 固定部分参数进行训练

### 方法一

In [None]:
# Freeze the first five direct children of `model` and train only the rest.
# `model` must already exist; it is not defined in this cell.
count = 0
param_optim = []  # parameters that will be handed to the optimizer
for count, k in enumerate(model.children(), start=1):
    if count > 5:
        # Children after the fifth stay trainable.
        param_optim.extend(k.parameters())
    else:
        # The first five children are frozen: no gradients are computed for them.
        for param in k.parameters():
            param.requires_grad = False
# NOTE: if `model` has five or fewer children, param_optim is empty and
# torch.optim.SGD raises ValueError for an empty parameter list.
optimizer = torch.optim.SGD(param_optim, lr=0.001)

### 方法二

In [None]:
class Net(nn.Module):
    """Example module whose two convolution layers are frozen at construction.

    Parameters registered before the freezing loop (conv1, conv2) get
    requires_grad=False; layers added afterwards (fc1, fc2, fc3) keep the
    default requires_grad=True. No forward pass is defined here.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=4)

        # Freeze every parameter registered so far; these layers are not trained.
        for frozen in self.parameters():
            frozen.requires_grad_(False)

        # Layers added from here on remain trainable.
        self.fc1 = nn.Linear(in_features=20, out_features=30)
        self.fc2 = nn.Linear(in_features=20, out_features=30)
        self.fc3 = nn.Linear(in_features=20, out_features=30)

# Hand the optimizer only the parameters that are still trainable.
# `model` must already exist; it is not defined in this cell.
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-3,
)