### 1.测试

In [1]:
import torch
x=torch.rand(5,3)
print(x)

tensor([[0.9023, 0.9782, 0.5830],
        [0.8467, 0.5440, 0.8336],
        [0.1165, 0.3985, 0.6869],
        [0.9527, 0.6089, 0.0115],
        [0.8465, 0.4207, 0.0317]])


In [2]:
torch.cuda.is_available()

False

### 2.自动微分

#### 2.1损失

In [3]:
import torch

x=torch.ones(5)
y=torch.zeros(3)
w=torch.randn(5,3,requires_grad=True)
b=torch.randn(3,requires_grad=True)
z=torch.matmul(x,w)+b
loss=torch.nn.functional.binary_cross_entropy_with_logits(z,y)
print(loss)

tensor(1.4624, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)


#### 2.2反向传播

In [4]:
print('w_grad:',w.grad)
print('b_grad:',b.grad)
print('-'*30)
loss.backward()
print('w_grad After backward:',w.grad)
print('b_grad After backward:',b.grad)

w_grad: None
b_grad: None
------------------------------
w_grad After backward: tensor([[0.2616, 0.3008, 0.1358],
        [0.2616, 0.3008, 0.1358],
        [0.2616, 0.3008, 0.1358],
        [0.2616, 0.3008, 0.1358],
        [0.2616, 0.3008, 0.1358]])
b_grad After backward: tensor([0.2616, 0.3008, 0.1358])


### 3.Linear

In [5]:
from torch import nn
import torch

model=nn.Linear(2,1)
input = torch.Tensor([1,2])
output = model(input)
print('output:',output)
print('-'*30)

for param in model.parameters():
    print('param:',param)

output: tensor([-0.1860], grad_fn=<AddBackward0>)
------------------------------
param: Parameter containing:
tensor([[-0.0899,  0.1755]], requires_grad=True)
param: Parameter containing:
tensor([-0.4471], requires_grad=True)


### 4.RNN

#### 4.1RNN参数

In [6]:
from torch import nn

rnn=nn.RNN(100,10)
print('parameters:',rnn._parameters.keys())

# weight_ih_l0: 第0层的输入层和隐含层之间的权重
# weight_hh_l0: 第0层的隐含层之间在不同时间步之间的权重
print(rnn.weight_ih_l0.shape,rnn.weight_hh_l0.shape)

print(rnn.bias_ih_l0.shape,rnn.bias_hh_l0.shape)

parameters: odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
torch.Size([10, 100]) torch.Size([10, 10])
torch.Size([10]) torch.Size([10])


#### 4.2RNN前向传播

In [7]:
'''5层RNN'''
import torch
from torch import nn

# (词向量维度)feature_len=100, (神经元数)hidden_len=20, 网络层数=5
rnn=nn.RNN(input_size=100,hidden_size=20,num_layers=5)
# 单词数量(seq_len=10),句子数量(batch=3),每个特征100维度(feature_len=100)
x=torch.randn(10,3,100)

# h_0的shape是[网络层数=5, batch=3, (神经元数)hidden_len=20]
out,h=rnn(x,torch.zeros(5,3,20))

print(out.shape)
print(h.shape)

torch.Size([10, 3, 20])
torch.Size([5, 3, 20])


4层的RNN，用来做语音翻译，输入是一段中文，输出是一段英文。
假设每个中文字符用100维数据进行编码，每个隐含层的维度是20，有4个隐含层。所以input_size = 100，hidden_size = 20，num_layers = 4。
再假设模型已经训练好了，现在有个1个长度为10的句子做输入，那么seq_len = 10，batch_size =1

In [8]:
import torch
from torch import nn

rnn=nn.RNN(input_size=100,hidden_size=20,num_layers=4)
print('rnn:',rnn)

x=torch.randn(10,1,100)
h0=torch.zeros(4,1,20)

out,h=rnn(x,h0)
print('ouput.shape:',output.shape)
print('h.shape:',h.shape)

rnn: RNN(100, 20, num_layers=4)
ouput.shape: torch.Size([1])
h.shape: torch.Size([4, 1, 20])


### 5.RNNCell

In [9]:
import torch
from torch import nn

'''单层RNN, feature_len=100, hidden_len=20'''
cell1=nn.RNNCell(100,20)
h1=torch.zeros(3,20)
x=torch.zeros(10,3,100)
for xt in x:
    h1=cell1(xt,h1)
print(h1.shape)

torch.Size([3, 20])


In [10]:
'''多层RNN'''
cell1=nn.RNNCell(100,30)
cell2=nn.RNNCell(30,20)

h1=torch.zeros(3,30)
h2=torch.zeros(3,20)
x=torch.rand(10,3,100)
for xt in x:
    h1=cell1(xt,h1)
    h2=cell2(h1,h2)

print('h1.shape: ',h1.shape)
print('h2.shape: ',h2.shape)

h1.shape:  torch.Size([3, 30])
h2.shape:  torch.Size([3, 20])


### 6.LSTM

In [11]:
import torch
from torch import nn

#4层的LSTM,输入的每个词用100维向量表示,隐藏单元和记忆单元的尺寸是20
lstm=nn.LSTM(input_size=100,hidden_size=20,num_layers=4)
#3句话，每句10个单词，每个单词的词向量维度(长度)100
x=torch.rand(10,3,100)
#不传入h_0和c_0则会默认初始化
out,(h,c)=lstm(x)

print('out.shape: ',out.shape)
print('h.shape: ',h.shape)
print('c.shape: ',c.shape)

out.shape:  torch.Size([10, 3, 20])
h.shape:  torch.Size([4, 3, 20])
c.shape:  torch.Size([4, 3, 20])


### 7.LSTMCell

In [12]:
import torch
from torch import nn

# 单层LSTM
# 1层的LSTM，输入的每个词用100维向量表示，隐藏单元和记忆单元的尺寸是20
cell=nn.LSTMCell(input_size=100,hidden_size=20)
# seq_len=10个时刻的输入,每个时刻shape都是[batch,feature_len]
x=torch.randn(10,3,100)

# 初始化隐藏单元h和记忆单元c,取batch=3
h=torch.zeros(3,20)
c=torch.zeros(3,20)

# 对每个时刻,传入输入xt和上个时刻的h和c
for xt in x:
    b,c=cell(xt,(h,c))

print('b.shape: ',b.shape)
print('c.shape: ',c.shape)

b.shape:  torch.Size([3, 20])
c.shape:  torch.Size([3, 20])


In [13]:
# 两层LSTM
# 输入的feature_len=100,隐藏单元和记忆单元hidden_len=30
cell1=nn.LSTMCell(input_size=100,hidden_size=30)
# hidden_len从L0层的30变到这一层的20
cell2=nn.LSTMCell(input_size=30,hidden_size=20)

# 分别初始化L0层和L1层的隐藏单元h 和 记忆单元C,取batch=3
h1=torch.zeros(3,30)
C1=torch.zeros(3,30)

h2=torch.zeros(3,20)
C2=torch.zeros(3,20)
x=torch.randn(10,3,100)

for xt in x:
    # 1层接受xt输入
    h1,C1=cell1(xt,(h1,C1))
    # 2层接受1层的输出h作为输入
    h2,C2=cell2(h1,(h2,C2))

print(h1.shape,C1.shape)
print(h2.shape,C2.shape)

torch.Size([3, 30]) torch.Size([3, 30])
torch.Size([3, 20]) torch.Size([3, 20])


### 8.词嵌入

In [14]:
import torch
import torch.nn as nn

#给单词编索引号
word_to_idx={'hello':0,'world':1}
#得到目标单词索引
lookup_tensor=torch.tensor([word_to_idx['hello']],dtype=torch.long)
#print(lookup_tensor)
embeds=nn.Embedding(num_embeddings=2,embedding_dim=5)
#传入单词的index，返回对应的嵌入向量
hello_embed=embeds(lookup_tensor)
print(hello_embed)

tensor([[ 0.5624, -1.2842,  0.7857,  0.6619,  1.3636]],
       grad_fn=<EmbeddingBackward>)


### 9.神经网络示例

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        #定义一个卷积操作：1个图像通道, 6个输出通道, 5x5卷积
        self.conv1=nn.Conv2d(1,6,5)
        self.conv2=nn.Conv2d(6,16,5)

        self.fc1=nn.Linear(16*5*5,120)
        self.fc2=nn.Linear(120,84)
        self.fc3=nn.Linear(84,10)

    def forward(self,x):
        x=F.max_pool2d(F.relu(self.conv1(x)),(2,2))
        x=F.max_pool2d(F.relu(self.conv2(x)),2)
        x=x.view(-1,self.num_flat_features(x))
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size=x.size()[1:] #计算除通道以外的所有大小
        # print(x.size())
        # print(size)
        num_features=1
        for s in size:
            num_features*=s
        return num_features
    
'''生成网络'''
net=Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [16]:
'''模型可训练的参数可以通过调用 net.parameters() 返回'''
params=list(net.parameters())
print(len(params))
print(params[0].size()) #第一层卷积的权重系数

10
torch.Size([6, 1, 5, 5])


In [17]:
'''随机生成一个 32x32 的输入'''
input=torch.randn(1,1,32,32)
out=net(input)
print(out)

tensor([[-0.0888,  0.0153,  0.0629, -0.0479, -0.0595,  0.1029,  0.0002, -0.0510,
         -0.0515,  0.0367]], grad_fn=<AddmmBackward>)


In [18]:
'''把所有参数梯度缓存器置零，用随机的梯度来反向传播'''
net.zero_grad()
out.backward(torch.randn(1,10))