In [2]:
import torch
from torch import nn

In [5]:
import torch
from torch import nn

# Single-layer RNN: feature_len=100, hidden_len=20
cell1 = nn.RNNCell(input_size=100, hidden_size=20)
# Initial hidden state, one row of 20 hidden units per batch element (batch=3)
h1 = torch.zeros(3, 20)
# All-zero input sequence: 10 time steps, batch of 3, 100 features per step
x = torch.zeros(10, 3, 100)
# Step the cell once per time slice, carrying the hidden state forward
for xt in x.unbind(0):
    h1 = cell1(xt, h1)
print(h1.size())

torch.Size([3, 20])


In [6]:
# Multi-layer RNN assembled by hand from two stacked RNNCells
cell1 = nn.RNNCell(input_size=100, hidden_size=30)
cell2 = nn.RNNCell(input_size=30, hidden_size=20)

# One zero-initialised hidden state per layer (batch=3)
h1 = torch.zeros(3, 30)
h2 = torch.zeros(3, 20)
x = torch.rand(10, 3, 100)
for xt in x.unbind(0):
    # The first cell reads the input slice; the second reads the first cell's new hidden state
    h1 = cell1(xt, h1)
    h2 = cell2(h1, h2)

print('h1.shape: ', h1.size())
print('h2.shape: ', h2.size())

h1.shape:  torch.Size([3, 30])
h2.shape:  torch.Size([3, 20])


### LSTM

In [7]:
import torch
from torch import nn

# 4-layer LSTM: every word is a 100-dim vector; hidden and cell states have size 20
lstm = nn.LSTM(100, 20, num_layers=4)
# 3 sentences of 10 words each, every word embedded as a 100-dim vector
x = torch.rand((10, 3, 100))
# h_0 and c_0 are not passed, so the module uses its default initial states
out, (h, c) = lstm(x)

print('out.shape: ', out.size())
print('h.shape: ', h.size())
print('c.shape: ', c.size())

out.shape:  torch.Size([10, 3, 20])
h.shape:  torch.Size([4, 3, 20])
c.shape:  torch.Size([4, 3, 20])


### LSTMCell

In [8]:
import torch
from torch import nn

# Single-layer LSTM stepped manually with nn.LSTMCell.
# Each word is a 100-dim vector; hidden and cell states both have size 20.
cell=nn.LSTMCell(input_size=100,hidden_size=20)
# Inputs for seq_len=10 time steps; each step has shape [batch, feature_len]
x=torch.randn(10,3,100)

# Zero-initialise the hidden state h and the cell state c, with batch=3
h=torch.zeros(3,20)
c=torch.zeros(3,20)

# At every time step pass the input xt together with the previous step's h and c.
# The returned hidden state must be written back to `h`; otherwise every step
# would be computed from the initial all-zero hidden state.
for xt in x:
    h,c=cell(xt,(h,c))

# `b` is kept as an alias of the final hidden state so that code using the
# original name, and the printed 'b.shape' label, keep working.
b=h

print('b.shape: ',b.shape)
print('c.shape: ',c.shape)

b.shape:  torch.Size([3, 20])
c.shape:  torch.Size([3, 20])


In [9]:
# Two stacked LSTM cells, stepped manually through time.
# Layer 0: feature_len=100 in, hidden/cell state size 30
cell1 = nn.LSTMCell(100, 30)
# Layer 1: takes layer 0's 30-dim hidden state and uses hidden/cell state size 20
cell2 = nn.LSTMCell(30, 20)

# Zero initial hidden (h) and cell (C) states for each layer, batch=3
h1, C1 = torch.zeros(3, 30), torch.zeros(3, 30)
h2, C2 = torch.zeros(3, 20), torch.zeros(3, 20)
x = torch.randn(10, 3, 100)

for xt in torch.unbind(x, dim=0):
    # The first layer consumes the raw input of this time step
    h1, C1 = cell1(xt, (h1, C1))
    # The second layer consumes the first layer's freshly updated hidden state
    h2, C2 = cell2(h1, (h2, C2))

print(h1.size(), C1.size())
print(h2.size(), C2.size())

torch.Size([3, 30]) torch.Size([3, 30])
torch.Size([3, 20]) torch.Size([3, 20])


### 词嵌入

In [3]:
import torch
import torch.nn as nn

# Assign an integer index to every word in the vocabulary
word_to_idx={'hello':0,'world':1}
# Index of the word to look up, wrapped in a long tensor
lookup_tensor=torch.tensor([word_to_idx['hello']],dtype=torch.long)
# Show the lookup index (this is the `tensor([0])` line in the recorded output)
print(lookup_tensor)
# One 5-dimensional vector per vocabulary entry; the table size is derived from
# the vocabulary so it cannot drift out of sync when words are added
embeds=nn.Embedding(num_embeddings=len(word_to_idx),embedding_dim=5)
# Pass the word's index in, get the corresponding embedding vector back
hello_embed=embeds(lookup_tensor)
print(hello_embed)

tensor([0])
tensor([[ 0.8669,  0.6445,  0.7020, -0.7155, -0.7310]],
       grad_fn=<EmbeddingBackward>)


### 神经网络示例

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer expects 16*5*5 features per sample, which is what
    the two 5x5 convolutions and 2x2 poolings leave of a 32x32 input.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 image channel -> 6 channels -> 16 channels, 5x5 kernels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected head: 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on `x` and return the 10 raw output scores per sample."""
        pooled = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), 2)
        # Collapse everything but the leading (batch) dimension into one feature axis
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample.

        This is the product of every dimension of `x` except dimension 0
        (the batch dimension).
        """
        return x.size()[1:].numel()
    
# Build the network and print its layer summary
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [14]:
# The model's trainable parameters are returned by net.parameters()
params = [p for p in net.parameters()]
print(len(params))
# Size of the first parameter tensor: the weights of the first convolution layer
print(params[0].shape)

10
torch.Size([6, 1, 5, 5])


In [19]:
# Generate a random 32x32 input and run it through the network.
# NOTE(review): the name `input` shadows the Python builtin; it is kept
# unchanged here because other cells may refer to it.
input = torch.randn((1, 1, 32, 32))
out = net(input)
print(out)

tensor([[ 0.1035, -0.0263,  0.1190, -0.0997, -0.0257, -0.0047, -0.0257, -0.0539,
         -0.0380,  0.0274]], grad_fn=<AddmmBackward>)


In [21]:
# Zero the gradient buffers of all parameters, then backpropagate a random gradient
net.zero_grad()
out.backward(gradient=torch.randn(1, 10))