In [3]:
from torch import nn
import torch

# Affine layer mapping 32 input features to 2 output features.
linear = nn.Linear(in_features=32, out_features=2)

# Random batch: 3 rows, 32 features per row.
inputs = torch.rand(3, 32)

# Calling the module applies the layer to every row of the batch.
outputs = linear(inputs)
print(outputs)

tensor([[0.2416, 0.1069],
        [0.3111, 0.1202],
        [0.0182, 0.0249]], grad_fn=<AddmmBackward0>)


In [4]:
from torch.nn import functional as F
# Apply the sigmoid activation to `outputs` (the linear-layer result computed in an earlier cell).
activation = F.sigmoid(outputs)
print(activation)

tensor([[0.5601, 0.5267],
        [0.5772, 0.5300],
        [0.5046, 0.5062]], grad_fn=<SigmoidBackward0>)


In [5]:
# Softmax taken along dim=1, i.e. across the 2 values of each row, so every row sums to 1.
activation = F.softmax(outputs,dim=1)
print(activation)

tensor([[0.5336, 0.4664],
        [0.5476, 0.4524],
        [0.4983, 0.5017]], grad_fn=<SoftmaxBackward0>)


In [6]:
# ReLU activation on `outputs`. In the recorded run every entry of `outputs`
# is positive, so the printed values equal the linear-layer output.
activation = F.relu(outputs)
print(activation)

tensor([[0.2416, 0.1069],
        [0.3111, 0.1202],
        [0.0182, 0.0249]], grad_fn=<ReluBackward0>)


- 自定义神经网络模型

In [8]:
 import torch
 from torch import nn
 from torch.nn import functional as F

 class MLP(nn.Module):
     """Two-layer perceptron: Linear -> ReLU -> Linear -> softmax.

     Args:
         input_dim: Number of features in each input vector.
         hidden_dim: Number of units in the hidden layer.
         num_class: Number of output classes.
     """

     def __init__(self, input_dim, hidden_dim, num_class):
         super().__init__()
         # Input -> hidden projection.
         self.linear1 = nn.Linear(input_dim, hidden_dim)
         # Non-linearity applied to the hidden layer.
         self.activate = F.relu
         # Hidden -> class-score projection.
         self.linear2 = nn.Linear(hidden_dim, num_class)

     def forward(self, inputs):
         """Return class probabilities for `inputs`.

         Args:
             inputs: Tensor whose last dimension has size `input_dim`.

         Returns:
             Tensor with the same leading dimensions as `inputs` and a last
             dimension of size `num_class`; values along it sum to 1.
         """
         hidden = self.linear1(inputs)
         activation = self.activate(hidden)
         outputs = self.linear2(activation)
         # Normalize over the last (class) axis so that unbatched 1-D inputs
         # and inputs with extra leading dimensions work as well as 2-D batches.
         probs = F.softmax(outputs, dim=-1)
         return probs

# Build an MLP with 4 input features, 5 hidden units and 2 output classes.
mlp = MLP(input_dim=4,hidden_dim=5, num_class=2)
# Random batch of 3 samples with 4 features each.
inputs = torch.rand(3,4)
# Calling the module runs MLP.forward, which returns softmax probabilities.
probs = mlp(inputs)
print(probs)

tensor([[0.3804, 0.6196],
        [0.4273, 0.5727],
        [0.3861, 0.6139]], grad_fn=<SoftmaxBackward0>)


- 卷积神经网络

In [9]:
import torch
from torch.nn import Conv1d

# Two 1-D convolutions over the same 5-channel input; they differ only in
# kernel width (4 vs. 3). Both produce 2 output channels.
conv1 = Conv1d(in_channels=5, out_channels=2, kernel_size=4)
conv2 = Conv1d(in_channels=5, out_channels=2, kernel_size=3)

# Random input with 2 samples, 5 channels and length 6.
inputs = torch.rand(2, 5, 6)

# Output lengths in the recorded run: 3 for kernel 4, 4 for kernel 3.
outputs1 = conv1(inputs)
outputs2 = conv2(inputs)

print(outputs1, outputs2, sep="\n")

tensor([[[ 0.0576,  0.1622, -0.2234],
         [-0.2325, -0.3413, -0.2250]],

        [[-0.3782, -0.1242, -0.1772],
         [-0.2281, -0.3157, -0.2179]]], grad_fn=<ConvolutionBackward0>)
tensor([[[ 0.6040,  0.5271,  0.7065,  0.5967],
         [-0.1372, -0.2387, -0.4239, -0.2224]],

        [[ 0.6710,  0.5483,  0.5937,  0.5187],
         [-0.3526, -0.2052, -0.3864, -0.3359]]],
       grad_fn=<ConvolutionBackward0>)


In [12]:
from torch.nn import MaxPool1d

# Pooling windows of 3 and 4 match the lengths of `outputs1` and `outputs2`
# shown in the convolution cell, so each channel is reduced to one maximum.
pool1 = MaxPool1d(kernel_size=3)
pool2 = MaxPool1d(kernel_size=4)

outputs1_pool1 = pool1(outputs1)
print(outputs1_pool1)

outputs2_pool2 = pool2(outputs2)
print(outputs2_pool2)

tensor([[[ 0.1622],
         [-0.2250]],

        [[-0.1242],
         [-0.2179]]], grad_fn=<SqueezeBackward1>)
tensor([[[ 0.7065],
         [-0.1372]],

        [[ 0.6710],
         [-0.2052]]], grad_fn=<SqueezeBackward1>)


In [14]:
import torch.nn.functional as F
# Functional max pooling with the window size read from the tensor itself
# (its size along dim 2), so no kernel size has to be hard-coded.
outputs_pool1 = F.max_pool1d(outputs1, kernel_size=outputs1.shape[2])
# Print the tensor computed in this cell, not `outputs1_pool1` from the
# MaxPool1d cell.
print(outputs_pool1)

outputs_pool2 = F.max_pool1d(outputs2, kernel_size=outputs2.shape[2])
print(outputs_pool2)


tensor([[[ 0.1622],
         [-0.2250]],

        [[-0.1242],
         [-0.2179]]], grad_fn=<SqueezeBackward1>)
tensor([[[ 0.7065],
         [-0.1372]],

        [[ 0.6710],
         [-0.2052]]], grad_fn=<SqueezeBackward1>)


In [15]:
from torch.nn import Linear

# Remove dim 2 (size 1 after pooling) from each pooled tensor.
outputs_pool_squeeze1 = outputs_pool1.squeeze(dim=2)
print(outputs_pool_squeeze1)

outputs_pool_squeeze2 = outputs_pool2.squeeze(dim=2)
print(outputs_pool_squeeze2)

# Concatenate the two branches along dim 1: 2 + 2 = 4 features per sample.
outputs_pool = torch.cat((outputs_pool_squeeze1, outputs_pool_squeeze2), dim=1)
print(outputs_pool)

# Project the 4 concatenated features onto 2 outputs.
linear = Linear(in_features=4, out_features=2)
outputs_linear = linear(outputs_pool)
print(outputs_linear)

tensor([[ 0.1622, -0.2250],
        [-0.1242, -0.2179]], grad_fn=<SqueezeBackward1>)
tensor([[ 0.7065, -0.1372],
        [ 0.6710, -0.2052]], grad_fn=<SqueezeBackward1>)
tensor([[ 0.1622, -0.2250,  0.7065, -0.1372],
        [-0.1242, -0.2179,  0.6710, -0.2052]], grad_fn=<CatBackward0>)
tensor([[ 0.2137, -0.3520],
        [ 0.3050, -0.3732]], grad_fn=<AddmmBackward0>)


- 循环神经网络

In [16]:
from torch.nn import RNN
# Recurrent layer: 4 input features per step, hidden state of size 5.
# batch_first=True puts the batch dimension first in inputs and outputs.
rnn = RNN(input_size=4, hidden_size=5, batch_first=True)

# 2 sequences, 3 steps each, 4 features per step.
inputs = torch.rand(2, 3, 4)

# `outputs` has one hidden vector per step; in the recorded run `hn` equals
# the last step of `outputs` for each sequence.
outputs, hn = rnn(inputs)

print(outputs, hn, sep="\n")
print(outputs.shape, hn.shape)

tensor([[[ 0.1793, -0.0988,  0.7587, -0.0575, -0.7716],
         [-0.0714,  0.0189,  0.7073, -0.3451, -0.6584],
         [ 0.1919, -0.2825,  0.7780, -0.5581, -0.8399]],

        [[ 0.1646, -0.2326,  0.7935, -0.1408, -0.7047],
         [ 0.3552, -0.3788,  0.7323, -0.2776, -0.5805],
         [ 0.1527, -0.4057,  0.8812, -0.4943, -0.7798]]],
       grad_fn=<TransposeBackward1>)
tensor([[[ 0.1919, -0.2825,  0.7780, -0.5581, -0.8399],
         [ 0.1527, -0.4057,  0.8812, -0.4943, -0.7798]]],
       grad_fn=<StackBackward0>)
torch.Size([2, 3, 5]) torch.Size([1, 2, 5])


In [17]:
from torch.nn import LSTM
# LSTM layer: 4 input features per step, hidden size 5, batch dimension first.
lstm = LSTM(input_size=4, hidden_size=5, batch_first=True)

# 2 sequences, 3 steps each, 4 features per step.
inputs = torch.rand(2, 3, 4)

# The LSTM returns the per-step outputs plus a (hidden, cell) state pair.
outputs, (hn, cn) = lstm(inputs)

print(outputs, hn, cn, sep="\n")
print(outputs.shape, hn.shape, cn.shape)

tensor([[[-0.1271,  0.1416, -0.1430,  0.0548, -0.0897],
         [-0.2134,  0.1865, -0.2268,  0.1111, -0.1690],
         [-0.3167,  0.2016, -0.1689, -0.0131, -0.2028]],

        [[-0.1477,  0.1590, -0.0769,  0.0089, -0.0942],
         [-0.1774,  0.1872, -0.1744,  0.1229, -0.1311],
         [-0.2850,  0.1583, -0.2635,  0.0719, -0.1983]]],
       grad_fn=<TransposeBackward0>)
tensor([[[-0.3167,  0.2016, -0.1689, -0.0131, -0.2028],
         [-0.2850,  0.1583, -0.2635,  0.0719, -0.1983]]],
       grad_fn=<StackBackward0>)
tensor([[[-0.6079,  0.3925, -0.3893, -0.0209, -0.3846],
         [-0.5163,  0.2950, -0.5286,  0.1134, -0.4858]]],
       grad_fn=<StackBackward0>)
torch.Size([2, 3, 5]) torch.Size([1, 2, 5]) torch.Size([1, 2, 5])


- Transformer

In [18]:
# A single Transformer encoder layer with model width 4 and 2 attention heads.
encoder_layer = nn.TransformerEncoderLayer(d_model=4, nhead=2)

# NOTE(review): batch_first is not set here, unlike the RNN/LSTM cells. Per the
# PyTorch docs the default layout is (seq, batch, feature), so this tensor is
# presumably read as 2 steps x 3 sequences x 4 features - confirm intended.
src = torch.rand(2, 3, 4)

out = encoder_layer(src)
print(out)

tensor([[[ 0.8180, -1.5911, -0.1137,  0.8867],
         [ 0.5552, -0.5168, -1.3276,  1.2892],
         [ 0.2924, -1.7016,  0.5902,  0.8189]],

        [[ 0.3458, -1.7152,  0.6658,  0.7037],
         [ 1.2341, -1.4354,  0.5457, -0.3444],
         [-1.5780, -0.1026,  1.0490,  0.6316]]],
       grad_fn=<NativeLayerNormBackward0>)


In [20]:
# Build a 6-layer encoder stack based on `encoder_layer` and feed it the same `src` tensor.
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
out = transformer_encoder(src)
print(out)

tensor([[[ 0.7625, -1.4378, -0.4118,  1.0871],
         [ 0.1225, -1.3796, -0.1751,  1.4321],
         [ 0.7397, -1.6816,  0.7723,  0.1696]],

        [[ 0.8837, -1.6951,  0.3153,  0.4962],
         [ 1.1933, -1.2132,  0.7528, -0.7329],
         [-1.0754, -0.9211,  1.0312,  0.9653]]],
       grad_fn=<NativeLayerNormBackward0>)




In [22]:
# The encoder output is passed to the decoder as its `memory` argument.
memory = transformer_encoder(src)

# 6-layer decoder stack with the same width (4) and head count (2) as the encoder.
decoder_layer = nn.TransformerDecoderLayer(d_model=4, nhead=2)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)

# Random tensor used as the decoder's target-side input.
out_part = torch.rand(2, 3, 4)
out = transformer_decoder(tgt=out_part, memory=memory)

print(out)


tensor([[[-0.2979,  0.5342,  1.2231, -1.4594],
         [ 0.1066, -1.5431,  0.1818,  1.2547],
         [-0.3130,  0.8409,  0.9720, -1.5000]],

        [[-0.9633,  0.3224,  1.4958, -0.8548],
         [ 1.3743, -1.2874,  0.4309, -0.5179],
         [-0.1883,  0.4357,  1.2445, -1.4919]]],
       grad_fn=<NativeLayerNormBackward0>)


- 梯度下降的训练实战

In [24]:
import torch
from torch import nn,optim
from torch.nn import functional as F

class MLP(nn.Module):
    """Two-layer perceptron that outputs log-probabilities.

    The forward pass is Linear -> ReLU -> Linear -> log-softmax, so the
    output can be passed directly to `nn.NLLLoss`.

    Args:
        input_dim: Number of features in each input vector.
        hidden_dim: Number of units in the hidden layer.
        num_class: Number of output classes.
    """

    def __init__(self, input_dim, hidden_dim, num_class):
        super().__init__()
        # Input -> hidden projection.
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        # Non-linearity applied to the hidden layer.
        self.activate = F.relu
        # Hidden -> class-score projection.
        self.linear2 = nn.Linear(hidden_dim, num_class)

    def forward(self, inputs):
        """Return per-class log-probabilities for `inputs`.

        Args:
            inputs: Tensor whose last dimension has size `input_dim`.

        Returns:
            Tensor with the same leading dimensions as `inputs` and a last
            dimension of size `num_class`, holding log-probabilities.
        """
        hidden = self.linear1(inputs)
        activation = self.activate(hidden)
        outputs = self.linear2(activation)
        # Normalize over the last (class) axis so that unbatched 1-D inputs
        # and inputs with extra leading dimensions work as well as 2-D batches.
        log_probs = F.log_softmax(outputs, dim=-1)
        return log_probs

# XOR truth table: four 2-feature inputs and their class labels.
x_train = torch.tensor([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y_train = torch.tensor([0, 1, 1, 0])

# NOTE(review): no random seed is set in this cell, so the learned parameters
# (and possibly the predictions) can differ from run to run.
model = MLP(input_dim=2, hidden_dim=5, num_class=2)

# NLLLoss consumes log-probabilities, which is what MLP.forward returns.
criterion = nn.NLLLoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.05)

# Full-batch training: every step uses all four samples.
for epoch in range(500):
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    # Clear gradients left over from the previous step before backprop.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print('Parameters:')
for name, param in model.named_parameters():
    print(name, param.data)

# Inference only: skip autograd bookkeeping for the final forward pass.
with torch.no_grad():
    y_pred = model(x_train)
print('Predicted results:', y_pred.argmax(dim=1))

Parameters:
linear1.weight tensor([[ 1.2419, -1.2401],
        [ 1.0910,  1.0778],
        [-0.1706,  0.8729],
        [-0.2930, -0.2859],
        [-0.1691, -0.3952]])
linear1.bias tensor([ 8.9211e-04, -1.0896e+00,  3.9133e-01, -5.9601e-01, -3.8474e-01])
linear2.weight tensor([[-1.0757,  1.3517, -0.6965, -0.3069, -0.2496],
        [ 1.1203, -1.0164,  0.2546, -0.2340, -0.2441]])
linear2.bias tensor([ 0.2727, -0.3108])
Predicted results: tensor([0, 1, 1, 0])
