# torch的基本概念

In [23]:
import torch
# Build a 5x3 tensor filled with ones and display it.
x = torch.ones(5, 3)
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

torch.randn_like()  
The result has the same size as x and is filled with random values.

In [15]:
# Random tensor with the same shape as x; the dtype is set explicitly to torch.float.
torch.randn_like(x, dtype=torch.float)

tensor([[-0.3793,  0.6885,  0.1290],
        [ 2.2294,  0.8574,  1.3236],
        [-2.0561, -0.5247,  0.6158],
        [-0.8583,  1.0905, -0.4343],
        [-0.2464,  1.6203, -0.2528]])

torch的加法

In [17]:
# Operator form of addition: x and y are summed element by element.
y = torch.rand(5, 3)
x + y

tensor([[1.4228, 1.0937, 1.6522],
        [1.0807, 1.4404, 1.2485],
        [1.7199, 1.7067, 1.5104],
        [1.3423, 1.2543, 1.3121],
        [1.8383, 1.4933, 1.3020]])

In [18]:
# Functional form of the same addition; the displayed result matches x + y.
torch.add(x, y)

tensor([[1.4228, 1.0937, 1.6522],
        [1.0807, 1.4404, 1.2485],
        [1.7199, 1.7067, 1.5104],
        [1.3423, 1.2543, 1.3121],
        [1.8383, 1.4933, 1.3020]])

In [19]:
# Reshape a 4x4 tensor with view(): its 16 elements as (16,) and as (2, 8).
x = torch.randn(4, 4)
y = x.view(16)  # one dimension holding all 16 elements
z = x.view(-1, 8) # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [20]:
# For a one-element tensor, item() returns the value as a plain Python number.
x = torch.randn(1)
print(x)
print(x.item())

tensor([1.0310])
1.0309697389602661


In [21]:
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))   # element-wise arithmetic on each entry, not a matrix product
print(a.requires_grad)  # gradient tracking was never requested, so this prints False

False


In [22]:
# Show the current values of a.
a

tensor([[1.2834, 0.0213],
        [1.9870, 0.9340]])

In [25]:
# requires_grad_() switches gradient tracking on in place (note the trailing underscore).
a.requires_grad_(True)
print(a.requires_grad)
# b is computed from a tracked tensor, so it carries a grad_fn (here from the sum).
b = (a * a).sum()
# NOTE(review): the saved output shows the tensor b itself (value plus grad_fn),
# which looks like it came from print(b) -- re-run the cell to refresh it.
print(b.grad_fn)

True
tensor(6.4680, grad_fn=<SumBackward0>)


你可以将代码包裹在 with torch.no_grad(): 代码块中，
以停止对 .requires_grad=True 的张量跟踪计算历史（即在该代码块内不再记录自动求导所需的信息）。

In [29]:
# Create a 3-element random tensor with gradient tracking enabled from the start.
x = torch.randn(3, requires_grad=True)
x

tensor([ 0.1931, -2.3901, -0.5550], requires_grad=True)

In [30]:
# The flag requested at creation time is visible on the tensor.
x.requires_grad

True

In [32]:
# A result computed from a tracked tensor is tracked as well.
(x ** 2).requires_grad

True

In [31]:
# Inside torch.no_grad() the same computation yields an untracked result.
with torch.no_grad():
    print((x ** 2).requires_grad)

False


一个典型的神经网络训练过程包括以下几点：
    1.定义一个包含可训练参数的神经网络
    2.在输入数据集上迭代
    3.通过神经网络处理输入
    4.计算损失(loss)
    5.反向传播梯度到神经网络的参数
    6.更新网络的参数，通常使用一个简单的更新规则：weight = weight - learning_rate * gradient

In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    The first linear layer takes 16 * 5 * 5 features, which is what the two
    conv/pool stages produce for a single-channel 32x32 image. The network
    therefore expects input of shape (N, 1, 32, 32) and returns a (N, 10)
    tensor of raw scores (no activation is applied after the last layer).
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: input tensor; a (N, 1, 32, 32) batch is what the layer sizes
                are built for.

        Returns:
            Tensor of shape (N, 10) holding the output of the last linear layer.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension. Unlike
        # view(-1, n), flatten keeps working when the batch is empty,
        # where -1 cannot be inferred from zero elements.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of features per sample in ``x``.

        This is the product of all dimensions except the first (batch)
        dimension; it is 1 when ``x`` has no further dimensions.
        """
        # torch.Size.numel() multiplies the remaining dimension sizes.
        return x.size()[1:].numel()
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


一个模型可训练的参数可以通过调用 net.parameters() 返回：

In [38]:
# Collect the learnable tensors: 10 in total, a weight and a bias for each of the five layers.
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight

10
torch.Size([6, 1, 5, 5])


让我们尝试随机生成一个 32x32 的输入。注意：该网络期望的输入尺寸是 32x32，因为只有这样，经过两次卷积和池化后每个样本才恰好得到 16 * 5 * 5 个特征，与 fc1 的输入大小一致。

In [39]:
# One random single-channel 32x32 image, shaped (batch, channels, height, width).
# NOTE(review): the name `input` shadows Python's built-in input() in this kernel.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0506, -0.0450, -0.1200,  0.1061, -0.1466, -0.0797,  0.0161,  0.0636,
          0.0417, -0.0973]], grad_fn=<AddmmBackward0>)
