# Tensor

`Tensors`类似于Numpy里的`Ndarray`，但它额外提供了`GPU`计算的支持。

In [1]:
import torch

# Construct a 5x3 matrix, uninitialized:
# torch.empty only allocates memory, so the printed values are whatever
# happened to be in that memory and differ from run to run.
x = torch.empty(5,3)
print(x)
# Construct a randomly initialized matrix:
# torch.rand draws every entry uniformly from [0, 1).
x = torch.rand(5, 3)
print(x)

tensor([[-1.4889e+23,  4.5745e-41, -1.4889e+23],
        [ 4.5745e-41, -1.0318e+17,  4.5745e-41],
        [-7.4245e+23,  4.5745e-41, -7.4245e+23],
        [ 4.5745e-41, -1.0320e+17,  4.5745e-41],
        [-7.0454e+02,  4.5745e-41, -1.0315e+17]])
tensor([[0.7052, 0.8031, 0.4363],
        [0.5338, 0.9389, 0.1755],
        [0.3233, 0.9696, 0.4796],
        [0.7728, 0.8230, 0.1470],
        [0.0462, 0.9701, 0.5859]])


In [2]:
# Construct a matrix filled with zeros and of dtype long (64-bit integer)
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [3]:
# Construct a tensor directly from data (here a nested Python list).
# The list mixes ints and floats; the printed result is a floating-point
# tensor with 2 rows and 3 columns.
L = [[5.5, 3, 2.1],[0.3, 4.2, 10.6]]
x = torch.tensor(L)
print(x)

tensor([[ 5.5000,  3.0000,  2.1000],
        [ 0.3000,  4.2000, 10.6000]])


我们也可以直接基于一个现成的`tensor`，来创建新的`tensor`，新创建的`tensor`的属性（`size`、`dtype`等）直接复用现成的`tensor`

In [4]:
# new_ones builds a tensor of ones from an existing tensor; here both the
# size (5, 3) and the dtype (double) are overridden explicitly.
x = x.new_ones(5, 3, dtype=torch.double)
print(x)
# rand_like keeps the size of x; only the dtype is overridden (float).
x= torch.rand_like(x, dtype=torch.float)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[0.0861, 0.9166, 0.3194],
        [0.9537, 0.2678, 0.0254],
        [0.6886, 0.9874, 0.2918],
        [0.9089, 0.0101, 0.6995],
        [0.6246, 0.7031, 0.6960]])


# Tensor的算术运算

In [5]:
# Element-wise addition with the `+` operator; x and y are both 5x3.
y = torch.randn(5, 3)
print(x + y)

tensor([[ 1.6770,  2.6719, -0.9050],
        [ 0.4588,  0.8278,  0.1423],
        [ 1.1016,  0.1436, -0.4275],
        [ 0.3418, -2.3666, -0.1582],
        [ 1.2109,  0.5013,  1.3343]])


In [6]:
# The same addition through the functional form torch.add.
print(torch.add(x, y))

tensor([[ 1.6770,  2.6719, -0.9050],
        [ 0.4588,  0.8278,  0.1423],
        [ 1.1016,  0.1436, -0.4275],
        [ 0.3418, -2.3666, -0.1582],
        [ 1.2109,  0.5013,  1.3343]])


In [7]:
# The same addition again, writing the result into a preallocated tensor
# through the `out=` argument.
z = torch.empty(5, 3)
torch.add(x, y, out=z)
print(z)

tensor([[ 1.6770,  2.6719, -0.9050],
        [ 0.4588,  0.8278,  0.1423],
        [ 1.1016,  0.1436, -0.4275],
        [ 0.3418, -2.3666, -0.1582],
        [ 1.2109,  0.5013,  1.3343]])


In [8]:
# In-place addition: methods whose name ends in an underscore modify the
# tensor they are called on, so x now holds x + y.
x.add_(y)
print(x)

tensor([[ 1.6770,  2.6719, -0.9050],
        [ 0.4588,  0.8278,  0.1423],
        [ 1.1016,  0.1436, -0.4275],
        [ 0.3418, -2.3666, -0.1582],
        [ 1.2109,  0.5013,  1.3343]])


# 切片

语法和Numpy是一样的

In [9]:
# All rows, last two columns.
print(x[:, -2:])

tensor([[ 2.6719, -0.9050],
        [ 0.8278,  0.1423],
        [ 0.1436, -0.4275],
        [-2.3666, -0.1582],
        [ 0.5013,  1.3343]])


# Reshape

In [10]:
# Tensors returned by view share their storage with the original data.
x = torch.randn(4, 4)
y = x.view(16)  # flatten the 4x4 tensor into 16 elements
z = x.view(-1, 8)  # -1 lets that dimension be inferred from the others (here 2)

# 和Numpy之间的转换

In [11]:
import numpy as np

# Sample a 5x3 NumPy array from the standard normal distribution.
a = np.random.randn(5,3)
print(a)

[[-1.27017145 -0.74941763  0.51099931]
 [ 1.5296282   0.12585976 -0.37058281]
 [-0.91401494  0.67946583  0.42061108]
 [ 0.20291402  1.80999375  0.43451031]
 [ 0.8513185   0.66788329  1.10964226]]


In [12]:
# Wrap the NumPy array as a tensor; the dtype (float64) is carried over.
x = torch.from_numpy(a)
print(x)

tensor([[-1.2702, -0.7494,  0.5110],
        [ 1.5296,  0.1259, -0.3706],
        [-0.9140,  0.6795,  0.4206],
        [ 0.2029,  1.8100,  0.4345],
        [ 0.8513,  0.6679,  1.1096]], dtype=torch.float64)


In [13]:
# Add 1 to the array in place. The tensor created by from_numpy shares the
# array's memory, so printing x shows the change as well.
np.add(a, 1, out=a)
print(a)
print(x)

[[-0.27017145  0.25058237  1.51099931]
 [ 2.5296282   1.12585976  0.62941719]
 [ 0.08598506  1.67946583  1.42061108]
 [ 1.20291402  2.80999375  1.43451031]
 [ 1.8513185   1.66788329  2.10964226]]
tensor([[-0.2702,  0.2506,  1.5110],
        [ 2.5296,  1.1259,  0.6294],
        [ 0.0860,  1.6795,  1.4206],
        [ 1.2029,  2.8100,  1.4345],
        [ 1.8513,  1.6679,  2.1096]], dtype=torch.float64)


# 使用CUDA

In [14]:
# Runs only when a CUDA device is available; otherwise the cell does nothing.
if torch.cuda.is_available():
    device = torch.device('cuda')
    y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
    x = x.to(device)  # move x to the GPU
    z = x + y
    print(z)
    print(z.to('cpu', torch.double))  # .to can change device and dtype in one call

tensor([[0.7298, 1.2506, 2.5110],
        [3.5296, 2.1259, 1.6294],
        [1.0860, 2.6795, 2.4206],
        [2.2029, 3.8100, 2.4345],
        [2.8513, 2.6679, 3.1096]], device='cuda:0', dtype=torch.float64)
tensor([[0.7298, 1.2506, 2.5110],
        [3.5296, 2.1259, 1.6294],
        [1.0860, 2.6795, 2.4206],
        [2.2029, 3.8100, 2.4345],
        [2.8513, 2.6679, 3.1096]], dtype=torch.float64)


# AutoGrad

In [15]:
import torch

# requires_grad=True asks autograd to record the operations applied to x.
x = torch.ones(2, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [16]:
# y is the result of an operation on x, so it carries a grad_fn.
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [17]:
# grad_fn references the function object that created y (here an addition).
print(y.grad_fn)

<AddBackward0 object at 0x7f857bc67e80>


In [18]:
# z = 3 * y^2 element-wise; out is the mean over all four entries of z.
z = y * y * 3
out = z.mean()
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward1>)


In [19]:
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)  # False: tensors do not track gradients by default
a.requires_grad_(True)  # switches the flag on in place
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)  # b now has a grad_fn because a requires gradients

False
True
<SumBackward0 object at 0x7f857bc84898>


In [20]:
# out is a scalar, so backward() needs no argument.
# With out = mean(3 * (x + 2)^2) over four entries, d(out)/dx = 1.5 * (x + 2),
# which is 4.5 at x = 1.
out.backward()
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [21]:
x = torch.randn(3, requires_grad=True)
y = x * 2
# Keep doubling y until its norm reaches 1000.
# detach() returns a tensor cut off from the graph, so the norm check is not
# recorded by autograd. It is preferred over the legacy `.data` attribute,
# whose in-place modifications autograd cannot detect.
while y.detach().norm() < 1000:
    y = y * 2
print(y)

tensor([345.9185, 656.6721, 750.0057], grad_fn=<MulBackward0>)


In [22]:
# y is not a scalar, so backward needs a vector v of the same shape;
# x.grad then holds the vector-Jacobian product of v with dy/dx.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)
print(x.grad)

tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


In [23]:
# A result computed from a tensor that requires gradients requires them too.
print(x.requires_grad)
print((x ** 2).requires_grad)

True
True


In [24]:
# Inside torch.no_grad(), results do not track gradients even though x does.
with torch.no_grad():
    print((x ** 2).requires_grad)

False


# Neural Networks

A typical training procedure for a neural network is as follows:

- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network’s parameters
- Update the weights of the network, typically using a simple update rule: `weight = weight - learning_rate * gradient`

## Define the network

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [26]:
class Net(nn.Module):
    """Small convolutional network: two conv layers followed by three linear layers.

    fc1 expects 16 * 5 * 5 input features, which is what a single-channel
    32x32 image yields after the two conv + 2x2 pooling stages
    (32 -> 28 -> 14 -> 10 -> 5). The network produces 10 outputs per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutions with 5x5 kernels: 1 -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected layers: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Run a batch X through the network and return the raw fc3 outputs."""
        # conv1 -> ReLU -> max pooling over a (2, 2) window
        pooled = F.max_pool2d(F.relu(self.conv1(X)), (2, 2))
        # For a square window a single number is enough.
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), 2)
        # Flatten everything except the batch dimension.
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of x except the first (batch) one."""
        total = 1
        for dim in x.size()[1:]:
            total *= dim
        return total
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [27]:
# net.parameters() yields the learnable tensors: a weight and a bias for
# each of the five layers, ten in total.
params  =list(net.parameters())
print(len(params))
print(params[0].size())  # the first entry is conv1's weight

10
torch.Size([6, 1, 5, 5])


In [28]:
# One random sample with a single channel and 32x32 spatial size.
# NOTE: the name `input` shadows the Python builtin; it is kept because
# later cells reference it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0767,  0.0708, -0.0277,  0.1130, -0.0087, -0.0441,  0.0113,  0.1182,
          0.0886,  0.0271]], grad_fn=<AddmmBackward>)


In [29]:
# Clear the gradient buffers of all parameters, then backpropagate a random
# gradient with the same (1, 10) shape as out.
net.zero_grad()
out.backward(torch.randn(1, 10))

## Loss Function

In [30]:
output = net(input)
target = torch.randn(10)  # a random dummy target
target = target.view(1, -1)  # reshape to (1, 10) so it matches output
criterion = nn.MSELoss()  # mean squared error between output and target
loss = criterion(output, target)
print(loss)

tensor(0.3177, grad_fn=<MseLossBackward>)


In [31]:
# Walk a few steps backward through the computation graph, starting at the loss.
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x7f857bc67d68>
<AddmmBackward object at 0x7f857bc67f60>
<AccumulateGrad object at 0x7f857bc67d68>


In [32]:
# Gradients accumulate across backward calls, so clear them first;
# conv1's bias gradient prints as zeros before the backward pass.
net.zero_grad()
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])


In [33]:
# Backpropagate the loss; conv1's bias gradient is now populated.
loss.backward()
print(net.conv1.bias.grad)

tensor([-0.0041, -0.0012, -0.0023, -0.0008,  0.0007, -0.0008])


In [34]:
# Manual SGD step: weight = weight - learning_rate * gradient.
lr = 0.001
# Update under no_grad so the step itself is not recorded by autograd. This
# replaces the legacy `.data` access, which hides in-place changes from autograd.
with torch.no_grad():
    for f in net.parameters():
        # A parameter that took no part in the backward pass has grad None.
        if f.grad is not None:
            f.sub_(f.grad * lr)

In [35]:
import torch.optim as optim
# Plain stochastic gradient descent over all parameters of net.
optimizer = optim.SGD(net.parameters(), lr = 0.01)

In [36]:
# One full training step using the optimizer.
optimizer.zero_grad()  # clear gradients left over from earlier backward calls
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()  # apply the parameter update