In [1]:
import torch
import numpy as np

# 数据操作

## 创建张量

In [2]:
# Allocate a 5x3 tensor without initializing it; the values shown are
# whatever happened to be in memory, not guaranteed zeros.
x = torch.empty((5, 3))
x

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.4058e+02, -2.4394e-41,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [3]:
# Create a 5x3 tensor of random values (the recorded output lies in [0, 1)).
x = torch.rand((5, 3))
x

tensor([[0.3320, 0.8642, 0.0843],
        [0.5064, 0.8842, 0.3225],
        [0.2590, 0.9896, 0.6651],
        [0.9001, 0.1219, 0.7102],
        [0.5948, 0.1458, 0.7478]])

In [4]:
# A 5x3 tensor of zeros with integer (torch.long) elements.
x = torch.zeros((5, 3), dtype=torch.long)
x

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [5]:
# Build a tensor directly from Python data: a 1-D tensor holding 5 and 3
# (not a 5x3 tensor), as the printed shape confirms.
x = torch.tensor([5, 3])
print(x, x.shape, sep='\n')

tensor([5, 3])
torch.Size([2])


In [6]:
# new_ones() builds a tensor from an existing one and by default reuses its
# torch.dtype and torch.device; here the dtype is explicitly overridden.
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)
# randn_like() produces random values with the same shape and dtype as x
# (the printed result is still float64).
x = torch.randn_like(x)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[ 0.7420,  1.0910,  0.7244],
        [ 0.3394, -0.2583,  1.8374],
        [ 0.2697, -1.2103,  0.2198],
        [-0.3692, -1.1297,  0.2661],
        [ 0.1897,  0.2342,  1.5984]], dtype=torch.float64)


## 张量计算

In [7]:
# Two random 5x3 operands.
x = torch.rand(5, 3)
print(x)
y = torch.rand(5, 3)
print(y)

# Three equivalent out-of-place additions: the + operator, torch.add, and
# torch.add writing into a preallocated output buffer.
z1 = x + y
z2 = torch.add(x, y)
z3 = torch.empty_like(x)
torch.add(x, y, out=z3)
assert z1.equal(z2)
assert z2.equal(z3)
print(z3)

# In-place addition (trailing underscore): x itself now holds x + y.
x.add_(y)
assert x.equal(z3)

tensor([[0.4928, 0.9540, 0.7717],
        [0.7873, 0.3879, 0.0653],
        [0.3126, 0.9069, 0.6232],
        [0.5599, 0.2179, 0.1127],
        [0.4148, 0.3433, 0.9053]])
tensor([[0.0507, 0.7102, 0.4789],
        [0.0714, 0.0100, 0.2006],
        [0.2647, 0.0670, 0.0339],
        [0.9937, 0.9781, 0.5189],
        [0.6912, 0.0874, 0.6480]])
tensor([[0.5435, 1.6642, 1.2506],
        [0.8586, 0.3979, 0.2659],
        [0.5774, 0.9739, 0.6570],
        [1.5536, 1.1961, 0.6316],
        [1.1060, 0.4307, 1.5533]])


## 索引

In [8]:
# NumPy-style indexing selects part of a tensor. The result shares memory
# with the original data, so modifying one also modifies the other.
y = z1[0]
print(y)
y.add_(1)
print(y)
# Row 0 of z1 has changed as well.
print(z1)

tensor([0.5435, 1.6642, 1.2506])
tensor([1.5435, 2.6642, 2.2506])
tensor([[1.5435, 2.6642, 2.2506],
        [0.8586, 0.3979, 0.2659],
        [0.5774, 0.9739, 0.6570],
        [1.5536, 1.1961, 0.6316],
        [1.1060, 0.4307, 1.5533]])


In [9]:
# Pick rows 2 and 1 (in that order) along dimension 0.
row_ids = torch.tensor([2, 1])
torch.index_select(z1, dim=0, index=row_ids)

tensor([[0.5774, 0.9739, 0.6570],
        [0.8586, 0.3979, 0.2659]])

In [10]:
# gather along dim 0 picks, for every column j, the row named by the index:
# out[i][j] = a[index[i][j]][j].
a = torch.arange(16).view(4, 4)
print(a)
# torch.tensor infers an integer (int64) dtype from Python ints, so the
# legacy torch.LongTensor constructor is not needed.
index = torch.tensor([[0, 1, 2, 3]])  # row j for column j -> main diagonal
print(a.gather(0, index))
# ==========
index = torch.tensor([[2, 1, 2, 2]])  # rows 2, 1, 2, 2 for columns 0..3
print(a.gather(0, index))

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])
tensor([[ 0,  5, 10, 15]])
tensor([[ 8,  5, 10, 11]])


In [11]:
# Select elements through a boolean mask; the selected values come back as
# a flat 1-D tensor.
positive_mask = z1 > 0
torch.masked_select(z1, positive_mask)

tensor([1.5435, 2.6642, 2.2506, 0.8586, 0.3979, 0.2659, 0.5774, 0.9739, 0.6570,
        1.5536, 1.1961, 0.6316, 1.1060, 0.4307, 1.5533])

In [12]:
# Indices of the non-zero elements.
# Fill a 3x3 tensor with uniform samples from the range [0, 1] and use them
# as per-element probabilities for a Bernoulli draw (each entry is 0. or 1.).
a = torch.bernoulli(torch.empty(3, 3).uniform_(0, 1))
print(a)
# One [row, col] pair per non-zero entry.
print(torch.nonzero(a))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [0., 1., 1.]])
tensor([[0, 0],
        [0, 1],
        [0, 2],
        [1, 0],
        [1, 1],
        [1, 2],
        [2, 1],
        [2, 2]])


## 修改形状

In [13]:
a = torch.arange(16)
# Show the same 16 elements under three shapes; -1 asks view() to infer
# that dimension from the element count.
for target_shape in ((16,), (4, -1), (2, 2, 4)):
    print(a.view(*target_shape))

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])
tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7]],

        [[ 8,  9, 10, 11],
         [12, 13, 14, 15]]])


In [14]:
# view() does not copy: `b` shares its data with `a`, so an in-place change
# to `a` is visible through `b` as well.
b = a.view(4, -1)
a.add_(1)
print(a, b, sep='\n')

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])
tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [13, 14, 15, 16]])


In [15]:
# To get an independent copy, use clone().
# Comparing id() only compares the Python wrapper objects, which differ even
# for views that share memory; comparing the underlying data pointers shows
# that the clone really lives in separate storage.
x = torch.tensor([1, 2, 3])
x_cp = x.clone()
print(x.data_ptr() == x_cp.data_ptr())

False


## 线性代数

|函数|功能|
|---|---|
|trace|对角线元素之和(矩阵的迹)|
|diag|对角线元素|
|triu/tril|矩阵的上三角/下三角，可指定偏移量|
|mm/bmm|矩阵乘法，batch的矩阵乘法|
|addmm/addbmm/addmv/addr/baddbmm..|矩阵运算|
|t|转置|
|dot/cross|向量内积(点积)/叉积(向量积)|
|inverse|求逆矩阵|
|svd|奇异值分解|

In [16]:
a = torch.randn(1, 2)
b = torch.randn(2, 2)
print(a)
print(b)

separator = '=' * 20
# Apply each operation to the 2x2 matrix in turn: trace, diagonal, upper and
# lower triangle, transpose, inverse, singular value decomposition.
for matrix_op in (b.trace, b.diag, b.triu, b.tril, b.t, b.inverse, b.svd):
    print(separator)
    print(matrix_op())
print(separator)
# Matrix multiplication: (1x2) times (2x2) gives (1x2).
print(torch.matmul(a, b))

tensor([[0.4393, 0.8236]])
tensor([[ 1.6200,  1.7029],
        [-0.8190, -0.0148]])
tensor(1.6053)
tensor([ 1.6200, -0.0148])
tensor([[ 1.6200,  1.7029],
        [ 0.0000, -0.0148]])
tensor([[ 1.6200,  0.0000],
        [-0.8190, -0.0148]])
tensor([[ 1.6200, -0.8190],
        [ 1.7029, -0.0148]])
tensor([[-0.0108, -1.2422],
        [ 0.5975,  1.1818]])
torch.return_types.svd(
U=tensor([[-0.9679,  0.2514],
        [ 0.2514,  0.9679]]),
S=tensor([2.4240, 0.5655]),
V=tensor([[-0.7318, -0.6815],
        [-0.6815,  0.7318]]))
tensor([[0.0371, 0.7360]])


## tensor和numpy互转

In [17]:
# Convert a 3x2 NumPy array to a tensor; the printed tensor keeps the
# array's float64 dtype.
a = np.random.randn(3, 2)
b = torch.from_numpy(a)
print(a, b, sep='\n')

[[-0.36351752 -0.13347013]
 [-0.76513656 -0.35483407]
 [-0.43934627  0.86029124]]
tensor([[-0.3635, -0.1335],
        [-0.7651, -0.3548],
        [-0.4393,  0.8603]], dtype=torch.float64)


# 自动求梯度

- 概念
    - 如果需要对张量进行梯度传播，那么需要将其 **.requires_grad** 属性设置为 **True**，完成计算后可以调用 **.backward()** 进行梯度计算，梯度会累加到 **.grad** 属性 
    - 如果不想被追踪，可以调用 **.detach()**，这样梯度就不会继续传递。此外还可以使用 **with torch.no_grad()** 包裹起来（通常在模型评估时使用）
    - **Function** 是一个很重要的类，**Tensor** 与 **Function** 可以构建DAG，每个tensor都有一个 **.grad_fn** 属性，该属性记录是从哪计算出来的方便调试

In [18]:
# A tensor created directly by the user is a leaf node: its grad_fn is None.
x = torch.ones((2, 2), requires_grad=True)
print(x, x.grad_fn, sep='\n')
print('=' * 50)
# A tensor produced by an operation records that operation in grad_fn and
# is no longer a leaf.
y = torch.add(x, 1)
print(y.grad_fn)
print(x.is_leaf, y.is_leaf)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
None
<AddBackward0 object at 0x000001891BB47148>
True False


In [19]:
a = torch.randn(2, 2)
print(a)
# The result of operating on a non-tracking tensor does not track gradients.
a = 3 * a / (a - 1)
print(a.requires_grad)  # False
# Switch gradient tracking on in place.
a.requires_grad_(True)
print(a.requires_grad)
# From here on, results record how they were computed.
b = (a * a).sum()
print(b.grad_fn)

tensor([[-2.5831, -1.1134],
        [-0.8542, -1.6061]])
False
True
<SumBackward0 object at 0x000001891BB478C8>


## 梯度

- 不允许张量对张量求导，只允许标量对张量求导，求导结果是与自变量同形的张量

In [20]:
# Forward pass: out = mean(3 * (x + 2)**2), which is 27 for x = 1.
x = torch.ones((2, 2), requires_grad=True)
y = x + 2
z = 3 * (y * y)
out = z.mean()
print(out)

tensor(27., grad_fn=<MeanBackward0>)


In [21]:
# Backpropagate from the scalar `out`; the result lands in x.grad.
# With out = mean(3 * (x + 2)**2) over 4 elements,
# d(out)/dx = 1.5 * (x + 2), which is 4.5 at x = 1.
out.backward()
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [22]:
# Gradients accumulate across backward() calls, so they are usually reset
# to zero before the next backward pass.
out2 = x.sum()
out2.backward()
print(x.grad)  # previous gradient plus 1 per element from out2

out3 = x.sum()
# Zero the accumulated gradient in place. zero_() is called on x.grad
# directly rather than through the legacy `.data` attribute.
x.grad.zero_()
out3.backward()
print(x.grad)  # only the gradient contributed by out3

tensor([[5.5000, 5.5000],
        [5.5000, 5.5000]])
tensor([[1., 1.],
        [1., 1.]])


In [23]:
x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
# z is a non-scalar (2x2) result that still carries a grad_fn back to x.
y = x * 2
z = y.view(2, 2)
print(z)

tensor([[2., 4.],
        [6., 8.]], grad_fn=<ViewBackward>)


In [24]:
# z is not a scalar, so backward() is given a weight tensor `v` with the same
# 2x2 shape as z. Because z is 2 * x reshaped, x.grad comes out as 2 * v,
# flattened to the shape of x.
v = torch.tensor([[1.0, 0.1], [0.01, 0.001]], dtype=torch.float)
z.backward(v)
print(x.grad)

tensor([2.0000, 0.2000, 0.0200, 0.0020])


假设 x 经过一番计算得到 y，那么 y.backward(w) 求的不是 y 对 x 的导数，而是 l = torch.sum(y*w) 对 x 的导数。w 可以视为 y 的各分量的权重，也可以视为遥远的损失函数 l 对 y 的偏导数。也就是说，不一定需要从计算图最后的节点 y 往前反向传播，从中间某个节点 n 开始传也可以，只要你能把损失函数 l 关于这个节点的导数 dl/dn 记录下来，n.backward(dl/dn) 照样能往前回传，正确地计算出损失函数 l 对于节点 n 之前的节点的导数。特别地，若 y 为标量，w 取默认值 1.0，才是按照我们通常理解的那样，求 y 对 x 的导数。

In [25]:
# Three random length-3 leaf vectors that track gradients. Tensors accept
# requires_grad directly, so the deprecated torch.autograd.Variable wrapper
# (and the repeated import) is not needed.
x = torch.randn(3, requires_grad=True)
y = torch.randn(3, requires_grad=True)
z = torch.randn(3, requires_grad=True)
print(x)
print(y)
print(z)

t = x + y
# l = (x + y) . z, a scalar dot product
l = t.dot(z)

tensor([-0.1455, -0.8896,  0.3153], requires_grad=True)
tensor([ 1.2853, -0.7328,  0.2472], requires_grad=True)
tensor([1.4532, 0.6524, 0.1970], requires_grad=True)


In [26]:
# retain_graph=True keeps the graph so it can be backpropagated through again.
l.backward(retain_graph=True)
# For l = (x + y) . z: dl/dx = dl/dy = z, so these three lines match.
print(x.grad, y.grad, z, sep='\n')
# dl/dz = t = x + y, so these two lines match.
print(z.grad, t, sep='\n')

tensor([1.4532, 0.6524, 0.1970])
tensor([1.4532, 0.6524, 0.1970])
tensor([1.4532, 0.6524, 0.1970], requires_grad=True)
tensor([ 1.1399, -1.6224,  0.5625])
tensor([ 1.1399, -1.6224,  0.5625], grad_fn=<AddBackward0>)


In [27]:
# Equivalent to the scalar backward pass above: t.backward(z) differentiates
# sum(t * z), which is exactly l.
# Reset the gradients accumulated so far. zero_() is called on .grad
# directly rather than through the legacy `.data` attribute.
x.grad.zero_()
y.grad.zero_()
z.grad.zero_()

t.backward(z)
print(x.grad)
print(y.grad)

tensor([1.4532, 0.6524, 0.1970])
tensor([1.4532, 0.6524, 0.1970])


In [28]:
x = torch.tensor(1.0, requires_grad=True)
y1 = x ** 2
# Operations inside no_grad() are not tracked, so y2 has no grad_fn.
with torch.no_grad():
    y2 = x ** 3
y3 = y1 + y2

print(x.requires_grad)
# Expected flags: y1 True, y2 False, y3 True (y3 still tracks through y1).
for result in (y1, y2, y3):
    print(result, result.requires_grad)

True
tensor(1., grad_fn=<PowBackward0>) True
tensor(1.) False
tensor(2., grad_fn=<AddBackward0>) True
