In [1]:
import numpy as np
import torch
print(torch.__version__)

1.2.0


# Tensor介绍

## Tensor创建

In [2]:
torch.manual_seed(1)
torch.cuda.manual_seed(1)

**已有数据初始化**

- 根据python的list初始化 (deep-copy: 内存拷贝)

In [3]:
x = torch.tensor(range(2, 11, 2)) 
print(x)

tensor([ 2,  4,  6,  8, 10])


- 根据numpy的ndarray初始化

In [4]:
x = np.linspace(1, 10, 10)
# view不改变内存，只是重新设置了shape。
y1 = torch.from_numpy(x).view(2 ,5)  # shallow-copy: 内存共享; from_numpy()的反操作为tensorVar.numpy()
y2 = torch.tensor(x).view(2 ,5)  # deep-copy: 内存拷贝
y1 += 0.5
y2 += 10

print(x, '\n\n', y1, '\n\n', y2)

[ 1.5  2.5  3.5  4.5  5.5  6.5  7.5  8.5  9.5 10.5] 

 tensor([[ 1.5000,  2.5000,  3.5000,  4.5000,  5.5000],
        [ 6.5000,  7.5000,  8.5000,  9.5000, 10.5000]], dtype=torch.float64) 

 tensor([[11., 12., 13., 14., 15.],
        [16., 17., 18., 19., 20.]], dtype=torch.float64)


- 用cpu/gpu的变量初始化gpu/cpu变量

In [5]:
x = np.linspace(1, 10, 10)  # np创建性函数大都默认float64类型
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
y = torch.tensor(x)
z = y.to(device)  # 用cpu的变量初始化gpu变量  (deep-copy: 内存拷贝)

print(id(x)==id(y), id(y)==id(z))
print(x, '\n\n')
print(y, '\n\n')
print(z, '\n\n')
print(y.to(device)+z, '\n\n')  # y+z : error--due to different device type
print(z.to('cpu'), '\n\n') # 用gpu的变量初始化cpu变量  (deep-copy: 内存拷贝)

False False
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.] 


tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], dtype=torch.float64) 


tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='cuda:0',
       dtype=torch.float64) 


tensor([ 2.,  4.,  6.,  8., 10., 12., 14., 16., 18., 20.], device='cuda:0',
       dtype=torch.float64) 


tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], dtype=torch.float64) 




**函数创建式：Tensor/empty/zeros/ones/diag/linspace/arange/rand/randn/randint**

In [6]:
x = torch.Tensor(2, 4)
y = torch.Tensor([1, 2, 3])
print(x, '\n\n', y)

tensor([[0.0000e+00, 0.0000e+00, 3.1964e-18, 3.0952e-41],
        [3.1964e-18, 3.0952e-41, 3.1964e-18, 3.0952e-41]]) 

 tensor([1., 2., 3.])


In [7]:
x = torch.empty(5, 3)
print(x)

tensor([[-3.0354e-19,  4.5829e-41, -3.0354e-19],
        [ 4.5829e-41,  4.4842e-44,  0.0000e+00],
        [ 4.4842e-44,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00, -3.0354e-19,  4.5829e-41],
        [ 3.8016e-39,  0.0000e+00,  4.6243e-44]])


In [8]:
x = torch.zeros((2, 4)) == torch.zeros(2, 4)  # *size即可已接受一串整数，也能接受list或tuple
print(x)

tensor([[True, True, True, True],
        [True, True, True, True]])


In [9]:
x = torch.ones(4, 1, dtype=torch.float64)  # 可以指定dtype, 默认float32
print(x)

tensor([[1.],
        [1.],
        [1.],
        [1.]], dtype=torch.float64)


In [10]:
x = torch.diag(torch.tensor([1.0, 2.0, 3.0]), diagonal=-1)
print(x)

tensor([[0., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 3., 0.]])


In [11]:
x = torch.linspace(2, 20 ,steps=10, dtype=torch.float32).view(2, 5)
# linspace的dtype必须为float(32/64)类型，其他类型未实现
print(x)

tensor([[ 2.,  4.,  6.,  8., 10.],
        [12., 14., 16., 18., 20.]])


In [12]:
x = torch.arange(2, 20.001 ,step=2, dtype=torch.float32).view(-1, 5) 
# arange的dtype必须为float(32/64)类型，其他类型未实现
# -1 代表该维度可由别的维度推断出来
print(x)

tensor([[ 2.,  4.,  6.,  8., 10.],
        [12., 14., 16., 18., 20.]])


In [13]:
x = torch.rand(1, 6)  # 标准分布
print(x)

tensor([[0.7576, 0.2793, 0.4031, 0.7347, 0.0293, 0.7999]])


In [14]:
x = torch.randn(1, 6)  # 正态分布
print(x)

tensor([[ 0.5636,  1.1431,  0.8590,  0.7056, -0.3406, -1.2720]])


In [15]:
x = torch.randint(1, 6, [3, 4])  # 随机整数
print(x)


tensor([[3, 2, 1, 4],
        [2, 5, 4, 2],
        [5, 3, 5, 5]])


## Tensor使用

**算术操作**

In [16]:
x  = torch.ones(3, 3)
y = torch.rand_like(x)
z = x + y  # 加法形式 1
torch.add(x, y)  # 加法形式 2
y.add_(x)  # 加法形式 3, 等价于 y[:] = x + y, torch.add(x, y, out=y)： y和加法后的y指向相同地址的内存

tensor([[1.6397, 1.9743, 1.8300],
        [1.0444, 1.0246, 1.2588],
        [1.9391, 1.4167, 1.7140]])

**索引**

In [17]:
x = torch.randn(4, 4)
print(x, '\n\n', x[0, :], '\n\n', x[1:-1, 0], '\n\n', x[(0 ,1, 2, 3), (0, 1, 2, 3)])

tensor([[-0.4757, -1.8821, -0.7765,  2.0242],
        [-0.0865,  0.0981, -1.2150,  0.7312],
        [-0.6298,  2.4070,  0.2786,  0.2468],
        [ 1.1843, -0.7282,  1.1633, -0.0091]]) 

 tensor([-0.4757, -1.8821, -0.7765,  2.0242]) 

 tensor([-0.0865, -0.6298]) 

 tensor([-0.4757,  0.0981,  0.2786, -0.0091])


**广播**

In [18]:
x = torch.arange(1, 3).view(1, 2)
y = torch.arange(1, 4).view(3, 1)
print(x, '\n\n', y, '\n\n', x + y)

tensor([[1, 2]]) 

 tensor([[1],
        [2],
        [3]]) 

 tensor([[2, 3],
        [3, 4],
        [4, 5]])


## Tensor梯度

**设置require_grad属性**

In [19]:
x = torch.ones(2, 2, requires_grad=True)
print(x,  '\n\n', y.grad, '\n\n', x.grad_fn)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True) 

 None 

 None


In [20]:
y = x + 2
print(y, '\n\n', y.grad, '\n\n', y.grad_fn)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>) 

 None 

 <AddBackward0 object at 0x7fc118a4c850>


In [21]:
print(x.is_leaf, y.is_leaf)

True False


**backward计算梯度**

In [22]:
x = torch.ones(2, 2, requires_grad=True)
z = y * y * 3
z_mean = z.mean()
print(z, '\n\n', z_mean, '\n\n')
z_mean.backward()  # 等价于 z_mean.backward(torch.tensor(1.))
print(z_mean.grad, '\n\n', y.grad, '\n\n', x.grad, '\n\n')

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) 

 tensor(27., grad_fn=<MeanBackward0>) 


None 

 None 

 None 




**如何避免梯度累加**

In [23]:
x = torch.ones(2, 2, requires_grad=True)
x_sum = x.sum()
x_sum.backward()  # 等价于 x_sum.backward(torch.tensor(1.))
print(x.grad)  # 多次反向传播，grad会累加

tensor([[1., 1.],
        [1., 1.]])


In [24]:
x_sum = x.sum()
# x.grad.data.zero_()  # 一般在反向传播之前需把梯度清零
x_sum.backward()
print(x.grad)

tensor([[2., 2.],
        [2., 2.]])


**如何中断梯度追踪**

In [25]:
x = torch.ones(2, 2, requires_grad=True)
x.data *= 10  # 方法二：假如只想修改tensor的值，但不希望被autograd记录，可以对tensor.data进行操作
y1 = 2 * x
with torch.no_grad():  # 方法一： 使用torch.no_grad()
    y2 = x**3
z = y1 + y2
print(x.requires_grad, '\n\n', y1.requires_grad, '\n\n', y2.requires_grad, '\n\n', z.requires_grad)
z.backward(torch.ones(2, 2), retain_graph=True)
print(x.grad)

True 

 True 

 False 

 True
tensor([[2., 2.],
        [2., 2.]])
None


In [26]:
z_mean  = z.mean()
z_mean.backward()
print(x.grad)

tensor([[2.5000, 2.5000],
        [2.5000, 2.5000]])
