# Arithmetic operations

In [1]:
import torch
import math
import numpy as np

In [2]:
# Base operand for the cells below: a 2x3 float tensor filled with ones.
x = torch.ones((2, 3))

### matrix product

In [3]:
# Matrix product of x (2x3) with its transpose (3x2), spelled three ways.
y1 = x @ x.T            # infix operator
y2 = x.matmul(x.T)      # tensor method

# Functional form, writing the result into a pre-allocated (2, 2) buffer.
y3 = torch.zeros(2, 2)
torch.matmul(x, x.T, out=y3)

# All three results are printed on separate lines and should be identical.
print(y1, y2, y3, sep=' \n ')

tensor([[3., 3.],
        [3., 3.]]) 
 tensor([[3., 3.],
        [3., 3.]]) 
 tensor([[3., 3.],
        [3., 3.]])


### element-wise product

In [4]:
# Element-wise product of x with itself, spelled three ways.
z1 = x * x              # infix operator
z2 = x.mul(x)           # tensor method

# Functional form: z3 starts with random contents that out= then overwrites.
z3 = torch.rand_like(x)
torch.mul(x, x, out=z3)

# All three results are printed on separate lines and should be identical.
print(z1, z2, z3, sep=' \n ')

tensor([[1., 1., 1.],
        [1., 1., 1.]]) 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 
 tensor([[1., 1., 1.],
        [1., 1., 1.]])


### 注意区分
1. **torch.mm()** - performs a matrix multiplication without broadcasting - (2D tensor) by (2D tensor)
2. **torch.mul()** - performs an elementwise multiplication with broadcasting - (Tensor) by (Tensor or Number)
3. **torch.matmul()** - matrix product with broadcasting - (Tensor) by (Tensor) with different behaviors depending on the tensor shapes (dot product, matrix product, batched matrix products).

### 从单元素tensor中提取元素的value

In [5]:
# Reduce x to a one-element tensor, then unwrap it into a plain Python number.
y = x.sum()  # equivalent spelling: torch.sum(x)
value_y = y.item()
print(y, type(y), value_y, type(value_y))

tensor(6.) <class 'torch.Tensor'> 6.0 <class 'float'>


### broadcast
规则：按照**last to first**的顺序比较每个dim的值，
所有dim值相同的话，直接运算；\
不相同的dim上，其中一个operand的dim值是1；\
或者，dimension在其中一个tensor上不存在。

In [6]:
# `a` has shape (2, 3, 2); each random operand below is broadcast against it.
a = torch.ones((2, 3, 2))

# Operand (3, 2): matches a's last two dims; a's leading dim is absent.
b = a * torch.rand(3, 2)
# Operand (3, 1): the size-1 last dim is stretched to 2; the middle dim matches.
c = a * torch.rand(3, 1)
# Operand (1, 2): the last dim matches; the size-1 middle dim is stretched to 3.
d = a * torch.rand(1, 2)

# One product per line, in the order they were computed.
print(b, c, d, sep='\n')

tensor([[[0.0848, 0.8052],
         [0.9057, 0.1640],
         [0.6530, 0.8912]],

        [[0.0848, 0.8052],
         [0.9057, 0.1640],
         [0.6530, 0.8912]]])
tensor([[[0.3890, 0.3890],
         [0.6748, 0.6748],
         [0.7538, 0.7538]],

        [[0.3890, 0.3890],
         [0.6748, 0.6748],
         [0.7538, 0.7538]]])
tensor([[[0.6372, 0.4234],
         [0.6372, 0.4234],
         [0.6372, 0.4234]],

        [[0.6372, 0.4234],
         [0.6372, 0.4234],
         [0.6372, 0.4234]]])


In [7]:
# Typical mistakes: none of the operands below can be broadcast against `a`.
# Each failing product is attempted and its RuntimeError is caught and printed,
# so the cell demonstrates the errors while still running cleanly end to end.
a = torch.ones(4, 3, 2)

bad_operands = [
    ('torch.rand(4, 3)', torch.rand(4, 3)),  # dims are compared last-to-first: 3 vs 2 mismatch
    ('torch.rand(2, 3)', torch.rand(2, 3)),  # both trailing dims differ from a's (3, 2)
    ('torch.rand((0,))', torch.rand((0,))),  # a size-0 dim neither equals 2 nor is 1
]

# Maps each operand's label to the error message PyTorch raised for it.
broadcast_errors = {}
for label, operand in bad_operands:
    try:
        a * operand
    except RuntimeError as err:
        broadcast_errors[label] = str(err)
        print(f'a * {label} -> RuntimeError: {err}')

### in-place ops
大多数binary operations都会返回新建的tensor，但很多数学运算也支持in-place operation：
1. in-place版本的方法名以下划线结尾（如`add_`、`t_`），或者用函数提供的out参数把结果写入已有的tensor
2. 虽然in-place操作可以节省存储空间，但求梯度的时候不要用，会丢失weights history

In [8]:
# Methods with a trailing underscore mutate x itself instead of returning a copy.
x = torch.rand(2, 3)
print('原x:\n', x)

x.add_(1)  # adds 1 to every element of x in place
print('in-place add:\n', x)

x.t_()     # transposes x in place: shape (2, 3) becomes (3, 2)
print('in-place transpose:\n', x)

原x:
 tensor([[0.8773, 0.4527, 0.9055],
        [0.5495, 0.8900, 0.7486]])
in-place add:
 tensor([[1.8773, 1.4527, 1.9055],
        [1.5495, 1.8900, 1.7486]])
in-place transpose:
 tensor([[1.8773, 1.5495],
        [1.4527, 1.8900],
        [1.9055, 1.7486]])


In [9]:
# Out-of-place: torch.sin returns a new tensor and leaves its input untouched.
a = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
print('a:\n', a)
print("output:\n", torch.sin(a))
print("a没变：\n", a)

# In-place: torch.sin_ overwrites its argument with the result.
b = a.clone()  # independent copy holding the same four angles
print('b:\n', b)
print("output:\n", torch.sin_(b))
print("b改变：\n", b)

a:
 tensor([0.0000, 0.7854, 1.5708, 2.3562])
output:
 tensor([0.0000, 0.7071, 1.0000, 0.7071])
a没变：
 tensor([0.0000, 0.7854, 1.5708, 2.3562])
b:
 tensor([0.0000, 0.7854, 1.5708, 2.3562])
output:
 tensor([0.0000, 0.7071, 1.0000, 0.7071])
b改变：
 tensor([0.0000, 0.7071, 1.0000, 0.7071])


In [10]:
# Two random operands plus a zero-filled buffer that will receive the results.
a, b = torch.rand(2, 2), torch.rand(2, 2)
c = torch.zeros(2, 2)
old_id = id(c)  # remember which object c is before any out= call
print(c)

# out=c writes the product into c and also returns that same object.
d = torch.matmul(a, b, out=c)
print(c)  # c now holds the product instead of zeros

assert d is c           # identity check, not just equal values
assert old_id == id(c)  # c was filled in place, not rebound to a new tensor

# Creation functions accept out= as well.
torch.rand(2, 2, out=c)
print(c)  # contents replaced once more
assert old_id == id(c)  # still the very same object

tensor([[0., 0.],
        [0., 0.]])
tensor([[0.6382, 0.5725],
        [1.1383, 1.1478]])
tensor([[0.0379, 0.0416],
        [0.3732, 0.9919]])


### copy tensor
1. assignment只是给等号右边对象新建了一个label，并没有复制新的对象
2. 如果要复制，可以用tensor.clone()
3. 如果被复制的tensor enabled autograd，那么clone得到的tensor也会enable。如果不希望clone得到的tensor复制原tensor的autograd设置，可以先detach再clone，即tensor.detach().clone()

In [11]:
# clone() produces an independent copy rather than a second label for `a`.
a = torch.ones(2, 2)
b = a.clone()

assert a is not b  # two distinct objects in memory
# `assert b == a` would not work here: b == a is an element-wise boolean
# tensor, and assert needs a single truth value.
print(a.eq(b))     # ...yet every element compares equal

a[0, 1] = 561      # modify a only...
print(b)           # ...b still holds all ones

tensor([[True, True],
        [True, True]])
tensor([[1., 1.],
        [1., 1.]])


In [12]:
# `a` is created with autograd tracking switched on.
a = torch.rand((2, 2), requires_grad=True)
print(a)

# A plain clone stays attached to the autograd graph (note its grad_fn).
b = torch.clone(a)
print(b)

# Detaching first yields a copy without autograd, leaving a's setting untouched.
c = a.detach().clone()
print(c)  # no requires_grad / grad_fn shown for c
print(a)  # a still reports requires_grad=True

tensor([[0.1276, 0.3408],
        [0.8050, 0.7558]], requires_grad=True)
tensor([[0.1276, 0.3408],
        [0.8050, 0.7558]], grad_fn=<CloneBackward0>)
tensor([[0.1276, 0.3408],
        [0.8050, 0.7558]])
tensor([[0.1276, 0.3408],
        [0.8050, 0.7558]], requires_grad=True)


### reduce ops

In [13]:
# 3x3 integer matrix holding 1..9 in row-major order:
#   [[1, 2, 3],
#    [4, 5, 6],
#    [7, 8, 9]]
x = torch.arange(1, 10).reshape(3, 3)

# Note the output form: reducing along dim=1 yields a named (values, indices) pair.
torch.max(x, dim=1)

torch.return_types.max(
values=tensor([3, 6, 9]),
indices=tensor([2, 2, 2]))

In [14]:
# Find the highest value in each row.
# torch.max(x, dim=1) returns a named (values, indices) pair. Reading the
# `.values` field directly avoids binding the throwaway name `_`, which
# IPython/Jupyter reserves for the previous cell's output.
max_values = torch.max(x, dim=1).values
print(max_values)

tensor([3, 6, 9])
