# PyTorch basics

In [1]:
import numpy as np
import torch as t
t.__version__

'0.4.0'

## Create tensor

In [2]:
# t.empty only allocates storage; the printed values are whatever was in memory.
x1 = t.empty((5, 3), dtype=t.int64)
print(x1)  # uninitialized contents, not zeros

tensor([[ 4.6117e+18,  8.0705e+18,  8.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])


In [3]:
# 5x3 tensor in which every element is 1.
x2 = t.ones((5, 3))
print(x2)

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]])


In [4]:
# 5x3 tensor in which every element is 0.
x3 = t.zeros((5, 3))
print(x3)

tensor([[ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.]])


In [5]:
# Seed the global generator so Restart & Run All reproduces the same samples.
t.manual_seed(0)
x4 = t.rand(5, 3)
print(x4)  # samples from a uniform distribution on the interval [0, 1)

tensor([[ 0.9124,  0.7245,  0.4623],
        [ 0.5589,  0.1964,  0.4671],
        [ 0.2263,  0.9846,  0.6549],
        [ 0.5780,  0.2799,  0.8824],
        [ 0.0240,  0.2374,  0.5662]])


Construct tensor from list

In [6]:
# Nested Python lists become a 2-D tensor; the int literals are stored as floats.
x5 = t.tensor([
    [5.5, 3],
    [3, 0],
])
print(x5)

tensor([[ 5.5000,  3.0000],
        [ 3.0000,  0.0000]])


Construct tensor from numpy array

In [7]:
arr = np.array([
    [5.5, 5],
    [3, 0],
])
# The ndarray's float64 dtype carries over to the tensor (see the dtype in the output).
x6 = t.tensor(arr)
print(x6)

tensor([[ 5.5000,  5.0000],
        [ 3.0000,  0.0000]], dtype=torch.float64)


## Data type

Torch defines 8 different tensor types

https://pytorch.org/docs/stable/tensors.html#torch-tensor

Default data types are
- float32 & int64 (from python list)
- float64 & int64 (from numpy ndarray)

Data type list
- t.float32 = t.float
- t.float64 = t.double
- t.float16 = t.half
- t.uint8
- t.int8
- t.int16 = t.short
- t.int32 = t.int
- t.int64 = t.long

### From python list (int64 & float32)

In [8]:
# Integer-only Python list: the dtype is inferred from the values.
x = t.tensor([1, 2, 3, 4])
print(x.dtype) # default int64 (long)

torch.int64


In [9]:
# One float literal in the list is enough to make the whole tensor floating point.
y = t.tensor([1.0, 2, 3, 4])
print(y.dtype) # default float32 (float)

torch.float32


### From numpy array (int64 & float64)

In [10]:
# Integer ndarray input: the tensor takes the ndarray's integer dtype (int64 in the output below).
# NOTE(review): NumPy's default integer width is platform-dependent, so some
# platforms may report torch.int32 here — confirm on the target environment.
x = t.tensor(np.array([[1,2], [3,4]]))
print(x.dtype)

torch.int64


In [11]:
# Float ndarray input: the tensor reports float64, unlike the float32 obtained from a Python list.
y = t.tensor(np.array([[1.0,2], [3,4]]))
print(y.dtype)

torch.float64


### Specific dtype

In [12]:
# t.float is the short alias for t.float32; the integer literals are converted on creation.
z = t.tensor([1, 2, 3, 4], dtype=t.float)
print(z.dtype)

torch.float32


In [13]:
# t.double is the short alias for t.float64.
z = t.tensor([1, 2, 3, 4], dtype=t.double)
print(z.dtype)

torch.float64


In [14]:
# t.half is the short alias for t.float16.
z = t.tensor([1, 2, 3, 4], dtype=t.half)
print(z.dtype)

torch.float16


In [15]:
# Unsigned 8-bit integers.
z = t.tensor([1, 2, 3, 4], dtype=t.uint8)
print(z.dtype)

torch.uint8


In [16]:
# Signed 8-bit integers.
z = t.tensor([1, 2, 3, 4], dtype=t.int8)
print(z.dtype)

torch.int8


In [17]:
# t.short is the short alias for t.int16.
z = t.tensor([1, 2, 3, 4], dtype=t.short)
print(z.dtype)

torch.int16


In [18]:
# t.int is the short alias for t.int32.
z = t.tensor([1, 2, 3, 4], dtype=t.int)
print(z.dtype)

torch.int32


In [19]:
# t.long is the short alias for t.int64, the same dtype inferred for integer lists.
z = t.tensor([1, 2, 3, 4], dtype=t.long)
print(z.dtype)

torch.int64


### Cast type 

* tensor.to(torch.dtype)
* tensor cast method
    1. tensor.double()
    2. tensor.float()
    3. tensor.half()
    4. tensor.long()
    5. tensor.int()
    6. tensor.short()
    7. tensor.char()
    8. tensor.byte()

In [20]:
# Start from a float32 tensor; the following cells cast it to each of the other dtypes.
z = t.tensor([1,2,3], dtype=t.float)
print(z.dtype)

torch.float32


In [21]:
# .to(t.float64) and .double() are two spellings of the same cast.
print(z.to(t.float64))
print(z.double())

tensor([ 1.,  2.,  3.], dtype=torch.float64)
tensor([ 1.,  2.,  3.], dtype=torch.float64)


In [22]:
# .to(t.float32) and .float() are equivalent; z was created as float32, so the values print unchanged.
print(z.to(t.float32))
print(z.float())

tensor([ 1.,  2.,  3.])
tensor([ 1.,  2.,  3.])


In [23]:
# .to(t.float16) and .half() are two spellings of the same cast.
print(z.to(t.float16))
print(z.half())

tensor([ 1.,  2.,  3.], dtype=torch.float16)
tensor([ 1.,  2.,  3.], dtype=torch.float16)


In [24]:
# .to(t.int64) and .long() are two spellings of the same float-to-integer cast.
print(z.to(t.int64))
print(z.long())

tensor([ 1,  2,  3])
tensor([ 1,  2,  3])


In [25]:
# .to(t.int32) and .int() are two spellings of the same cast.
print(z.to(t.int32))
print(z.int())

tensor([ 1,  2,  3], dtype=torch.int32)
tensor([ 1,  2,  3], dtype=torch.int32)


In [26]:
# .to(t.int16) and .short() are two spellings of the same cast.
print(z.to(t.int16))
print(z.short())

tensor([ 1,  2,  3], dtype=torch.int16)
tensor([ 1,  2,  3], dtype=torch.int16)


In [27]:
# .char() is the shorthand for casting to int8 (signed 8-bit).
print(z.to(t.int8))
print(z.char())

tensor([ 1,  2,  3], dtype=torch.int8)
tensor([ 1,  2,  3], dtype=torch.int8)


In [28]:
# .byte() is the shorthand for casting to uint8 (unsigned 8-bit).
print(z.to(t.uint8))
print(z.byte())

tensor([ 1,  2,  3], dtype=torch.uint8)
tensor([ 1,  2,  3], dtype=torch.uint8)


## Operations

### Add

In [29]:
# Two 2x2 float32 operands used by the addition examples that follow.
x = t.tensor(
    [[5.0, 3], [1, 0]],
    dtype=t.float32,
)
y = t.tensor(
    [[1.0, 0], [2, 4]],
    dtype=t.float32,
)

In [30]:
# Operator form of element-wise addition.
a1 = x+y
print(a1)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [31]:
# Functional form of the same element-wise addition.
a2 = t.add(x, y)
print(a2)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [32]:
# Method form of the same addition (no trailing underscore, so this is not the in-place variant).
a3 = x.add(y)
print(a3)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [33]:
# Pre-allocate a 2x2 destination, then let t.add write the sum into it through out=.
a4 = t.empty(2, 2)
t.add(x, y, out=a4)
print(a4)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [34]:
# Check the operand dtype (float32 was requested when x was created).
print(x.dtype)

torch.float32


In-place add: `tensor.add_(tensor2)` (the trailing underscore marks the in-place variant)

In [35]:
# add_ (trailing underscore) writes the sum back into x and also returns the result,
# so a5 and x print the same values.
# NOTE: this cell is not idempotent — re-running it adds y to x again.
a5 = x.add_(y)
print(a5)
print(x) # change the value of x

tensor([[ 6.,  3.],
        [ 3.,  4.]])
tensor([[ 6.,  3.],
        [ 3.,  4.]])


## Numpy bridge

In [36]:
# 1-D tensor of five ones, printed together with its Python type.
a = t.ones(5)
print(a, type(a))

tensor([ 1.,  1.,  1.,  1.,  1.]) <class 'torch.Tensor'>


In [37]:
# Tensor.numpy() exposes the tensor as a numpy.ndarray.
b = a.numpy()
print(b, type(b))

[1. 1. 1. 1. 1.] <class 'numpy.ndarray'>


Changing "a" will change "b". They share the same memory locations.

This only holds for **in-place** operations; an operation that returns a new object breaks the sharing.

In [38]:
a = t.ones(5)
# b is obtained before a is modified ...
b = a.numpy()
a.add_(1) # torch in-place operation
print(a)
# ... yet b shows the update as well (see output): it is not a snapshot copy.
print(b)

tensor([ 2.,  2.,  2.,  2.,  2.])
[2. 2. 2. 2. 2.]


In [39]:
a = np.ones(5)
b = t.from_numpy(a) # share the same memory location
# out=a makes NumPy write the sum back into a's existing buffer.
np.add(a, 1, out=a) # numpy inplace operation
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


In [40]:
a = np.ones(5)
b = t.from_numpy(a)
# Item assignment also writes into the existing buffer, so b sees the new first element.
a[0] += 1 # inplace operation
print(a)
print(b)

[2. 1. 1. 1. 1.]
tensor([ 2.,  1.,  1.,  1.,  1.], dtype=torch.float64)


No effect on the torch tensor here: `np.add(a, 1)` without `out=` returns a new array, so this is not an in-place operation.

In [41]:
a = np.ones(5)
b = t.from_numpy(a)
# Without out=, np.add returns a new array; the name a is rebound to it while b
# keeps referring to the original buffer, which is why b still prints ones.
a = np.add(a, 1) # NOT in-place: creates a new array and rebinds a
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([ 1.,  1.,  1.,  1.,  1.], dtype=torch.float64)


## Torch autograd

### Default: gradients are not required

In [42]:
# Created without requires_grad, so autograd tracking is off (prints False).
x = t.tensor([1,2,3])
print(x.requires_grad)

False


In [43]:
# A result computed from a tensor that does not require grad does not require grad either.
y = x+3
print(y.requires_grad)

False


### Calculate gradient (backward)

#### Example1

In [44]:
# Ask autograd to track the operations applied to x.
x = t.ones((2, 2), requires_grad=True)
print(x, x.requires_grad, sep="\n")

tensor([[ 1.,  1.],
        [ 1.,  1.]])
True


In [45]:
# y is computed from x, so it requires grad as well (prints True).
y = x+2
print(y)
print(y.requires_grad)

tensor([[ 3.,  3.],
        [ 3.,  3.]])
True


In [46]:
# Backpropagate from the single element y[0, 0]; only x[0, 0] contributes to it.
y[0,0].backward()

In [47]:
# Gradient of y[0, 0] with respect to x: 1 at position [0, 0], 0 elsewhere.
x.grad

tensor([[ 1.,  0.],
        [ 0.,  0.]])

#### Example2

$y_i = (x_i+2)x_i^2$

$z = \frac{1}{4}\sum y_i$

$\frac{\partial z}{\partial x_i}=\frac{\partial z}{\partial y_i}\frac{\partial y_i}{\partial x_i}$

$\frac{\partial z}{\partial x_i}|_{x_i=1}=\frac{1}{4} \times 7 = 1.75$

In [48]:
# Forward pass of Example 2: y_i = (x_i + 2) * x_i^2, z = mean(y).
x = t.ones((2, 2), requires_grad=True)
y = x.pow(2) * (x + 2)
z = t.mean(y)
print(z)

tensor(3.)


In [49]:
# z is a scalar (the mean of y), so backward() is called without a gradient argument.
z.backward()

In [50]:
# Each entry is (3*x_i**2 + 4*x_i) / 4 = 1.75 at x_i = 1, matching the derivation above.
x.grad

tensor([[ 1.7500,  1.7500],
        [ 1.7500,  1.7500]])

## Tensor reshape

In [51]:
# Seed the global generator so the values shown here and in the view/reshape
# cells that follow are reproducible under Restart & Run All.
t.manual_seed(0)
a = t.randn(4, 2, 3)  # 4 blocks of 2x3 standard-normal samples
print(a)

tensor([[[ 0.3601,  1.5315, -0.6820],
         [ 1.9653,  0.9990,  0.1125]],

        [[ 0.0807,  0.2293,  0.2760],
         [ 0.7282,  0.0021, -0.6323]],

        [[ 0.4375,  0.4844, -1.0338],
         [-1.9256,  0.7573,  0.7399]],

        [[ 1.5236,  0.3562,  0.3065],
         [-1.0073,  0.0381,  0.0779]]])


In [52]:
# View the 4x2x3 tensor as 4x6: each output row is one 2x3 block laid end to end.
b = a.view(4,6)
print(b)

tensor([[ 0.3601,  1.5315, -0.6820,  1.9653,  0.9990,  0.1125],
        [ 0.0807,  0.2293,  0.2760,  0.7282,  0.0021, -0.6323],
        [ 0.4375,  0.4844, -1.0338, -1.9256,  0.7573,  0.7399],
        [ 1.5236,  0.3562,  0.3065, -1.0073,  0.0381,  0.0779]])


In [53]:
# reshape(4, 6) prints the same 4x6 values as the view in the previous cell.
c = a.reshape(4,6)
print(c)

tensor([[ 0.3601,  1.5315, -0.6820,  1.9653,  0.9990,  0.1125],
        [ 0.0807,  0.2293,  0.2760,  0.7282,  0.0021, -0.6323],
        [ 0.4375,  0.4844, -1.0338, -1.9256,  0.7573,  0.7399],
        [ 1.5236,  0.3562,  0.3065, -1.0073,  0.0381,  0.0779]])


## Reduction operations

https://pytorch.org/docs/stable/torch.html#reduction-ops

1. torch.sum()
2. torch.prod()
3. torch.mean()
4. torch.argmax()
5. torch.argmin()
6. torch.cumprod()
7. torch.cumsum()
8. torch.median()
9. torch.std()

In [56]:
# Seed the global generator so the matrix (and the sums computed from it in the
# following cells) is reproducible under Restart & Run All.
t.manual_seed(0)
a = t.randn(3, 3)
print(a)

tensor([[ 1.5321, -0.3514,  1.6674],
        [ 0.1950, -0.8240,  0.8309],
        [-0.8030,  1.2325,  0.0537]])


In [61]:
# Summing over dim=0 adds the rows together; keepdim=True keeps that dimension with size 1.
print(t.sum(a, dim=0, keepdim=True))  # 1*3 tensor, calculate column sum

tensor([[ 0.9240,  0.0570,  2.5520]])


In [60]:
# Summing over dim=1 adds the columns together; keepdim=True keeps that dimension with size 1.
print(t.sum(a, dim=1, keepdim=True))  # 3*1 tensor, calculate row sum

tensor([[ 2.8480],
        [ 0.2019],
        [ 0.4831]])


## Tensor concatenation

In [62]:
# Two random matrices with the same number of columns (3) but different row counts.
a, b = t.randn((3, 3)), t.randn((4, 3))

`t.cat` concatenates tensors along an **existing** axis.

In [65]:
# a is 3x3 and b is 4x3: concatenating along dim=0 yields 3 + 4 = 7 rows.
c = t.cat([a,b], dim=0)
print(c.shape)

torch.Size([7, 3])


`t.stack` joins tensors of the same shape along a **new** axis.

In [66]:
# d has the same 3x3 shape as a; stacking along dim=2 creates a new last axis of length 2.
d = t.randn(3,3)
e = t.stack([a,d], dim=2)
print(e.shape)

torch.Size([3, 3, 2])
