# PyTorch basics

In [45]:
import numpy as np
import torch as t
t.__version__

'0.4.0'

## Create tensor

In [4]:
# Allocate a 5x3 int64 tensor without initialising its contents.
x1 = t.empty((5, 3), dtype=t.long)
print(x1)  # values are arbitrary (uninitialised memory)

tensor([[-5.7646e+18, -9.2234e+18,  8.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])


In [5]:
# 5x3 tensor filled with ones.
x2 = t.ones((5, 3))
print(x2)

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]])


In [6]:
# 5x3 tensor filled with zeros.
x3 = t.zeros((5, 3))
print(x3)

tensor([[ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.]])


In [7]:
# Draw a 5x3 tensor from the uniform distribution on the interval [0, 1).
x4 = t.rand((5, 3))
print(x4)

tensor([[ 0.3734,  0.9163,  0.9067],
        [ 0.4994,  0.8641,  0.6374],
        [ 0.8342,  0.2983,  0.0268],
        [ 0.3233,  0.6779,  0.1722],
        [ 0.6675,  0.6376,  0.4336]])


Construct tensor from list

In [8]:
# Nested Python lists become a 2x2 tensor; the ints are promoted to float
# because one element (5.5) is a float.
x5 = t.tensor(
    [
        [5.5, 3],
        [3, 0],
    ]
)
print(x5)

tensor([[ 5.5000,  3.0000],
        [ 3.0000,  0.0000]])


Construct tensor from numpy array

In [9]:
# Build the NumPy array first, then hand it to t.tensor; the array's
# float64 dtype carries over to the tensor (see the printed dtype).
arr = np.array(
    [
        [5.5, 5],
        [3, 0],
    ]
)
x6 = t.tensor(arr)
print(x6)

tensor([[ 5.5000,  5.0000],
        [ 3.0000,  0.0000]], dtype=torch.float64)


## Data type

Torch (as of version 0.4) defines 8 different tensor types

https://pytorch.org/docs/stable/tensors.html#torch-tensor

Default data types are:
- float32 & int64 (from python list)
- float64 & int64 (from numpy ndarray)

Data type list
- t.float32 = t.float
- t.float64 = t.double
- t.float16 = t.half
- t.uint8
- t.int8
- t.int16 = t.short
- t.int32 = t.int
- t.int64 = t.long

### From python list (int64 & float32)

In [46]:
# A Python list containing only ints is inferred as int64 (long).
x = t.tensor([1, 2, 3, 4])
print(x.dtype) # default int64 (long)

torch.int64


In [47]:
# A single float in the list (1.0) makes the whole tensor float32.
y = t.tensor([1.0, 2, 3, 4])
print(y.dtype) # default float32 (float)

torch.float32


### From numpy array (int64 & float64)

In [12]:
# An integer NumPy array keeps its dtype when converted (int64 in this run).
x = t.tensor(np.array([[1,2], [3,4]]))
print(x.dtype)

torch.int64


In [13]:
# A float NumPy array is float64, and the tensor keeps that dtype
# (unlike the float32 default for Python lists).
y = t.tensor(np.array([[1.0,2], [3,4]]))
print(y.dtype)

torch.float64


### Specific dtype

In [14]:
# Request the dtype explicitly; `t.float` is reported as torch.float32.
z = t.tensor([1, 2, 3, 4], dtype=t.float)
print(z.dtype)

torch.float32


In [15]:
# `t.double` is reported as torch.float64.
z = t.tensor([1, 2, 3, 4], dtype=t.double)
print(z.dtype)

torch.float64


In [16]:
# `t.half` is reported as torch.float16.
z = t.tensor([1, 2, 3, 4], dtype=t.half)
print(z.dtype)

torch.float16


In [17]:
# Unsigned 8-bit integers.
z = t.tensor([1, 2, 3, 4], dtype=t.uint8)
print(z.dtype)

torch.uint8


In [18]:
# Signed 8-bit integers.
z = t.tensor([1, 2, 3, 4], dtype=t.int8)
print(z.dtype)

torch.int8


In [19]:
# `t.short` is reported as torch.int16.
z = t.tensor([1, 2, 3, 4], dtype=t.short)
print(z.dtype)

torch.int16


In [20]:
# `t.int` is reported as torch.int32.
z = t.tensor([1, 2, 3, 4], dtype=t.int)
print(z.dtype)

torch.int32


In [21]:
# `t.long` is reported as torch.int64.
z = t.tensor([1, 2, 3, 4], dtype=t.long)
print(z.dtype)

torch.int64


### Cast type 

* tensor.to(torch.dtype)
* tensor cast method
    1. tensor.double()
    2. tensor.float()
    3. tensor.half()
    4. tensor.long()
    5. tensor.int()
    6. tensor.short()
    7. tensor.char()
    8. tensor.byte()

In [48]:
# float32 source tensor reused by the casting examples that follow.
z = t.tensor([1,2,3], dtype=t.float)
print(z.dtype)

torch.float32


In [61]:
# Two equivalent spellings of a cast to float64: generic .to(dtype) and
# the .double() shortcut. Both print the same tensor.
print(z.to(t.float64))
print(z.double())

tensor([ 1.,  2.,  3.], dtype=torch.float64)
tensor([ 1.,  2.,  3.], dtype=torch.float64)


In [63]:
# Cast to float32 via .to(dtype) and via .float(); z is already float32,
# so the printed tensors carry no dtype suffix.
print(z.to(t.float32))
print(z.float())

tensor([ 1.,  2.,  3.])
tensor([ 1.,  2.,  3.])


In [64]:
# Cast to float16 via .to(dtype) and via the .half() shortcut.
print(z.to(t.float16))
print(z.half())

tensor([ 1.,  2.,  3.], dtype=torch.float16)
tensor([ 1.,  2.,  3.], dtype=torch.float16)


In [65]:
# Cast to int64 via .to(dtype) and via the .long() shortcut.
print(z.to(t.int64))
print(z.long())

tensor([ 1,  2,  3])
tensor([ 1,  2,  3])


In [66]:
# Cast to int32 via .to(dtype) and via the .int() shortcut.
print(z.to(t.int32))
print(z.int())

tensor([ 1,  2,  3], dtype=torch.int32)
tensor([ 1,  2,  3], dtype=torch.int32)


In [68]:
# Cast to int16 via .to(dtype) and via the .short() shortcut.
print(z.to(t.int16))
print(z.short())

tensor([ 1,  2,  3], dtype=torch.int16)
tensor([ 1,  2,  3], dtype=torch.int16)


In [76]:
# Cast to int8 via .to(dtype) and via the .char() shortcut.
print(z.to(t.int8))
print(z.char())

tensor([ 1,  2,  3], dtype=torch.int8)
tensor([ 1,  2,  3], dtype=torch.int8)


In [75]:
# Cast to uint8 via .to(dtype) and via the .byte() shortcut.
print(z.to(t.uint8))
print(z.byte())

tensor([ 1,  2,  3], dtype=torch.uint8)
tensor([ 1,  2,  3], dtype=torch.uint8)


## Operations

### Add

In [22]:
# Two 2x2 float32 operands shared by the addition examples.
x = t.tensor(
    [[5.0, 3],
     [1, 0]],
    dtype=t.float,
)
y = t.tensor(
    [[1.0, 0],
     [2, 4]],
    dtype=t.float,
)

In [23]:
# Operator form of element-wise addition.
a1 = x+y
print(a1)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [24]:
# Function form: t.add(x, y) gives the same result as x + y.
a2 = t.add(x, y)
print(a2)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [25]:
# Method form without the trailing underscore: the sum is stored in a3.
a3 = x.add(y)
print(a3)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [26]:
# Pre-allocate a 2x2 result tensor and have t.add write the sum into it
# through the `out=` argument.
a4 = t.empty(2, 2)
t.add(x, y, out=a4)
print(a4)

tensor([[ 6.,  3.],
        [ 3.,  4.]])


In [27]:
# Check the dtype of x (float32, as requested when it was created).
print(x.dtype)

torch.float32


In-place add: `tensor.add_(tensor2)`

In [28]:
# The trailing underscore marks the in-place variant: x itself is
# overwritten with x + y, and the printed a5 and x are identical.
# NOTE: re-running this cell adds y to x again, so it is not idempotent.
a5 = x.add_(y)
print(a5)
print(x) # change the value of x

tensor([[ 6.,  3.],
        [ 3.,  4.]])
tensor([[ 6.,  3.],
        [ 3.,  4.]])


## Numpy bridge

In [29]:
# Start from a 1-D torch tensor holding five ones and show its Python type.
a = t.ones((5,))
print(a, type(a))

tensor([ 1.,  1.,  1.,  1.,  1.]) <class 'torch.Tensor'>


In [30]:
# Convert the torch tensor to a NumPy array and show its Python type.
b = a.numpy()
print(b, type(b))

[1. 1. 1. 1. 1.] <class 'numpy.ndarray'>


Changing "a" will change "b", because they share the same memory.

This only works for **in-place** operations.

In [31]:
# Torch -> NumPy: b is obtained from a via .numpy(); mutating a in place
# is visible through b as well (both print 2s).
a = t.ones(5)
b = a.numpy()
a.add_(1) # torch in-place operation
print(a)
print(b)

tensor([ 2.,  2.,  2.,  2.,  2.])
[2. 2. 2. 2. 2.]


In [32]:
# NumPy -> Torch: writing the sum back into a via `out=a` mutates the
# existing array, so the tensor b created from it shows the new values.
a = np.ones(5)
b = t.from_numpy(a) # share the same memory location
np.add(a, 1, out=a) # numpy inplace operation
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


In [33]:
# Updating a single element of the array in place is also reflected in
# the tensor b created from it.
a = np.ones(5)
b = t.from_numpy(a)
a[0] += 1 # inplace operation
print(a)
print(b)

[2. 1. 1. 1. 1.]
tensor([ 2.,  1.,  1.,  1.,  1.], dtype=torch.float64)


No effect on the torch tensor, because this is not an in-place operation.

In [34]:
# Counter-example: without `out=`, np.add returns a NEW array and the name
# `a` is rebound to it, so b still shows the original ones.
a = np.ones(5)
b = t.from_numpy(a)
a = np.add(a, 1) # NOT in-place: creates a new array and rebinds `a`
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([ 1.,  1.,  1.,  1.,  1.], dtype=torch.float64)


## Torch autograd

### Default: gradients are not required

In [35]:
# A freshly created tensor does not track gradients unless asked to.
x = t.tensor([1,2,3])
print(x.requires_grad)

False


In [36]:
# A result computed from a non-tracking tensor does not track gradients either.
y = x+3
print(y.requires_grad)

False


### Calculate gradient (backward)

#### Example1

In [37]:
# 2x2 tensor of ones that opts in to gradient tracking.
x = t.ones((2, 2), requires_grad=True)
print(x)
print(x.requires_grad)

tensor([[ 1.,  1.],
        [ 1.,  1.]])
True


In [38]:
# y is computed from a tracking tensor, so it reports requires_grad=True too.
y = x+2
print(y)
print(y.requires_grad)

tensor([[ 3.,  3.],
        [ 3.,  3.]])
True


In [39]:
# Backpropagate from the single scalar y[0, 0]; since y = x + 2, only
# x[0, 0] contributes to it.
y[0,0].backward()

In [40]:
# Gradient of y[0, 0] with respect to each element of x: 1 at [0, 0], 0 elsewhere.
x.grad

tensor([[ 1.,  0.],
        [ 0.,  0.]])

#### Example2

$y_i = (x_i+2)x_i^2$

$z = \frac{1}{4}\sum y_i$

$\frac{\partial z}{\partial x_i}=\frac{\partial z}{\partial y_i}\frac{\partial y_i}{\partial x_i}$

$\frac{\partial z}{\partial x_i}|_{x_i=1}=\frac{1}{4} \times 7 = 1.75$

In [41]:
# Forward pass for Example 2: y_i = (x_i + 2) * x_i^2 and z = mean(y).
x = t.ones((2, 2), requires_grad=True)
y = (x + 2) * x ** 2
z = y.mean()
print(z)

tensor(3.)


In [42]:
# z is a scalar, so backward() needs no argument; this fills in x.grad.
z.backward()

In [43]:
# Each entry is dz/dx_i = (1/4) * (3*x_i**2 + 4*x_i) = 1.75 at x_i = 1.
x.grad

tensor([[ 1.7500,  1.7500],
        [ 1.7500,  1.7500]])

## Tensor reshape

In [85]:
# Random 4x2x3 tensor (standard normal samples) used by the reshape examples.
a = t.randn((4, 2, 3))
print(a)

tensor([[[-1.1080,  1.3722, -0.9794],
         [ 1.4614,  0.6669,  0.3343]],

        [[-1.9850,  0.5830,  0.5376],
         [-0.4095, -0.9096,  1.0910]],

        [[-0.1644,  0.5602, -0.2707],
         [-0.0756, -0.3894, -0.6641]],

        [[ 1.0850, -0.8010,  1.6586],
         [-0.0160, -0.9180, -0.4066]]])


In [86]:
# Collapse the trailing 2x3 dimensions into one of length 6 using view.
b = a.view(4,6)
print(b)

tensor([[-1.1080,  1.3722, -0.9794,  1.4614,  0.6669,  0.3343],
        [-1.9850,  0.5830,  0.5376, -0.4095, -0.9096,  1.0910],
        [-0.1644,  0.5602, -0.2707, -0.0756, -0.3894, -0.6641],
        [ 1.0850, -0.8010,  1.6586, -0.0160, -0.9180, -0.4066]])


In [87]:
# Same 4x6 layout obtained with reshape; the printed values match the view result.
c = a.reshape(4,6)
print(c)

tensor([[-1.1080,  1.3722, -0.9794,  1.4614,  0.6669,  0.3343],
        [-1.9850,  0.5830,  0.5376, -0.4095, -0.9096,  1.0910],
        [-0.1644,  0.5602, -0.2707, -0.0756, -0.3894, -0.6641],
        [ 1.0850, -0.8010,  1.6586, -0.0160, -0.9180, -0.4066]])
