In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Numpy/pandas array to torch tensor

In [2]:
# NumPy -> torch: `from_numpy` wraps the array's existing buffer in a tensor.
a = np.array(
    [
        [1, 0, 1],
        [2, 1, 1],
    ]
)
b = torch.from_numpy(a)
(a, b)

(array([[1, 0, 1],
        [2, 1, 1]]), tensor([[1, 0, 1],
         [2, 1, 1]]))

In [3]:
# pandas -> torch goes through the frame's NumPy representation.
# `DataFrame.to_numpy()` is the accessor the pandas documentation recommends
# over the older `.values` attribute; for a single-dtype frame like this one
# it is expected to hand back the same underlying data without copying.
df = pd.DataFrame({"a": [0, 1, 2], "b": [2, 3, 4]})
df_torch = torch.from_numpy(df.to_numpy())
df_torch

tensor([[0, 2],
        [1, 3],
        [2, 4]])

### They share memory

In [4]:
# Write through the NumPy array. `b` was created from `a` with `from_numpy`,
# so the new value shows up in the tensor too.
a[0, 0] = 666
b

tensor([[666,   0,   1],
        [  2,   1,   1]])

In [5]:
# Same experiment in the other direction: write through the tensor, then
# look at the DataFrame it was built from.
# NOTE(review): this only works when the array handed to `from_numpy` was a
# view of the frame's data rather than a copy -- confirm on the pandas version
# in use (Copy-on-Write changes what pandas exposes as writable).
df_torch[0, 0] = 666
df

Unnamed: 0,a,b
0,666,2
1,1,3
2,2,4


## Other ways

In [6]:
# Build a tensor directly from nested Python lists (2 rows, 3 columns).
c = torch.tensor(
    [
        [0, 1, 0],
        [1, 1, 1],
    ]
)
c

tensor([[0, 1, 0],
        [1, 1, 1]])

In [7]:
# A 2x3 tensor filled with ones (default floating-point dtype).
d = torch.ones(2, 3)
d

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [8]:
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
e = torch.eye(3)
e

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

## Operations

In [9]:
# Put the original value back in place of the 666 written earlier so the
# examples that follow use small numbers. Because `b` shares memory with the
# NumPy array `a`, this assignment changes `a` as well.
b[0, 0] = 1
print(b)
print(c)

tensor([[1, 0, 1],
        [2, 1, 1]])
tensor([[0, 1, 0],
        [1, 1, 1]])


### sum

In [10]:
# `+` adds two tensors of the same shape element by element.
b + c

tensor([[1, 1, 1],
        [3, 2, 2]])

In [11]:
# With no `dim` argument, `sum` adds up every element and returns a 0-d tensor.
b.sum()

tensor(6)

In [12]:
# Sum along dimension 1 (across the columns): one total per row.
b.sum(dim=1)

tensor([2, 4])

### scalar multiplication

In [13]:
# Multiplying by a Python scalar scales every element of the tensor.
2 * b

tensor([[2, 0, 2],
        [4, 2, 2]])

### pointwise multiplication

In [14]:
# `*` multiplies matching elements; it is not matrix multiplication.
b * c

tensor([[0, 0, 0],
        [2, 1, 1]])

### dot product

In [15]:
# Indexing a 2-D tensor with a single integer selects a row as a 1-D tensor.
print(b[1])
print(c[1])

tensor([2, 1, 1])
tensor([1, 1, 1])


In [16]:
# Dot product of two 1-D tensors: 2*1 + 1*1 + 1*1 = 4.
torch.dot(b[1], c[1])

tensor(4)

### transpose

In [17]:
# `t()` transposes a 2-D tensor: shape (2, 3) becomes (3, 2).
c.t()

tensor([[0, 1],
        [1, 1],
        [0, 1]])

### Matrix multiplication

In [18]:
# Matrix product of a (2, 3) and a (3, 2) matrix, giving a (2, 2) result.
torch.matmul(b, c.t())

tensor([[0, 2],
        [1, 4]])

In [31]:
# `unsqueeze(0)` adds a leading size-1 dimension: (3, 2) becomes (1, 3, 2).
c.t().unsqueeze(0).shape

torch.Size([1, 3, 2])

### unsqueeze

In [37]:
# Insert a size-1 dimension at position 1: shape (2, 3) becomes (2, 1, 3).
b.unsqueeze(1)

tensor([[[1, 0, 1]],

        [[2, 1, 1]]])

In [35]:
# Insert a size-1 dimension at position 0: shape (3, 2) becomes (1, 3, 2).
c.t().unsqueeze(0)

tensor([[[0, 1],
         [1, 1],
         [0, 1]]])

In [43]:
# `matmul` with a 2-D and a 3-D operand performs a batched product: the 2-D
# `b` is broadcast against the (1, 3, 2) operand, so the result is (1, 2, 2).
torch.matmul(b, c.t().unsqueeze(0)).shape

torch.Size([1, 2, 2])

## How autograd works

In [29]:
# Leaf tensors that autograd should track.
x1 = torch.tensor([0.1, 0.2], requires_grad=True)
x2 = torch.tensor([0.1, 0.1], requires_grad=True)

# Forward pass: an affine combination, reduced to a scalar by its mean.
y = 3 * x1 + x2 + 1
output = torch.mean(y)

# Backward pass fills `.grad` on the leaves:
# d(output)/d(x1) = 3/2 and d(output)/d(x2) = 1/2 for each of the two entries.
output.backward()
print(x1.grad, x2.grad)

tensor([1.5000, 1.5000]) tensor([0.5000, 0.5000])


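The method `backward` calculates gradients and accumulates them into `.grad`. __It does not reset them to zero!__ Calling it again adds the new gradients to the stored ones, as the next cell shows. But there is a convenient method for that: `x1.grad.zero_()` clears a tensor's gradient in place (or `optimizer.zero_grad()` when training with an optimizer).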

In [30]:
# Run the same forward and backward pass a second time without clearing
# `.grad`. The new gradients are added to the ones already stored, so the
# printed values are twice those of the first pass.
y = 3 * x1 + x2 + 1
output = y.mean()
output.backward()
print(x1.grad, x2.grad)

tensor([3., 3.]) tensor([1., 1.])


## Let's check that derivatives are calculated correctly

We start with $x = 2$.

$y = f(x) = x^2 - 3$, $f(2) = 2^2 - 3 = 1$.

$z = g(y) = y^3 - 2y$, $g(f(2)) = g(1) = 1^3 - 2\cdot 1 = -1$

In [8]:
# Leaf tensor at x = 2; `requires_grad` makes autograd record what follows.
x = torch.tensor([2.0], requires_grad=True)
print("x = ", x)

# y = f(x) = x^2 - 3
y = x.pow(2) - 3
print("y = ", y)

# z = g(y) = y^3 - 2y
z = y.pow(3) - 2 * y
print("z = ", z)


x =  tensor([ 2.])
y =  tensor([ 1.])
z =  tensor([-1.])



#### Chain rule
$(g(f(x)))' = g'(f(x))\cdot f'(x)$

#### Derivatives
$f'(x) = 2x$, $f'(2) = 2\cdot 2 = 4$

$g'(y) = 3y^2 - 2$, $g'(f(2)) = g'(1) = 3 - 2 = 1$

$(g(f(2)))' = g'(f(2))\cdot f'(2) = 1 \cdot 4 = 4$

In [9]:
# Backpropagate from z; afterwards `x.grad` holds dz/dx evaluated at x = 2,
# which should match the value derived by hand with the chain rule.
z.backward()
x.grad

tensor([ 4.])