# Autograd

* Experiments with `torch.autograd`

In [2]:
# Standard library
import sys

# Third-party: NumPy and PyTorch (plus the autograd entry point used below)
import numpy as np
import torch
from torch import nn
from torch.autograd import grad

# Report the interpreter and library versions this notebook was run with.
python_version = sys.version_info[:3]  # (major, minor, micro)
print("The Python version is %s.%s.%s" % python_version)
print(f'The Torch version is: {torch.__version__}')

The Python version is 3.7.0
The Torch version is: 1.0.1.post2


## Basic examples : 1D tensors

#### $f(w, z) = w+2z \longrightarrow \nabla f=(1,2)$

In [20]:
# Two one-element leaf tensors; both are tracked by autograd.
w = torch.ones(1, requires_grad=True)   #  w.requires_grad = True
z = torch.ones(1, requires_grad=True)   #  z.requires_grad = True
# f is built from tracked tensors, so f.requires_grad = True as well.
f = w + 2 * z
# grad() returns one gradient per requested input, in the order given.
gradients = grad(f, [w, z])
dw, dz = gradients
print("[dw,dz] = [{}, {}]".format(dw.item(),dz.item()))

[dw,dz] = [1.0, 2.0]


* Use of `backward()`

In [19]:
# Same function as above, f = w + 2z, evaluated at (w, z) = (1, 1).
w = torch.ones(1, requires_grad=True)   #  w.requires_grad = True
z = torch.ones(1, requires_grad=True)   #  z.requires_grad = True
f = w + 2 * z                           #  f.requires_grad = True
# backward() returns nothing; it stores the gradients in the .grad
# attribute of the leaf tensors w and z.
f.backward()
grad_w = w.grad.item()
grad_z = z.grad.item()
print("[dw,dz] = [{}, {}]".format(grad_w, grad_z))

[dw,dz] = [1.0, 2.0]


#### $f(w,z) = w+\sin(2z) \longrightarrow \nabla f(1,2)=(1,\,-1.3072872417272239)$

In [21]:
# Evaluate the gradient of f(w, z) = w + sin(2z) at (w, z) = (1, 2).
w = torch.ones(1, requires_grad=True)         #  w.requires_grad = True
# Create z directly with the value 2 so that it is a *leaf* tensor.
# Writing `torch.ones(1, requires_grad=True) * 2` would make z the result of
# a multiplication (a non-leaf): grad() would still work, but z.grad would
# stay None after a backward() call.
z = torch.tensor([2.0], requires_grad=True)   #  z.requires_grad = True
f = w + torch.sin(2 * z)                      #  f.requires_grad = True
# Automatic differentiation is straightforward: df/dw = 1, df/dz = 2*cos(2z).
dw, dz = grad(f, [w, z])
print("[dw,dz] = [{}, {}]".format(dw.item(),dz.item()))

[dw,dz] = [1.0, -1.3072872161865234]


#### Chain Rule

* Consider:

$g_1(w,z)=3w+\cos(z), \\ g_2(w,z)=w^2+\sin(2z),\\ f(w,z) = \exp(g_1(w,z))+3 g_2(w,z)$

$\longrightarrow \nabla f(1,1)=(109.43202095367012,-31.508562530134782)$

* Check this with Mathematica:

* Code

In [22]:
# Evaluation point (w, z) = (1, 1); both inputs are tracked by autograd.
w = torch.ones(1, requires_grad=True)
z = torch.ones(1, requires_grad=True)
# Inner functions: g1 = 3w + cos(z) and g2 = w^2 + sin(2z).
g1 = 3 * w + z.cos()
g2 = w.pow(2) + (2 * z).sin()
# Outer function: f = exp(g1) + 3*g2, so f.requires_grad = True.
f = g1.exp() + 3 * g2
# backward() applies the chain rule through g1 and g2 and accumulates the
# result into w.grad and z.grad.
f.backward()
grad_w = w.grad.item()
grad_z = z.grad.item()
print("[dw,dz] = [{}, {}]".format(grad_w, grad_z))

[dw,dz] = [109.43202209472656, -31.508562088012695]


## Basic examples : 2D tensors

* Multivariate calculus

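$G_1(W,Z)=3W+\cos(Z), \\ G_2(W,Z)=W^2+\sin(2Z),\\ F(W,Z) = \left\| \exp(G_1(W,Z))+G_1(W,Z)\,G_2(W,Z) \right\|_F$

Here $W^2 = W\,W$ and $G_1 G_2$ are matrix products, $\cos$, $\sin$ and $\exp$ act element-wise, and $\|\cdot\|_F$ is the Frobenius norm, which makes $F$ a scalar so that it can be differentiated with `backward()`.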

$\longrightarrow
\nabla_W F \left(
\left(
\begin{array}{cc}
 1 & 0.5 \\
 0.2 & 0.1 \\
\end{array}
\right),
\left(
\begin{array}{cc}
 1 & 0.5 \\
 0.2 & 0.1 \\
\end{array}
\right)
\right)
= \left(
\begin{array}{cc}
 109.231 & 15.7019 \\
 9.28887 & 3.30879 \\
\end{array}
\right)$

$\longrightarrow
\nabla_Z F \left(
\left(
\begin{array}{cc}
 1 & 0.5 \\
 0.2 & 0.1 \\
\end{array}
\right),
\left(
\begin{array}{cc}
 1 & 0.5 \\
 0.2 & 0.1 \\
\end{array}
\right)
\right)
= \left(
\begin{array}{cc}
 -31.2551 & -0.557821 \\
 4.13727 & 1.89057 \\
\end{array}
\right)$

* Check this with Mathematica:

In [34]:
# W and Z start from the same 2x2 values but are independent leaf tensors.
point = [[1, 0.5], [0.2, 0.1]]
W = torch.tensor(point, requires_grad=True)
Z = torch.tensor(point, requires_grad=True)
# G1 = 3W + cos(Z) (element-wise); G2 = W W + sin(2Z), where W W is a
# matrix product.
G1 = 3 * W + Z.cos()
G2 = W @ W + (2 * Z).sin()
# Reduce the matrix exp(G1) + G1 G2 to a scalar with the 2-norm over all of
# its entries, so that backward() can be called without extra arguments.
residual = G1.exp() + G1 @ G2
F = torch.norm(residual, p=2)   #  F.requires_grad = True
print("F : {}".format(F))
# Populate W.grad and Z.grad with dF/dW and dF/dZ (same shapes as W and Z).
F.backward()
print("dW = {}".format(W.grad))
print("dZ = {}".format(Z.grad))

F : 47.32332992553711
dW = tensor([[109.2307,  15.7019],
        [  9.2889,   3.3088]])
dZ = tensor([[-31.2551,  -0.5578],
        [  4.1373,   1.8906]])
