# Autograd

* Experiments with `torch.autograd`

In [42]:
import sys
import torch
from torch.autograd import grad
from wolframclient.evaluation import WolframLanguageSession
from wolframclient.language import wl
# Handle to the Wolfram Language session used for the symbolic reference computations.
session = WolframLanguageSession()
# Report the interpreter and library versions this notebook was run with.
py_major, py_minor, py_micro = sys.version_info[:3]
print("The Python version is %s.%s.%s" % (py_major, py_minor, py_micro))
print(f'The Torch version is: {torch.__version__}')

The Python version is 3.8.9
The Torch version is: 1.10.0


## 1D tensors

#### $f(w, z) = w+2z \longrightarrow \nabla f=(1,2)$

In [20]:
# Symbolic reference: define f(w, z) = w + 2 z in the Wolfram Language and
# return Grad[f] with respect to {w, z}; the cell output is that gradient.
session.evaluate('f[w_, z_] := w + 2 z; Grad[f[w, z], {w, z}]') # Compute the gradient symbolically in Mathematica

(1, 2)

In [19]:
# Two leaf tensors holding the value 1; both track gradients.
w = torch.ones(1, requires_grad=True)   # w.requires_grad is True
z = torch.ones(1, requires_grad=True)   # z.requires_grad is True
f = w + 2 * z                           # f.requires_grad is True (inherited from w and z)
# grad() returns one gradient tensor per requested input, in the same order.
dw, dz = grad(outputs=f, inputs=[w, z])
print("[dw,dz] = [{}, {}]".format(dw.item(), dz.item()))

[dw,dz] = [1.0, 2.0]


* Use of `backward()`

In [3]:
# Two leaf tensors holding the value 1; both track gradients.
w = torch.ones(1, requires_grad=True)   # w.requires_grad is True
z = torch.ones(1, requires_grad=True)   # z.requires_grad is True
f = w + 2 * z                           # f.requires_grad is True (inherited from w and z)
# backward() stores df/dw and df/dz in the .grad attribute of each leaf tensor.
f.backward()
print("[dw,dz] = [{}, {}]".format(w.grad.item(), z.grad.item()))

[dw,dz] = [1.0, 2.0]


#### $f(w,z) =w+\sin(2z)\longrightarrow \nabla f(1,2)=(1.,-1.3072872417272239)$

In [22]:
# Symbolic reference: define f(w, z) = w + Sin[2 z], take Grad[f] with respect to {w, z},
# convert it to machine numbers with N, and substitute the point w -> 1, z -> 2.
session.evaluate('f[w_, z_] := w + Sin[2 z]; N@Grad[f[w, z], {w, z}]/. {w -> 1, z -> 2}') # Compute the gradient symbolically in Mathematica

(1.0, -1.3072872417272239)

In [23]:
# Gradient of f(w, z) = w + sin(2z) at the point (w, z) = (1, 2).
w = torch.ones(1, requires_grad=True)         # leaf tensor, w = 1
# Create z directly as a leaf tensor. Writing `torch.ones(1, requires_grad=True) * 2`
# would make z the result of a multiplication, i.e. a non-leaf node: grad() would still
# work, but z.grad would stay None after a backward() call.
z = torch.tensor([2.0], requires_grad=True)   # leaf tensor, z = 2
f = w + torch.sin(2 * z)                      # f.requires_grad is True
# Analytically: df/dw = 1 and df/dz = 2 cos(2z).
dw, dz = grad(f, [w, z])
print("[dw,dz] = [{}, {}]".format(dw.item(), dz.item()))

[dw,dz] = [1.0, -1.3072872161865234]


#### $g_1(w,z)=3w+\cos(z), \\ g_2(w,z)=w^2+\sin(2z),\\ f(w,z) = \exp(g_1(w,z))+3 g_2(w,z)$ $\longrightarrow \nabla f(1,1)=(109.43202095367012,-31.508562530134782)$

In [48]:
# Symbolic reference for the composite function, evaluated in the Wolfram Language:
#   g1 = 3 w + Cos[z], g2 = w^2 + Sin[2 z], f = Exp[g1] + 3 g2.
# The result is the string "[dw,dz] =" joined with the numeric gradient of f
# with respect to {w, z} at w -> 1, z -> 1.
session.evaluate("g1[w_, z_] := 3 w + Cos[z]; \
g2[w_, z_] := w^2 + Sin[2 z]; \
f[w_, z_] := Exp[g1[w, z]] + 3 g2[w, z]; \
\"[dw,dz] =\" <> ToString[N[Grad[f[w, z], {w, z}]] /. {w -> 1, z -> 1}]") # Compute the gradient symbolically using the Wolfram language

'[dw,dz] ={109.432, -31.5086}'

In [49]:
# Leaf inputs at the evaluation point (w, z) = (1, 1).
w = torch.ones(1, requires_grad=True)
z = torch.ones(1, requires_grad=True)
# Intermediate functions g1 and g2, then their composition f = exp(g1) + 3 g2.
g1 = 3 * w + z.cos()
g2 = w.pow(2) + (2 * z).sin()
f = g1.exp() + 3 * g2
# autograd applies the chain rule through g1 and g2.
dw, dz = grad(f, [w, z])
print("[dw,dz] = [{}, {}]".format(dw.item(), dz.item()))

[dw,dz] = [109.43202209472656, -31.508562088012695]


## 2D tensors

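#### $G_1(W,Z)=3W+\cos(Z), \\ G_2(W,Z)=W^2+\sin(2Z),\\ F(W,Z) = \lVert \exp(G_1(W,Z))+G_1(W,Z)G_2(W,Z) \rVert_F  \longrightarrow \ \nabla_W F (W_0, Z_0) ,\nabla_Z F (W_0,Z_0) ?$

Here $W^2$ and $G_1 G_2$ are matrix products, $\cos$, $\sin$ and $\exp$ act elementwise, and $\lVert\cdot\rVert_F$ is the Frobenius norm, so $F$ is a scalar.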

In [61]:
# Symbolic reference for the 2D case, evaluated in the Wolfram Language:
#   g1 = 3 W + Cos[Z], g2 = W . W + Sin[2 Z], f = Frobenius norm of Exp[g1] + g1 . g2.
# Ws and Zs are symbolic 2x2 matrices; W and Z hold the numeric evaluation point.
# The result is a Row wrapping an association with keys "F", "dW" and "dZ": the value of f
# and its gradients with respect to the flattened entries of Ws and Zs, each with the
# numeric point substituted in.
# NOTE(review): the rule Derivative[1][Abs][p_] -> 1 replaces every Abs'[...] by 1,
# presumably because differentiating Norm leaves Abs' terms unevaluated; this looks valid
# only when the arguments of Abs are positive reals - confirm.
session.evaluate("g1[W_, Z_] := 3 W + Cos[Z];\
g2[W_, Z_] := W . W + Sin[2 Z];\
f[W_, Z_] := Norm[Exp[g1[W, Z]] + g1[W, Z] . g2[W, Z], \"Frobenius\"];\
Ws = {{w11, w12}, {w21, w22}};\
Zs = {{z11, z12}, {z21, z22}};\
W = {{1, 0.5}, {0.2, 0.1}} ;\
Z = {{0.7, 0.1}, {0.34, 0.9}} ;\
Row[AssociationThread[{\"F\", \"dW\", \"dZ\"} ->{N[f[Ws, Zs] /. Thread[Flatten[Ws] -> Flatten[W]]~Join~Thread[Flatten[Zs] -> Flatten[Z]]~Join~{Derivative[1][Abs][p_] -> 1}],\
N[Grad[f[Ws, Zs], Flatten[Ws]]] /. Thread[Flatten[Ws] -> Flatten[W]]~Join~Thread[Flatten[Zs] -> Flatten[Z]]~Join~{Derivative[1][Abs][p_] -> 1},\
N[Grad[f[Ws, Zs], Flatten[Zs]]] /. Thread[Flatten[Ws] -> Flatten[W]]~Join~Thread[Flatten[Zs] -> Flatten[Z]]~Join~{Derivative[1][Abs][p_] -> 1}}]]") # Compute the gradient symbolically in Mathematica

Row[{'F': 56.8399608952662, 'dW': (136.20676537164832, 17.023950389816783, 8.303863404839616, 2.58710109650704), 'dZ': (-26.11500277154785, 2.0506692211505397, 3.4805666706618235, -0.7192011140151858)}]

In [62]:
# Evaluation point (W0, Z0): 2x2 leaf tensors that track gradients.
W = torch.tensor([[1, 0.5], [0.2, 0.1]], requires_grad=True)
Z = torch.tensor([[0.7, 0.1], [0.34, 0.9]], requires_grad=True)
G1 = 3 * W + torch.cos(Z)                     # cos is applied elementwise
G2 = torch.matmul(W, W) + torch.sin(2 * Z)    # W^2 is the matrix product of W with itself
# Scalar objective: Frobenius norm of exp(G1) + G1 G2. torch.linalg.norm with ord="fro"
# replaces the deprecated torch.norm(..., p=2), which returned the same value by
# flattening the matrix but read as if it were the spectral (matrix 2-) norm.
F = torch.linalg.norm(torch.exp(G1) + torch.matmul(G1, G2), ord="fro")   # F.requires_grad is True
print("F = {}".format(F))
# backward() on the scalar F fills W.grad and Z.grad with dF/dW and dF/dZ.
F.backward()
print("dW = {}".format(W.grad))
print("dZ = {}".format(Z.grad))

F = 56.839962005615234
dW = tensor([[136.2068,  17.0240],
        [  8.3039,   2.5871]])
dZ = tensor([[-26.1150,   2.0507],
        [  3.4806,  -0.7192]])


In [41]:
# Terminate the Wolfram Language session created in the first cell now that the
# symbolic reference computations are done.
session.terminate()