In [None]:
from IPython.core.display import HTML
# Read the stylesheet one directory above the notebook and wrap it in an
# IPython HTML object; being the cell's last expression, that object is displayed.
with open("../style.css", mode="r") as file:
    css = file.read()
HTML(css)

# Reverse Mode Automatic Differentiation

We demonstrate reverse mode AD with the function
$$ f(x_1, x_2) = \sin(x_1 + x_2) \cdot \cos(x_1 - x_2) + (x_1 + x_2) \cdot (x_1 - x_2) $$
To compute the function step by step, we introduce the following auxiliary variables:
* $v_1 := x_1 + x_2$,
* $v_2 := x_1 - x_2$,
* $v_3 := \sin(v_1)$,
* $v_4 := \cos(v_2)$,
* $v_5 := v_3 \cdot v_4$,
* $v_6 := v_1 \cdot v_2$,
* $y   := v_5 + v_6$.

In [None]:
def f(x1, x2):
    """Evaluate f(x1, x2) = sin(x1 + x2) * cos(x1 - x2) + (x1 + x2) * (x1 - x2).

    This is the function that the computational graph `CG` encodes; it can be
    used to cross-check the result of evaluating the graph.

    Parameters:
        x1, x2: real numbers.

    Returns:
        The value of f at (x1, x2) as a float.
    """
    # Imported locally: the bare names sin/cos are never imported in this
    # notebook, and the module-level `import math` only runs in a later cell.
    import math

    v1 = x1 + x2
    v2 = x1 - x2
    return math.sin(v1) * math.cos(v2) + v1 * v2

In [None]:
# Computational graph of f, given as a list of nodes in evaluation order.
# Every node is a tuple (name, operator, argument, ...): the binary operators
# '+', '-', '*' carry two argument names, the unary functions 'sin' and 'cos'
# carry one.  Arguments are input variables ('x1', 'x2') or names of earlier
# nodes.  The last node 'y' holds the function value.
CG = [ ('v1', '+',   'x1', 'x2'),
       ('v2', '-',   'x1', 'x2'),
       ('v3', 'sin', 'v1'),
       ('v4', 'cos', 'v2'),
       ('v5', '*',   'v3', 'v4'),
       ('v6', '*',   'v1', 'v2'),
       ('y',  '+',   'v5', 'v6')
     ]

In [None]:
import graphviz as gv

In [None]:
def all_variables(CG):
    Variables = set()
    for node in CG:
        match node:
            case (v, _, a1, a2):
                if a1[0] == 'x':
                    Variables.add(a1)
                if a2[0] == 'x':
                     Variables.add(a2)       
            case (v, _, a):
                if a[0] == 'x':
                    Variables.add(a)
    return Variables

In [None]:
all_variables(CG)

In [None]:
def render(CG):
    cg = gv.Graph()
    cg.attr(rankdir='LR', splines='false')
    Variables = all_variables(CG)
    for x in Variables:
        cg.node(x, label=x, shape='circle')
    for node in CG:
        match node:
            case (v, op, a1, a2):
                l = f'{v} := {a1} {op} {a2}'
            case (v, f, a):
                l = f'{v} :=  {f}({a})'
        cg.node(v, label=l, shape='rectangle')
    for node in CG:
        match node:
            case (v, _, a1, a2):
                cg.edge(a1, v)
                cg.edge(a2, v)
            case (v, _, a):
                cg.edge(a, v)
    return cg

In [None]:
render(CG)

In [None]:
import math

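The function `eval_graph` takes two arguments:
* `CG` is a computational graph,
* `Values` is a dictionary assigning values to variable names.

It evaluates the nodes of `CG` in the given order, stores the value of every node in `Values`, and returns the value of the node `y`.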

In [None]:
def eval_graph(CG, Values):
    for node in CG:
        match node:
            case (v, '+', a1, a2):
                Values[v] = Values[a1] + Values[a2]
            case (v, '-', a1, a2):
                Values[v] = Values[a1] - Values[a2]
            case (v, '*', a1, a2):
                Values[v] = Values[a1] * Values[a2]
            case (v, '/', a1, a2):
                Values[v] = Values[a1] / Values[a2]
            case (v, 'exp', a):
                Values[v] = math.exp(Values[a])
            case (v, 'log', a):
                Values[v] = math.log(Values[a])
            case (v, 'sin', a):
                Values[v] = math.sin(Values[a])
            case (v, 'cos', a):
                Values[v] = math.cos(Values[a])
            case (v, 'atan', a):
                Values[v] = math.atan(Values[a])
            case (v, r):
                Values[v] = r
    return Values['y']

In [None]:
eval_graph(CG, {'x1': 0, 'x2': 1})

In [None]:
def add_to_dictionary(D, key, value):
    """Insert `value` into the set that `D` stores under `key`.

    If `key` is not yet present in `D`, a new set is created for it first.
    The dictionary `D` is modified in place; nothing is returned.
    """
    D.setdefault(key, set()).add(value)

Given a *computational graph* `CG`, the function `parents` returns a dictionary `Parents` such that
for every name `n` that occurs as an argument of some node in `CG`, `Parents[n]` is the set of the names
of all nodes that have `n` as an argument. These nodes are the parents of the node labeled with `n`.

In [None]:
def parents(CG):
    Parents = {}
    for node in CG:
        match node:
            case (p, _, a):
                add_to_dictionary(Parents, a, p)
            case (p, _, a1, a2):
                add_to_dictionary(Parents, a1, p)
                add_to_dictionary(Parents, a2, p)
    return Parents

In [None]:
parents(CG)

In [None]:
def node_dictionary(CG):
    """Index the nodes of `CG` by their names.

    Returns:
        A dictionary mapping the first component of every node (its name)
        to the node itself.  If a name occurs more than once, the last
        node carrying that name wins.
    """
    return {node[0]: node for node in CG}

In [None]:
node_dictionary(CG)

The function `partial_derivative` takes three arguments:
* `Node` is a computational node,
* `arg` is the name of a node occurring as argument in `Node`, 
* `Values` is a dictionary that stores a value for every node name.

It returns the partial derivative of the value computed by `Node` with respect to `arg`, evaluated at the values stored in `Values`.

In [None]:
def partial_derivative(Node, arg, Values):
    result = 0
    match Node:
        case n, '+', a1, a2:
            if arg == a1 == a2:
                return 2
            if arg == a1 or arg == a2:
                return 1
            else:
                assert False, f'partial_derivative({Node}, {arg})'
        case n, '-', a1, a2:
            if arg == a1 == a2:
                return 0
            if arg == a1:
                return 1
            if arg == a2:
                return -1
            else:
                assert False, f'partial_derivative({Node}, {arg})'
        case n, '*', a1, a2:
            if arg == a1 == a2:
                return 2 * Values[a1]
            if arg == a1:
                return Values[a2]
            if arg == a2:
                return Values[a1]
            else:
                assert False, f'partial_derivative({Node}, {arg})'
        case n, '/', a1, a2:
            if arg == a1 == a2:
                return 0
            if arg == a1:
                return 1 / Values[a2]
            if arg == a2:
                return -Values[a1] / Values[a2] ** 2
            else:
                assert False, f'partial_derivative({Node}, {arg})'
        case n, 'exp', a:
            return math.exp(Values[a])
        case n, 'log', a:
            return math.log(Values[a])
        case n, 'sin', a:
            return math.cos(Values[a])
        case n, 'cos', a:
            return -math.sin(Values[a])
        case n, 'atan', a:
            return 1 / (1 + Values[a]**2)
    return result

In [None]:
def adjoints(CG, Values, Variables):
    """Compute the adjoints of all nodes and input variables by reverse mode AD.

    The adjoint of a name is the partial derivative of the output `y` with
    respect to that name.

    Parameters:
        CG:        computational graph, a list of node tuples in evaluation
                   order.  The last node is skipped in the backward pass and
                   is therefore expected to be the output node 'y'.
        Values:    dictionary mapping the input variables to numbers.  It is
                   updated in place by the forward pass (`eval_graph`).
        Variables: collection of the names of the input variables.

    Returns:
        A dictionary mapping 'y', every other node name and every name in
        `Variables` to its adjoint.  A node or variable that no other node
        uses as an argument does not influence `y` and gets the adjoint 0.
    """
    eval_graph(CG, Values)          # forward pass: fills Values for all nodes
    NodeDict = node_dictionary(CG)
    Parents  = parents(CG)
    Adjoints = {'y': 1}             # dy/dy = 1

    def backpropagate(name):
        # Chain rule: sum the contributions of all nodes that use `name`.
        # Names without parents yield the empty sum 0 instead of a KeyError.
        return sum(
            Adjoints[parent] * partial_derivative(NodeDict[parent], name, Values)
            for parent in Parents.get(name, ())
        )

    # Backward pass: visit the nodes in reverse order so that the adjoints of
    # all parents are known before a node itself is processed.
    for Node in reversed(CG[:-1]):
        print(f'inspecting {Node}')
        name = Node[0]
        Adjoints[name] = backpropagate(name)
    for x in Variables:
        Adjoints[x] = backpropagate(x)
    return Adjoints

In [None]:
adjoints(CG, { 'x1': 0, 'x2': 1 }, { 'x1', 'x2' })

In [None]:
CG

In [None]:
import ast

In [None]:
t = ast.parse('x1 + x2')

In [None]:
ast.dump(t)

In [None]:
ast.dump(t.body[0])

In [None]:
# The list of statements of the parsed module; for 'x1 + x2' this is a single
# expression statement.  (The original line ended in a dangling '.', which is
# a syntax error.)
t.body