In [1]:
import math

from lilgrad.core import Value
from lilgrad.utils import all_derivatives


In [2]:
def compare_results(l1, l2, acceptable_error):
    """Pairwise-compare two sequences of numbers against an absolute tolerance.

    Args:
        l1: First sequence of numbers.
        l2: Second sequence of numbers.
        acceptable_error: Strict upper bound on ``abs(x - y)`` for a pair to
            count as matching.

    Returns:
        A 3-tuple ``(flags, l1, l2)``. ``flags[i]`` is True when the i-th pair
        differs by less than ``acceptable_error``. Pairs are formed with
        ``zip``, so the comparison stops at the shorter input.
    """
    within_tolerance = []
    for left, right in zip(l1, l2):
        within_tolerance.append(abs(left - right) < acceptable_error)
    return within_tolerance, l1, l2


In [3]:
def check_grad(f, a, b=None, acceptable_error=1e-5):
    """Compare gradients from ``Value.backward()`` with ``all_derivatives``.

    Args:
        f: Callable taking one argument, or two when ``b`` is given. It is
            called here with ``Value`` objects and is also handed to
            ``all_derivatives``.
        a: First input value.
        b: Optional second input value; when ``None`` ``f`` is treated as unary.
        acceptable_error: Absolute tolerance forwarded to ``compare_results``.

    Returns:
        Whatever ``compare_results`` returns for
        ``(backward gradients, all_derivatives result, acceptable_error)``.
    """
    raw_inputs = [a] if b is None else [a, b]
    wrapped_inputs = [Value(raw) for raw in raw_inputs]

    # Forward pass through the autograd graph.
    output = f(*wrapped_inputs)

    # Reference derivatives — presumably a numerical estimate; confirm against
    # lilgrad.utils.all_derivatives.
    reference_grads = all_derivatives(f, raw_inputs)

    # Backward pass fills in .grad on each wrapped input.
    output.backward()
    autograd_grads = [wrapped.grad for wrapped in wrapped_inputs]

    return compare_results(autograd_grads, reference_grads, acceptable_error)
    

In [4]:
%psource Value.__mul__


    [0;34m@[0m[0minput_to_value[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__mul__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mother[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m [0;34m*[0m [0mother[0m[0;34m.[0m[0mdata[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m [0mother[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'*'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0mother[0m[0;34m.[0m[0mdata[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m            [0mother[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0mself[0m[0;34m.[0m[0mdata[

In [5]:
# Product check at a=50, b=23: expect d(a*b)/da = b and d(a*b)/db = a.
check_grad(lambda a, b: a * b, 50, 23)


([True, True], [23.0, 50.0], [23.00000005561742, 49.999999873762135])

In [6]:
%psource Value.__pow__


    [0;34m@[0m[0minput_to_value[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__pow__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mother[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m [0;34m**[0m [0mother[0m[0;34m.[0m[0mdata[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m [0mother[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'**'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0mother[0m[0;34m.[0m[0mdata[0m [0;34m*[0m [0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m [0;34m**[0m [0;34m([0m[0mother[0m[0;34m.[0m[0mdata[0m [0;34m-[0m [0;36m1

In [7]:
# Power check at a=2, b=2: expect d(a**b)/da = b * a**(b-1) and d(a**b)/db = a**b * ln(a).
check_grad(lambda a, b: a**b, 2, 2)


([True, True],
 [4.0, 2.772588722239781],
 [3.9999999956741306, 2.7725887230545254])

In [8]:
%psource Value.exp


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mexp[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mexp[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'exp'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0mout[0m[0;34m.[0m[0mdata[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [9]:
# exp check at a=3 (expected derivative exp(a)). Value inputs use Value.exp();
# any other input falls back to math.exp.
check_grad(lambda a: a.exp() if isinstance(a, Value) else math.exp(a), 3)


([True], [20.085536923187668], [20.085536931446768])

In [10]:
%psource Value.__truediv__


    [0;32mdef[0m [0m__truediv__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mother[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mself[0m [0;34m*[0m [0;34m([0m[0mother[0m [0;34m**[0m [0;34m-[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m


In [11]:
# Division check at a=289, b=49: expect d(a/b)/da = 1/b and d(a/b)/db = -a / b**2.
check_grad(lambda a, b: a / b, 289, 49)


([True, True],
 [0.02040816326530612, -0.1203665139525198],
 [0.020408159606688514, -0.12036651675373378])

In [12]:
%psource Value.log


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mlog[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mlog[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'log'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0;36m1[0m [0;34m/[0m [0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [13]:
# log check at a=3 (expected derivative 1/a). Value inputs use Value.log();
# any other input falls back to math.log.
check_grad(lambda a: a.log() if isinstance(a, Value) else math.log(a), 3)


([True], [0.3333333333333333], [0.3333333331578814])

In [14]:
%psource Value.sin


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0msin[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0msin[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'sin'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0mmath[0m[0;34m.[0m[0mcos[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [15]:
# sin check at a=3 (expected derivative cos(a)). Value inputs use Value.sin();
# any other input falls back to math.sin.
check_grad(lambda a: a.sin() if isinstance(a, Value) else math.sin(a), 3)


([True], [-0.9899924966004454], [-0.9899924972855967])

In [16]:
%psource Value.cos


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mcos[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mcos[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'cos'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0;34m-[0m[0mmath[0m[0;34m.[0m[0msin[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34

In [17]:
# cos check at a=3 (expected derivative -sin(a)). Value inputs use Value.cos();
# any other input falls back to math.cos.
check_grad(lambda a: a.cos() if isinstance(a, Value) else math.cos(a), 3)


([True], [-0.1411200080598672], [-0.14112000845667438])

In [18]:
%psource Value.tan


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mtan[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mtan[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'tan'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0;36m1[0m [0;34m/[0m [0;34m([0m[0mmath[0m[0;34m.[0m[0mcos[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m [0;34m**[0m [0;36m2[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[0m 

In [19]:
# tan check at a=3 (expected derivative 1 / cos(a)**2). Value inputs use
# Value.tan(); any other input falls back to math.tan.
check_grad(lambda a: a.tan() if isinstance(a, Value) else math.tan(a), 3)


([True], [1.020319516942427], [1.0203195176250457])

In [20]:
%psource Value.sinh


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0msinh[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0msinh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'sinh'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0mmath[0m[0;34m.[0m[0mcosh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [21]:
# sinh check at a=3 (expected derivative cosh(a)). Value inputs use
# Value.sinh(); any other input falls back to math.sinh.
check_grad(lambda a: a.sinh() if isinstance(a, Value) else math.sinh(a), 3)


([True], [10.067661995777765], [10.067662001844724])

In [22]:
%psource Value.cosh


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mcosh[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mcosh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'cosh'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0mmath[0m[0;34m.[0m[0msinh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [23]:
# cosh check at a=3 (expected derivative sinh(a)). Value inputs use
# Value.cosh(); any other input falls back to math.cosh.
check_grad(lambda a: a.cosh() if isinstance(a, Value) else math.cosh(a), 3)


([True], [10.017874927409903], [10.017874929602044])

In [24]:
%psource Value.tanh


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mtanh[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmath[0m[0;34m.[0m[0mtanh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'tanh'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0;34m([0m[0;36m1[0m [0;34m/[0m [0;34m([0m[0mmath[0m[0;34m.[0m[0mcosh[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m [0;34m**[0m [0;36m2[0m[0;34m)[0m[0;34m)[0m [0;34m*[0m [0mout[0m[0;34m.[0m[0mgrad[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mreturn[

In [25]:
# tanh check at a=3 (expected derivative 1 / cosh(a)**2). Value inputs use
# Value.tanh(); any other input falls back to math.tanh.
check_grad(lambda a: a.tanh() if isinstance(a, Value) else math.tanh(a), 3)


([True], [0.009866037165440192], [0.00986603687636034])

In [26]:
%psource Value.sigmoid


    [0;32mdef[0m [0msigmoid[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0;36m1[0m [0;34m/[0m [0;34m([0m[0;36m1[0m [0;34m+[0m [0;34m([0m[0;34m-[0m[0mself[0m[0;34m)[0m[0;34m.[0m[0mexp[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m


In [None]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a plain number.

    Args:
        x: A real number accepted by ``math.exp``.

    Returns:
        The sigmoid of ``x`` as a float.
    """
    # The result must be returned explicitly; a bare expression statement
    # would make the function return None.
    return 1 / (1 + math.exp(-x))


In [27]:
# sigmoid check at a=3 (expected derivative s * (1 - s) with s = sigmoid(a)).
# Value inputs use Value.sigmoid(); any other input is evaluated directly with
# math.exp so both branches compute the same function of the lambda's own
# argument `a`.
check_grad(lambda a: a.sigmoid() if isinstance(a, Value) else 1 / (1 + math.exp(-a)), 3)


([True], [0.045176659730912144], [0.045176660190549])

In [28]:
%psource Value.relu


    [0;34m@[0m[0mset_out_backward[0m[0;34m([0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mrelu[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mout[0m [0;34m=[0m [0mValue[0m[0;34m([0m[0mmax[0m[0;34m([0m[0;36m0[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mdata[0m[0;34m)[0m[0;34m,[0m [0;34m([0m[0mself[0m[0;34m,[0m[0;34m)[0m[0;34m,[0m [0m_op[0m[0;34m=[0m[0;34m'relu'[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0;34m[0m
[0;34m[0m        [0;32mdef[0m [0m_backward[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m            [0mself[0m[0;34m.[0m[0mgrad[0m [0;34m+=[0m [0mout[0m[0;34m.[0m[0mgrad[0m [0;32mif[0m [0mout[0m[0;34m.[0m[0mdata[0m [0;34m>[0m [0;36m0[0m [0;32melse[0m [0;36m0[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0;32mreturn[0m [0mout[0m[0;34m,[0m [0m_backward[0m[0;34m[0m[0;34m[0m[0m


In [29]:
# relu check at a=3 (positive input, so the expected derivative is 1). Value
# inputs use Value.relu(); any other input falls back to max(a, 0).
check_grad(lambda a: a.relu() if isinstance(a, Value) else max(a, 0), 3)


([True], [1.0], [1.0000000005838672])

In [30]:
def more_complicated_fn(a, b):
    """Chain several differentiable operations into one expression.

    Computes ``3 + sin(exp(relu(sigmoid(a ** b))))``.

    Args:
        a: Base. Must support ``**`` and yield an object exposing
            ``sigmoid``, ``relu``, ``exp`` and ``sin`` methods
            (e.g. a lilgrad ``Value``).
        b: Exponent.

    Returns:
        The result of the chained operations with 3 added on the left.
    """
    # Use the ``**`` operator (Value.__pow__), the same form the notebook's
    # a**b gradient check exercises, rather than a separate ``pow`` method.
    powered = a ** b
    return 3 + powered.sigmoid().relu().exp().sin()


# check_grad needs the input values as positional arguments after the callable;
# reuse a=2, b=2, the same inputs as the a**b check earlier in the notebook.
check_grad(lambda a, b: more_complicated_fn(a, b), 2, 2)
