Here we will implement an expression class for representing Real valued functions that take vectors as inputs.

In [29]:
import numpy as np

# Node Classes

# Base Expression
# the _b at the end means it supports backward mode
class Expression:
    def __init__(self):
        self.partial = 0
        self.value = 0

    def __str__():
        return "EXPRESSION" 

    def eval(self):
        self.partial = 0
        return "CANT EVALUATE BASE EXPRESSION CLASS"

    def derive():
        return "CANT DERIVE OF BASE EXPRESSION CLASS"

# Base Expression Types
    
class Un_Op(Expression):
    def __init__(self, a):
        Expression.__init__(self)
        self.a = a
    def __str__(self):
        return "[UNARY OP]"

class Bi_Op(Expression):
    def __init__(self, a, b):
        Expression.__init__(self)
        self.a = a
        self.b = b
    def __str__(self):
        return "[BINARY OP]"
    
# Usable Expression Types

# Constants
class Const(Un_Op):
    def __str__(self):
        return str(self.a)
    
    def eval(self):
        self.value = self.a
        return self.a
    def derive(self):
        return 0
    
class VConst(Un_Op):
    def __str__(self):
        return str(self.a)
    
    def eval(self):
        self.value = self.a
        return self.a
    def derive(self):
        return np.zeros(self.a.shape)

# Variables  
class Var(Expression):
    def __init__(self, name, value = None):
        '''
        name  : typeof(string)
        value : typeof(float)
        '''
        self.name = name
        self.value = value
        self.partial = 0
    def __str__(self):
        return self.name
    def eval(self):
        return self.value
    def derive(self):
        return 1
    
# Unary Operations
class Sum(Un_Op):
    def __str__(self):
        return f"Sum({str(self.a)})"
    def eval(self):
        self.value = np.sum(self.a.eval())
        return self.value
    def derive(self):
        dthis_da = np.ones(self.a.value.shape)
        self.a.partial += dthis_da * self.partial
        self.a.derive()
        return
    
class Sin(Un_Op):
    def __str__(self):
        return f"sin({str(self.a)})"
    def eval(self):
        self.value = np.sin(self.a.eval())
        return self.value
    def derive(self):
        dthis_da = np.cos(self.a.value)
        self.a.partial += dthis_da * self.partial # check if this is elementwise for np arrays
        self.a.derive()
        return

class Exp(Un_Op):
    def __str__(self):
        return f"exp({str(self.a)})"
    def eval(self):
        self.value = np.exp(self.a.eval())
        return self.value
    def derive(self):
        dthis_da = np.exp(self.a.value)
        self.a.partial += dthis_da * self.partial # check if this is elementwise for np arrays
        self.a.derive()
        return
    
class Sigmoid(Un_Op):
    def __str__(self):
        return f"s({str(self.a)})"
    def eval(self):
        self.value = np.exp(self.a.eval())
        return self.value
    def derive(self):
        dthis_da = np.exp(self.a.value)
        self.a.partial += dthis_da * self.partial # check if this is elementwise for np arrays
        self.a.derive()
        return

# Binary Operations
class SAdd(Bi_Op):
    def __str__(self):
        return f"({str(self.a)} + {str(self.b)})"
    def eval(self):
        self.value = self.a.eval() + self.b.eval()
        return self.value    
    def derive(self):
        dthis_da = 1
        dthis_db = 1
        self.a.partial += dthis_da * self.partial
        self.b.partial += dthis_db * self.partial
        self.a.derive()
        self.b.derive()
        return  

class VAdd(Bi_Op):
    def __str__(self):
        return f"({str(self.a)} + {str(self.b)})"
    def eval(self):
        self.value = self.a.eval() + self.b.eval()
        return self.value   
    def derive(self):
        self.a.partial += self.partial # check if this is elementwise for np arrays
        self.b.partial += self.partial # check if this is elementwise for np arrays
        self.a.derive()
        self.b.derive()   

class Mult(Bi_Op):
    '''
    Multiples:
        vectors * vectors (elementwise),
        scalars * scalars,
        vectors * scalars, or
        scalars * vectors
    '''
    def __str__(self):
        return f"({str(self.a)} + {str(self.b)})"
    def eval(self):
        self.value = self.a.eval() + self.b.eval()
        return self.value   
    def derive(self):
        dthis_da = self.b.value 
        dthis_db = self.a.value
        self.a.partial += dthis_da * self.partial
        self.b.partial += dthis_db * self.partial
        self.a.derive()
        self.b.derive()  
    
class MVMult(Bi_Op):
    def __str__(self):
        return f"({str(self.a)} * {str(self.b)})"  
    def eval(self):
        self.value = np.matmul(self.a.eval(), self.b.eval().squeeze())
        return self.value
    def derive(self): #I think this works? but it needs more testing
        print("self.partial", self.partial)
        dthis_da = np.transpose(self.b.value)
        dthis_db = np.transpose(self.a.value)
        self.a.partial += (dthis_da @ np.array([self.partial])).T # not sure why this works, but it does seem to...
        self.b.partial += dthis_db @ self.partial
        self.a.derive()
        self.b.derive()   

class MMMult(Bi_Op):
    def __str__(self):
        return f"({str(self.a)} * {str(self.b)})"  
    def eval(self):
        self.value = np.matmul(self.a.eval().squeeze(), self.b.eval().squeeze())
        return self.value
    def derive(self): # this doesnt work quite right yet
        print("self.partial", self.partial)
        dthis_da = np.transpose(self.b.value)
        dthis_db = np.transpose(self.a.value)
        self.a.partial += dthis_da @ self.partial
        self.b.partial += self.partial @ dthis_db
        self.a.derive()
        self.b.derive()   

In [21]:
# code that we want to work
A = Var("A", np.array([[2 ,2],[1,0]]))
x = Var("x", np.array([[1,0.5]]))
y = Var("y", np.array([1,0.5])) # note this is disgusting syntax, the fact we have to define vectors differently for addition and matrix multiplication

func = Sum(Exp(VAdd(MVMult(A, x), y)))
        
print("f =", func.eval())

func.partial = 1

func.derive()

print("df/dA =\n", A.partial)
print("df/dx =\n", x.partial)
print("df/dy =\n", y.partial)

# this code does seem to work when manually checking agains finite differences

f = 59.0798391034823
self.partial [54.59815003  4.48168907]
df/dA =
 [[54.59815003 27.29907502]
 [ 4.48168907  2.24084454]]
df/dx =
 [113.67798914 109.19630007]
df/dy =
 [54.59815003  4.48168907]


In [69]:
# Hardcoded feedforward NN example
# our nn architecture:
#
#
# R^l0      R^l1            R^l2            R^l3
# x -> activation_1 -> activation_2 -> outputs
# f(x) = s(W_23 @ s(W_12 @ s(W_01 @ inputs + b_1) + b_2) + b_3)

layerSizes = [2, 3, 3, 1]

x = Var("x", np.array([1, 0.5]))

W_01 = Var("W_01", np.random.normal(0, 0.001, [layerSizes[1], layerSizes[0]]))
W_12 = Var("W_12", np.zeros([layerSizes[2],layerSizes[1]]))
W_23 = Var("W_23", np.zeros([layerSizes[3],layerSizes[2]]))

b_1 = Var("b_1", np.zeros([1, layerSizes[1]]))
b_2 = Var("b_2", np.zeros([1, layerSizes[2]]))
b_3 = Var("b_3", np.zeros([1, layerSizes[3]]))

f = Sigmoid(VAdd(MVMult(W_01, x), b_1))

print(f.eval())

[[0.99883811 1.00028101 0.99922322]]
