In [1]:
import weakref

In [2]:
import numpy as np

In [3]:
# NumPy returns a scalar (e.g. np.float64) rather than an ndarray after applying operations to a 0-dimensional ndarray.
# We need to deal with this issue.

# To make this easy, define a helper function.

def as_array(x):
    """Wrap a scalar in a 0-dimensional ndarray; return anything else unchanged."""
    # np.isscalar is true for plain int/float as well as NumPy scalar types.
    return np.array(x) if np.isscalar(x) else x

In [4]:
# rather than applying 'add' or 'mul', what about using '+' or '*'?
# we go with operator overload

class Variable:
    """Holds an ndarray together with the bookkeeping needed for backprop.

    The gradient, the function that created the variable, and its generation
    in the computation graph are tracked alongside the data.
    """

    def __init__(self, data, name=None):
        """Store data and reset the graph-related state.

        Args:
            data: an ndarray, None, or any object np.array() accepts;
                non-ndarray values are converted with np.array().
            name: optional label, stored as-is.
        """
        if data is not None and not isinstance(data, np.ndarray):
            # Coerce instead of rejecting, so scalars and lists are accepted.
            data = np.array(data)

        self.data = data
        self.name = name
        self.grad = None        # filled in by backward()
        self.creator = None     # function that produced this variable, if any
        self.generation = 0     # creator's generation + 1; stays 0 without a creator

    def set_creator(self, func):
        """Record func as the producer and place this variable one generation after it."""
        self.creator = func
        self.generation = func.generation + 1

    def backward(self, retain_grad=False):
        """Propagate gradients from this variable back through its creators.

        If this variable has no gradient yet, it is seeded with ones shaped
        like the data. Gradients reaching the same input from several paths
        are summed.

        Args:
            retain_grad: when False (default), the grad of every function
                output visited is reset to None after it has been used.
        """
        if self.grad is None:
            self.grad = np.ones_like(self.data)

        # A variable without a creator has nothing upstream to propagate to.
        # Without this guard add_func(None) fails on None.generation.
        if self.creator is None:
            return

        funcs = []
        seen_set = set()

        def add_func(f):
            # Queue each function once; keep the list ascending by generation
            # so that pop() always yields the latest generation first.
            if f not in seen_set:
                funcs.append(f)
                seen_set.add(f)
                funcs.sort(key=lambda x: x.generation)

        add_func(self.creator)

        while funcs:
            f = funcs.pop()
            # f.outputs holds weak references, hence the call to dereference.
            gys = [output().grad for output in f.outputs]
            gxs = f.backward(*gys)
            if not isinstance(gxs, tuple):
                gxs = (gxs,)

            for x, gx in zip(f.inputs, gxs):
                if x.grad is None:
                    x.grad = gx
                else:
                    # Build a new object rather than adding in place, so an
                    # array shared with another variable's grad is not mutated.
                    x.grad = x.grad + gx

                if x.creator is not None:
                    add_func(x.creator)

            if not retain_grad:
                for y in f.outputs:
                    y().grad = None

    def cleargrad(self):
        """Reset the stored gradient so the variable can be reused."""
        self.grad = None

    @property
    def shape(self):
        """Shape of the underlying data."""
        return self.data.shape

    @property
    def ndim(self):
        """Number of dimensions of the underlying data."""
        return self.data.ndim

    @property
    def size(self):
        """Number of elements in the underlying data."""
        return self.data.size

    @property
    def dtype(self):
        """dtype of the underlying data."""
        return self.data.dtype

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        if self.data is None:
            return 'variable(None)'
        # Indent continuation lines by len('variable(') so rows line up.
        p = str(self.data).replace('\n', '\n' + ' ' * 9)
        return 'variable(' + p + ')'

    def __mul__(self, other):
        return mul(self, other)

In [5]:
# We need the forward prop results to do back prop. So, each result (the inputs to functions) must be retained until back prop.
# Unlike training, inference is only a forward prop process. So, we don't need to keep all the intermediate data.

# Here comes the mode without back prop!

class Config:
    """Global switches for the framework."""

    # Function.__call__ records creators, inputs and outputs only while this is True.
    enable_backprop = True


In [6]:
# how can we make Variable * np.array(2.0) workable?
# have np.array wear Variable dress!

def as_variable(obj):
    """Return obj untouched if it is already a Variable; otherwise wrap it in a new Variable."""
    return obj if isinstance(obj, Variable) else Variable(obj)

class Function:
    """Base class for operations in the computation graph.

    Subclasses implement forward (on raw data) and backward (on output
    gradients); calling an instance runs forward and links the graph.
    """

    def __init__(self):
        self.generation = 0

    def __call__(self, *inputs):
        """Apply the operation to inputs and return one Variable or a list of them."""
        # Raw arrays/scalars are accepted alongside Variables.
        inputs = [as_variable(item) for item in inputs]

        raw_results = self.forward(*[var.data for var in inputs])
        if not isinstance(raw_results, tuple):
            raw_results = (raw_results,)
        outputs = [Variable(as_array(result)) for result in raw_results]

        if Config.enable_backprop:
            # Graph bookkeeping is skipped entirely when backprop is disabled.
            self.generation = max(var.generation for var in inputs)
            for out in outputs:
                out.set_creator(self)
            self.inputs = inputs
            # Weak references: each output already holds this function as its creator.
            self.outputs = [weakref.ref(out) for out in outputs]

        if len(outputs) > 1:
            return outputs
        return outputs[0]

    def forward(self, xs):
        """Compute outputs from raw input data; must be overridden."""
        raise NotImplementedError()

    def backward(self, gys):
        """Compute input gradients from output gradients; must be overridden."""
        raise NotImplementedError()


In [7]:
# '*' first

class Mul(Function):
    """Multiplication of two inputs."""

    def forward(self, x0, x1):
        return x0 * x1

    def backward(self, gy):
        # Each input's gradient is gy scaled by the *other* input's data.
        first, second = self.inputs[0], self.inputs[1]
        return gy * second.data, gy * first.data

def mul(x0, x1):
    """Multiply x0 by x1 through a fresh Mul node."""
    op = Mul()
    return op(x0, x1)

In [8]:
# Improvement in backward prop
# SISO (single Variable) -> MIMO (list or tuple of Variables)

class Add(Function):
    """Addition of two inputs."""

    def forward(self, x0, x1):
        # Two inputs, one output.
        return x0 + x1

    def backward(self, gy):
        # The incoming gradient is passed unchanged to both inputs.
        return gy, gy

In [9]:
# what about Variable + 3.0?

def add(x0, x1):
    """Add x1 to x0 through a fresh Add node; a scalar x1 is first turned into an ndarray."""
    return Add()(x0, as_array(x1))

In [10]:
# What if int/float is on the left of the operator?
# __mul__ is used when the Variable is the left operand, and __rmul__ when it is the right operand.
# Let's define __rmul__ as well so that an int/float can be placed on either side.

# add and mul are commutative, so just assign the same function to the reflected method (__r*__).
Variable.__add__ = add  # Variable + other
Variable.__radd__ = add  # other + Variable
Variable.__mul__ = mul  # Variable * other (replaces the __mul__ defined in the class body)
Variable.__rmul__ = mul  # other * Variable

In [11]:
# example

x = Variable(np.array(2.0))
y = 3.0 * x + 1.0  # float * Variable goes through __rmul__; Variable + float through __add__
print(y)

variable(7.0)


In [24]:
# Negate

class Neg(Function):
    """Sign flip of a single input."""

    def forward(self, x):
        negated = -x
        return negated

    def backward(self, gy):
        # The gradient is sign-flipped as well.
        grad_in = -gy
        return grad_in

def neg(x):
    """Negate x through a fresh Neg node."""
    op = Neg()
    return op(x)

Variable.__neg__ = neg  # unary minus: -Variable


In [26]:
# example

x = Variable(np.array(2.0))
y = -x  # unary minus dispatches to Variable.__neg__
print(y)

variable(-2.0)


In [28]:
# Subtraction

class Sub(Function):
    """Subtraction: first input minus second input."""

    def forward(self, x0, x1):
        return x0 - x1

    def backward(self, gy):
        # The minuend receives gy, the subtrahend receives -gy.
        grad_x0 = gy
        grad_x1 = -gy
        return grad_x0, grad_x1

def sub(x0, x1):
    """Compute x0 - x1 through a fresh Sub node; a scalar x1 is first turned into an ndarray."""
    return Sub()(x0, as_array(x1))

Variable.__sub__ = sub  # Variable - other

In [30]:
def rsub(x0, x1):
    """Compute x1 - x0 (operands swapped) through a fresh Sub node.

    A scalar x1 is first turned into an ndarray.
    """
    left = as_array(x1)
    return Sub()(left, x0)

Variable.__rsub__ = rsub  # other - Variable

In [32]:
# example

x = Variable(np.array(2.0))
y1 = 2.0 - x  # float on the left: handled by Variable.__rsub__
y2 = x - 1.0  # Variable on the left: handled by Variable.__sub__
print(y1)
print(y2)

variable(0.0)
variable(1.0)


In [34]:
# Division

class Div(Function):
    """Division: first input divided by second input."""

    def forward(self, x0, x1):
        return x0 / x1

    def backward(self, gy):
        numer, denom = self.inputs[0].data, self.inputs[1].data
        # d(x0/x1)/dx0 = 1/x1 ; d(x0/x1)/dx1 = -x0/x1**2
        return gy / denom, gy * (-numer / denom ** 2)

def div(x0, x1):
    """Compute x0 / x1 through a fresh Div node; a scalar x1 is first turned into an ndarray."""
    return Div()(x0, as_array(x1))

def rdiv(x0, x1):
    """Compute x1 / x0 (operands swapped) through a fresh Div node.

    A scalar x1 is first turned into an ndarray.
    """
    numerator = as_array(x1)
    return Div()(numerator, x0)

Variable.__truediv__ = div  # Variable / other
Variable.__rtruediv__ = rdiv  # other / Variable


In [36]:
# power

class Pow(Function):
    """Raises its single input to a fixed exponent c."""

    def __init__(self, c):
        """Remember the exponent.

        Args:
            c: the exponent applied in forward and used in backward.
        """
        # Run the base initialiser so `generation` exists on every instance,
        # including ones that are only called with backprop disabled.
        super().__init__()
        self.c = c

    def forward(self, x):
        y = x ** self.c
        return y

    def backward(self, gy):
        x = self.inputs[0].data
        c = self.c
        # d(x**c)/dx = c * x**(c-1)
        gx = c * x ** (c - 1) * gy
        return gx

def pow(x, c):
    """Raise x to the power c through a fresh Pow node.

    Note: at module level this name shadows the built-in pow.
    """
    op = Pow(c)
    return op(x)

Variable.__pow__ = pow  # Variable ** c


In [38]:
# example

x = Variable(np.array(2.0))
y = x ** 3  # dispatches to Variable.__pow__ with c = 3
print(y)

variable(8.0)
