In [43]:
from autograd.tensor import Tensor as tn
from autograd.dropout import *
from autograd.parameter import Parameter as pr
from autograd.flatten import Flatten as fl
from autograd.module import Module
from autograd.nn import Sequential
from autograd.linear import Linear
from autograd.act import Relu, Sigmoid 
from autograd.loss import CEL
from autograd.optim import GD
import matplotlib.pyplot as plt
import numpy as np

In [59]:
import time
import numpy
np.random.seed(0)

data = tn(np.array([[0,0],[0,1],[1,0],[1,1]]),True)
target = tn(np.array([[0],[1],[0],[1]]),True)

w = list()
weights_0_1 = tn(np.random.rand(2,3),True)
weights_1_2 = tn(np.random.rand(3,1),True)
w.append(weights_0_1)
w.append(weights_1_2)



start = time.time()
for i in range(10):
    
    # Predict
    layer_1 = data @ weights_0_1
    layer_2 = layer_1 @ weights_1_2
    # Compare
    diff = (layer_2 - target)
    sqdiff = (diff * diff)
    loss = sqdiff.sum() # mean squared error loss

    # Learn: this is the backpropagation piece
    loss.backward()

    for w_ in w:
        w_.data -= w_.grad.data * 0.1
        w_.zero_grad()
    print(loss)
end = time.time()
print(end - start)

Tensor(2.973981747096818, requires_grad=True)
Tensor(1.1525848858146912, requires_grad=True)
Tensor(0.8092322200712426, requires_grad=True)
Tensor(0.648900955935711, requires_grad=True)
Tensor(0.5302958839700223, requires_grad=True)
Tensor(0.42924981305548227, requires_grad=True)
Tensor(0.3420929727522987, requires_grad=True)
Tensor(0.26729885677431675, requires_grad=True)
Tensor(0.2040379153116074, requires_grad=True)
Tensor(0.15171079130161946, requires_grad=True)
0.010184049606323242


In [51]:
import numpy as np

class Tensor (object):
    
    def __init__(self,data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if(id is None):
            self.id = np.random.randint(0,100000)
        else:
            self.id = id
        
        self.creators = creators
        self.creation_op = creation_op
        self.children = {}
        
        if(creators is not None):
            for c in creators:
                if(self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        for id,cnt in self.children.items():
            if(cnt != 0):
                return False
        return True 
        
    def backward(self,grad=None, grad_origin=None):
        if(self.autograd):
 
            if(grad is None):
                grad = Tensor(np.ones_like(self.data))

            if(grad_origin is not None):
                if(self.children[grad_origin.id] == 0):
                    raise Exception("cannot backprop more than once")
                else:
                    self.children[grad_origin.id] -= 1

            if(self.grad is None):
                self.grad = grad
            else:
                self.grad += grad
            
            # grads must not have grads of their own
            assert grad.autograd == False
            
            # only continue backpropping if there's something to
            # backprop into and if all gradients (from children)
            # are accounted for override waiting for children if
            # "backprop" was called on this variable directly
            if(self.creators is not None and 
               (self.all_children_grads_accounted_for() or 
                grad_origin is None)):

                if(self.creation_op == "add"):
                    self.creators[0].backward(self.grad, self)
                    self.creators[1].backward(self.grad, self)
                    
                if(self.creation_op == "sub"):
                    self.creators[0].backward(Tensor(self.grad.data), self)
                    self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

                if(self.creation_op == "mul"):
                    new = self.grad * self.creators[1]
                    self.creators[0].backward(new , self)
                    new = self.grad * self.creators[0]
                    self.creators[1].backward(new, self)                    
                    
                if(self.creation_op == "mm"):
                    c0 = self.creators[0]
                    c1 = self.creators[1]
                    new = self.grad.mm(c1.transpose())
                    c0.backward(new)
                    new = self.grad.transpose().mm(c0).transpose()
                    c1.backward(new)
                    
                if(self.creation_op == "transpose"):
                    self.creators[0].backward(self.grad.transpose())

                if("sum" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.expand(dim,
                                                               self.creators[0].data.shape[dim]))

                if("expand" in self.creation_op):
                    dim = int(self.creation_op.split("_")[1])
                    self.creators[0].backward(self.grad.sum(dim))
                    
                if(self.creation_op == "neg"):
                    self.creators[0].backward(self.grad.__neg__())
                    
    def __add__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        if(self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)
    
    def __sub__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="sub")
        return Tensor(self.data - other.data)
    
    def __mul__(self, other):
        if(self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="mul")
        return Tensor(self.data * other.data)    

    def sum(self, dim):
        if(self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_"+str(dim))
        return Tensor(self.data.sum(dim))
    
    def expand(self, dim,copies):

        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
        
        if(self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_"+str(dim))
        return Tensor(new_data)
    
    def transpose(self):
        if(self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")
        
        return Tensor(self.data.transpose())
    
    def mm(self, x):
        if(self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self,x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))
    
    def __repr__(self):
        return str(self.data.__repr__())
    
    def __str__(self):
        return str(self.data.__str__())  
    
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

d = a + b
e = b + c
f = d + e

f.backward(Tensor(np.array([1,1,1,1,1])))

print(b.grad.data == np.array([2,2,2,2,2]))

[ True  True  True  True  True]


In [60]:
import numpy
np.random.seed(0)

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

w = list()
w.append(Tensor(np.array([[0.85794562 ,0.84725174 ,0.6235637 ],
 [0.38438171 ,0.29753461 ,0.05671298]]), autograd=True))
w.append(Tensor(np.array([[0.47766512],
 [0.81216873],
 [0.47997717]]), autograd=True))

start = time.time()

for i in range(10):

    # Predict
    pred = data.mm(w[0]).mm(w[1])
    
    # Compare
    loss = ((pred - target)*(pred - target)).sum(0)
    
    # Learn
    loss.backward(Tensor(np.ones_like(loss.data)))

    for w_ in w:
        w_.data -= w_.grad.data * 0.1
        w_.grad.data *= 0

    print(loss)
end = time.time()
print(end - start)

[2.97398178]
[1.15258489]
[0.80923222]
[0.64890096]
[0.53029588]
[0.42924981]
[0.34209297]
[0.26729886]
[0.20403792]
[0.15171079]
0.017199277877807617


In [50]:
import numpy
np.random.seed(0)

data = np.array([[0,0],[0,1],[1,0],[1,1]])
target = np.array([[0],[1],[0],[1]])

weights_0_1 = np.array([[0.85794562 ,0.84725174 ,0.6235637 ],
 [0.38438171 ,0.29753461 ,0.05671298]])

weights_1_2 = np.array([[0.47766512],
 [0.81216873],
 [0.47997717]])
print(weights_1_2)

for i in range(10):
    
    # Predict
    layer_1 = data @ weights_0_1
    layer_2 = layer_1 @ weights_1_2

    # Compare
    diff = (layer_2 - target)
    sqdiff = (diff * diff)
    loss = sqdiff.sum(0) # mean squared error loss

    # Learn: this is the backpropagation piece
    layer_1_grad = diff.dot(weights_1_2.transpose())
    weight_1_2_update = layer_1.transpose().dot(diff)
    weight_0_1_update = data.transpose().dot(layer_1_grad)
    
    weights_1_2 -= weight_1_2_update * 0.1
    weights_0_1 -= weight_0_1_update * 0.1
    print(loss[0])

[[0.47766512]
 [0.81216873]
 [0.47997717]]
2.9739817759207106
1.1386077634035665
0.9503752598761414
0.8477935879208961
0.7687994191412023
0.7000083625355942
0.6374749719001984
0.579685958415488
0.5258833043645329
0.4756057918655521


In [64]:
r = tn(np.random.randn(4,3,2,3))
r

Tensor([[[[-0.94035354  0.92959433 -1.06279492]
   [-0.88640627  1.92134696 -0.45978052]]

  [[-1.08903444  0.98411729 -1.15920632]
   [-0.43653709  1.00924453  0.71338957]]

  [[-0.72805772  0.83951646  1.23902098]
   [-1.78480389 -0.79618584 -1.40054127]]]


 [[[-0.18435058 -1.39119312  0.03625974]
   [-0.81440556  0.69737282 -1.73742924]]

  [[ 0.1158557   0.36565145 -0.07392347]
   [-0.49351757  3.10153058  0.85875415]]

  [[-1.15477553  0.94183434 -0.28213514]
   [-0.97565467  0.09818669  0.90548995]]]


 [[[ 1.01874144 -0.11489885  1.74303872]
   [-0.32187919  0.82957109 -0.207318  ]]

  [[ 1.11799861  1.06424968  1.15132983]
   [-0.77245771 -1.29363428  0.67702681]]

  [[ 0.4240552  -0.48567617 -0.05169724]
   [ 0.56705637  1.06783358  0.27159574]]]


 [[[ 0.61930177 -0.058626    1.2565714 ]
   [ 0.2967472   0.39858573 -1.05317438]]

  [[-0.63947627 -0.14852688 -1.57458215]
   [-0.4956882  -0.11757479  0.37312016]]

  [[-0.38602877 -1.23059312 -1.95827161]
   [-0.51796269  0.579

In [67]:
r[...,2]

Tensor([[[-1.06279492 -0.45978052]
  [-1.15920632  0.71338957]
  [ 1.23902098 -1.40054127]]

 [[ 0.03625974 -1.73742924]
  [-0.07392347  0.85875415]
  [-0.28213514  0.90548995]]

 [[ 1.74303872 -0.207318  ]
  [ 1.15132983  0.67702681]
  [-0.05169724  0.27159574]]

 [[ 1.2565714  -1.05317438]
  [-1.57458215  0.37312016]
  [-1.95827161 -1.35373284]]], requires_grad=False)