<h2> Introduction to Tensors </h2>

In [1]:
import numpy as np


class Tensor(object):
  """Thin wrapper around a NumPy array supporting +, * and unary minus."""

  def __init__(self,data):
    # np.array() turns lists (or other array-likes) into an ndarray.
    self.data = np.array(data)

  def __str__(self):
    text = self.data.__str__()
    return str(text)

  def __repr__(self):
    text = self.data.__repr__()
    return str(text)

  def __neg__(self):
    flipped = self.data*-1
    return Tensor(flipped)

  def __add__(self,other):
    total = self.data+other.data
    return Tensor(total)

  def __mul__(self,other):
    # Element-wise product of the two wrapped arrays.
    product = self.data*other.data
    return Tensor(product)


# Build two equal-length tensors and print their element-wise product.
a = Tensor([1,2,3,4])
b = Tensor([2,3,4,5])

print(a * b)

[ 2  6 12 20]


In [2]:
# Unary minus binds tighter than +, so this evaluates a + (-b).
a + -b

array([-1, -1, -1, -1])

In [None]:
# Rebind b to its negation, then add it to a.
b = -b
a + b

TypeError: ignored

<h2> Automatic gradient computation </h2>

In [None]:
class Tensor(object):
  """NumPy-backed tensor that remembers how it was created (addition only).

  creators    -- the tensors this one was computed from, or None
  creation_op -- name of the operation that produced it, or None
  grad        -- the gradient most recently handed to backward()
  """

  def __init__(self,data,creators=None,creation_op=None):
    self.data = np.array(data)
    self.grad = None
    self.creation_op = creation_op
    self.creators = creators

  def __str__(self):
    text = self.data.__str__()
    return str(text)

  def __repr__(self):
    text = self.data.__repr__()
    return str(text)

  def __add__(self,other):
    summed = self.data + other.data
    return Tensor(summed,creators=[self,other],creation_op="add")

  def backward(self,grad):
    """Store grad on this tensor and pass it unchanged to both addends."""
    # The gradient is assigned, not accumulated: a second call overwrites it.
    self.grad = grad

    if self.creation_op != "add":
      return
    for position in (0, 1):
      self.creators[position].backward(grad)


# c records a and b as its creators with creation_op "add".
a = Tensor([1,2,3,4])
b = Tensor([2,3,4,5])
c = a + b
print(c)

[3 5 7 9]


In [None]:
# Seed backpropagation at c with a gradient of ones.
c.backward(Tensor(np.array([1,1,1,1])))

In [None]:
# Two-level graph: g = (a + b) + (c + d). Every tensor is used exactly once.
a = Tensor([1,2,3,4,5])
b = Tensor([2,2,2,2,2])
c = Tensor([5,4,3,2,1])
d = Tensor([-1,-2,-3,-4,-5])
e = a + b
f = c + d
g = e + f
# Seed backpropagation at the root with a gradient of ones.
g.backward(Tensor(np.array([1,1,1,1,1])))


In [None]:
# b is used twice here: once in f = a + b and once in e = b + f.
a = Tensor([1,2,3,4,5])
b = Tensor([1,2,3,4,6])
# c is created but not used in this cell.
c = Tensor([-1,-2,-3,-4,-5])
f = a + b
e = b + f
# backward() in this version assigns grad instead of accumulating, so the
# second gradient reaching b replaces the first one.
e.backward(Tensor([1,1,1,1,1]))


In [None]:
# Inspect the gradient stored on b after the backward pass above.
b.grad

array([1, 1, 1, 1, 1])

<h2> Autograd with support for multi-use tensors </h2>

In [None]:
class Tensor(object):
  """Tensor whose gradient is accumulated when it feeds several operations.

  Each tensor keeps a `children` dict mapping a child's id to the number of
  gradients still expected from that child. Only tensors created with
  autograd=True take part in backpropagation.
  """

  def __init__(self,data,autograd = False,creators=None,creation_op=None,id=None):
    self.data = np.array(data)
    self.autograd = autograd
    self.creators = creators
    self.creation_op = creation_op
    self.grad = None
    self.children = {}
    # When no id is supplied, draw one from NumPy's global random generator.
    self.id = np.random.randint(0,100000) if id is None else id

    # Register this tensor as a child of every tensor it was built from.
    for parent in (creators if creators is not None else ()):
      if self.id in parent.children:
        parent.children[self.id] += 1
      else:
        parent.children[self.id] = 1

  def __str__(self):
    text = self.data.__str__()
    return str(text)

  def __repr__(self):
    text = self.data.__repr__()
    return str(text)

  def __add__(self,other):
    summed = self.data + other.data
    if not (self.autograd and other.autograd):
      # At least one operand opted out of autograd: return a plain result.
      return Tensor(summed)
    return Tensor(summed,autograd=True,creators=[self,other],creation_op="add")

  def all_children_grads_accounted_for(self):
    """Return True when no child still owes this tensor a gradient."""
    for pending in self.children.values():
      if pending != 0:
        return False
    return True

  def backward(self,grad=None,grad_origin=None):
    """Accumulate grad and, once complete, pass it on to the creators.

    grad_origin is the child tensor delivering the gradient; None marks the
    call that starts backpropagation.
    Raises Exception if grad_origin has already delivered all its gradients.
    """
    if not self.autograd:
      return

    if grad_origin is not None:
      if self.children[grad_origin.id] == 0 :
        raise Exception("Cannot backprop more than once")
      self.children[grad_origin.id] -= 1

    if self.grad is not None:
      self.grad += grad
    else:
      self.grad = grad

    if self.creators is None:
      return
    # Continue only when every child has reported, or when this is the root.
    ready = self.all_children_grads_accounted_for() or grad_origin is None
    if ready and self.creation_op == 'add':
      for position in (0, 1):
        self.creators[position].backward(self.grad,self)


In [None]:
# Same graph as before, now with autograd enabled: b feeds both f and e,
# so its gradient is the sum of the two contributions.
a = Tensor([1,2,3,4,5],autograd=True)
b = Tensor([1,2,3,4,6],autograd=True)
# c is created but not used in this cell.
c = Tensor([-1,-2,-3,-4,-5],autograd=True)
f = a + b
e = b + f
e.backward(Tensor([1,1,1,1,1]))


In [None]:
# Inspect the accumulated gradient on b.
b.grad

array([2, 2, 2, 2, 2])

<h2> Adding support for negation </h2>


In [None]:
class Tensor(object):
  """Autograd tensor supporting addition and negation.

  Each tensor keeps a `children` dict mapping a child's id to the number of
  gradients still expected from that child. Only tensors created with
  autograd=True take part in backpropagation.
  """

  def __init__(self,data,autograd=False,creators=None,creation_op=None):
    self.data = np.array(data)
    self.autograd = autograd
    self.grad = None
    self.children = {}
    self.creators = creators
    self.creation_op = creation_op
    # Ids are drawn from NumPy's global random generator (as before), so
    # creating a tensor advances the seeded random stream.
    self.id = np.random.randint(0,100000)

    # Register this tensor as a child of every tensor it was built from.
    if self.creators is not None:
      for c in self.creators:
        c.children[self.id] = c.children.get(self.id, 0) + 1

  def __add__(self,other):
    if self.autograd and other.autograd:
      return Tensor(self.data+other.data,autograd=True,creators=[self,other],creation_op="add")
    return Tensor(self.data+other.data)

  def __neg__(self):
    if self.autograd:
      return Tensor(self.data*-1,autograd=True,creators=[self],creation_op="neg")
    return Tensor(self.data*-1)

  def all_children_grads_accounted_for(self):
    """Return True when no child still owes this tensor a gradient."""
    return all(count == 0 for count in self.children.values())

  def __str__(self):
    return self.data.__str__()

  def __repr__(self):
    return str(self.data.__repr__())

  def backward(self,grad,grad_origin=None):
    """Accumulate grad and, once complete, pass it on to the creators.

    grad may be a Tensor or anything np.array() accepts (ndarray, list).
    Non-Tensor gradients are copied into a Tensor first, so the caller's
    object is never mutated and never shared between graph nodes.
    grad_origin is the child tensor delivering the gradient; None marks the
    call that starts backpropagation.
    Raises Exception if grad_origin has already delivered all its gradients.
    """
    if not self.autograd:
      return

    if not isinstance(grad, Tensor):
      grad = Tensor(grad)

    if grad_origin is not None:
      if self.children[grad_origin.id] == 0:
        raise Exception("Cannot backprop more than once")
      self.children[grad_origin.id] -= 1

    if self.grad is None:
      self.grad = grad
    else:
      # Build a fresh tensor instead of `+=`: the stored gradient object may
      # also be referenced by the node that sent it.
      self.grad = Tensor(self.grad.data + grad.data)

    if self.creators is None:
      return
    # Continue only when every child has reported, or when this is the root.
    if not (self.all_children_grads_accounted_for() or grad_origin is None):
      return

    if self.creation_op == "add":
      self.creators[0].backward(self.grad,self)
      self.creators[1].backward(self.grad,self)
    elif self.creation_op == "neg":
      self.creators[0].backward(Tensor(self.grad.data * -1),self)




In [None]:
# c = a - b expressed as an addition of a and the negation of b.
b = Tensor([1,2,3,4],autograd=True)
a = Tensor([4,5,6,7],autograd=True)
c = a + (-b)

In [None]:
# NOTE: the seed gradient here is a raw ndarray, not a Tensor.
c.backward(np.array([1,1,1,1]))

In [None]:
# b entered c through a negation, so its gradient is the negated seed.
b.grad

array([-1, -1, -1, -1])

<h2> Adding other operations </h2>

In [135]:
class Tensor(object):
  """Autograd tensor: add, neg, sub, mul, expand, sum, mm and transpose.

  Each tensor keeps a `children` dict mapping a child's id to the number of
  gradients still expected from that child. A non-root tensor forwards its
  gradient to its creators only after every child has reported, so tensors
  that are used several times accumulate their full gradient first.
  Only tensors created with autograd=True take part in backpropagation.
  """

  def __init__(self,data,autograd=False,creators=None,creation_op=None):
    self.data = np.array(data)
    self.autograd = autograd
    self.creators = creators
    self.children = {}
    self.creation_op = creation_op
    self.grad = None
    # Ids are drawn from NumPy's global random generator (as before), so
    # creating a tensor advances the seeded random stream.
    self.id = np.random.randint(0,100000)

    # Register this tensor as a child of every tensor it was built from.
    if self.creators is not None:
      for c in self.creators:
        c.children[self.id] = c.children.get(self.id, 0) + 1

  def __repr__(self):
    return str(self.data.__repr__())

  def __str__(self):
    return str(self.data.__str__())

  def __add__(self,other):
    if self.autograd and other.autograd:
      return Tensor(self.data+other.data,autograd=True,creators=[self,other],creation_op="add")
    return Tensor(self.data+other.data)

  def __neg__(self):
    if self.autograd:
      return Tensor(self.data*-1,autograd=True,creators=[self],creation_op="neg")
    return Tensor(self.data*-1)

  def all_children_grads_accounted_for(self):
    """Return True when no child still owes this tensor a gradient."""
    return all(count == 0 for count in self.children.values())

  def __sub__(self,other):
    if self.autograd and other.autograd:
      return Tensor(self.data-other.data,autograd=True,creators=[self,other],creation_op="sub")
    return Tensor(self.data-other.data)

  def __mul__(self,other):
    if self.autograd and other.autograd:
      return Tensor(self.data*other.data,autograd=True,creators=[self,other],creation_op="mul")
    return Tensor(self.data*other.data)

  def expand(self,dim,copies):
    """Insert a new axis at position dim holding `copies` repetitions."""
    # Repeat along a new trailing axis, then move that axis to `dim`.
    transpose_axis = list(range(0,len(self.data.shape)))
    transpose_axis.insert(dim,len(self.data.shape))
    new_shape = list(self.data.shape) + [copies]
    new_data = self.data.repeat(copies).reshape(new_shape)
    new_data = new_data.transpose(transpose_axis)

    if self.autograd:
      return Tensor(new_data,autograd=True,creators=[self],creation_op='expand_'+str(dim))
    return Tensor(new_data)

  def sum(self,dim):
    """Sum over axis dim."""
    if self.autograd:
      return Tensor(self.data.sum(dim),autograd=True,creators=[self],creation_op="sum_"+str(dim))
    return Tensor(self.data.sum(dim))

  def mm(self,x):
    """Matrix product self . x (np.ndarray.dot on the wrapped arrays)."""
    if self.autograd:
      return Tensor(self.data.dot(x.data),autograd=True,creators=[self,x],creation_op="mm")
    return Tensor(self.data.dot(x.data))

  def transpose(self):
    if self.autograd:
      return Tensor(self.data.transpose(),autograd=True,creators=[self],creation_op="transpose")
    return Tensor(self.data.transpose())

  def backward(self,grad=None,grad_origin=None):
    """Accumulate grad and, once complete, pass it on to the creators.

    grad        -- Tensor (or array-like, which is copied into a Tensor);
                   None means a gradient of ones shaped like self.data.
    grad_origin -- the child tensor delivering the gradient; None marks the
                   call that starts backpropagation.
    Raises Exception if grad_origin has already delivered all its gradients.
    """
    if not self.autograd:
      return

    if grad is None:
      grad = Tensor(np.ones_like(self.data))
    elif not isinstance(grad, Tensor):
      grad = Tensor(grad)

    if grad_origin is not None:
      if self.children[grad_origin.id] == 0:
        raise Exception("Cannot backprop more than once")
      self.children[grad_origin.id] -= 1

    if self.grad is None:
      self.grad = grad
    else:
      # Build a fresh tensor instead of `+=` so no gradient object is ever
      # mutated while another node still references it.
      self.grad = Tensor(self.grad.data + grad.data)

    if self.creators is None:
      return
    # Continue only when every child has reported, or when this is the root.
    if not (self.all_children_grads_accounted_for() or grad_origin is None):
      return

    # Every branch below computes on raw ndarrays and wraps the result in a
    # plain Tensor, so the backward pass never adds nodes to the graph, and
    # every branch passes `self` as grad_origin so the creators' child
    # bookkeeping stays correct.
    g = self.grad.data
    op = self.creation_op or ""

    if op == "sub":
      self.creators[0].backward(Tensor(g),self)
      self.creators[1].backward(Tensor(g * -1),self)

    elif op == "add":
      self.creators[0].backward(Tensor(g),self)
      self.creators[1].backward(Tensor(g),self)

    elif op == "neg":
      self.creators[0].backward(Tensor(g * -1),self)

    elif op == "mul":
      first, second = self.creators[0], self.creators[1]
      first.backward(Tensor(g * second.data),self)
      second.backward(Tensor(g * first.data),self)

    elif op == "mm":
      act = self.creators[0]
      weights = self.creators[1]
      # d(act) = g . weights^T ; d(weights) = (g^T . act)^T
      act.backward(Tensor(g.dot(weights.data.transpose())),self)
      weights.backward(Tensor(g.transpose().dot(act.data).transpose()),self)

    elif op == "transpose":
      self.creators[0].backward(Tensor(g.transpose()),self)

    elif op.startswith("sum_"):
      # The gradient of a sum is the incoming gradient repeated along the
      # axis that was summed away.
      dim = int(op.split('_')[1])
      ds = self.creators[0].data.shape[dim]
      self.creators[0].backward(Tensor(g).expand(dim,ds),self)

    elif op.startswith("expand_"):
      # The gradient of an expand is the sum over the inserted axis.
      dim = int(op.split('_')[1])
      self.creators[0].backward(Tensor(g.sum(dim)),self)

## Using Autograd to train a neural network

In [72]:
# Train a two-layer linear network (2 -> 3 -> 1) with hand-written updates.
import numpy as np
# Fix the global seed. Tensor ids are drawn from the same generator, so the
# order in which tensors are created below affects the random weights.
np.random.seed(0)

# Four 2-feature samples and one target value per sample.
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]),autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]),autograd=True)

# Weight matrices: input -> hidden (2x3) and hidden -> output (3x1).
w = []
w.append(Tensor(np.random.rand(2,3),autograd=True))
w.append(Tensor(np.random.rand(3,1),autograd=True))

for i in range(10):

  # Forward pass: two chained matrix products, no non-linearity.
  pred = data.mm(w[0]).mm(w[1])
  
  # Sum of squared errors over the sample axis.
  loss = ((pred - target)*(pred - target)).sum(0)
  #print(loss)
  # Seed backpropagation with a gradient of ones shaped like the loss.
  loss.backward(Tensor(np.ones_like(loss.data)))

  for w_ in w:
    # Gradient-descent update with learning rate 0.1, then reset the
    # gradient in place for the next iteration.
    w_.data -= w_.grad.data * 0.1
    w_.grad.data *= 0
  print(loss.data)

[0.58128304]
[0.48988149]
[0.41375111]
[0.34489412]
[0.28210124]
[0.2254484]
[0.17538853]
[0.1324231]
[0.09682769]
[0.06849361]


## Adding Automatic Optimization

In [87]:
class SGD:
  """Plain stochastic gradient descent over a list of parameters.

  parameters -- iterable of objects exposing `.data` and `.grad`, where
                `.grad` is None or has its own `.data` array
  lr         -- learning rate applied in step()
  """

  def __init__(self,parameters,lr=0.01):
    self.parameters = parameters
    self.lr = lr

  def zero_grad(self):
    """Reset every existing gradient to zero, in place."""
    for p in self.parameters:
      # A parameter without a gradient is skipped. It must not end the loop,
      # otherwise every parameter after it would keep its stale gradient.
      if p.grad is None:
        continue
      p.grad.data *= 0

  def step(self):
    """Apply one update: data -= grad * lr, in place."""
    for p in self.parameters:
      # Parameters that received no gradient are left untouched.
      if p.grad is None:
        continue
      p.data -= p.grad.data * self.lr



In [89]:

# Same two-layer linear network as before, now updated through the SGD class.
data = Tensor(np.array([[0,1],[1,1],[1,0],[1,1]]),autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]),autograd=True) 

# NOTE: no np.random.seed() call in this cell, so these weights depend on
# how far the global generator was advanced by earlier cells.
weights = []
weights.append(Tensor(np.random.rand(2,3),autograd=True))
weights.append(Tensor(np.random.rand(3,1),autograd=True))
sgd = SGD(parameters=weights,lr=0.01)
for i in range(10):

  # Forward pass: two chained matrix products.
  pred = data.mm(weights[0]).mm(weights[1])

  # Sum of squared errors over the sample axis.
  loss = ((pred - target)*(pred - target)).sum(0)
  # Clear the previous iteration's gradients before backpropagating.
  sgd.zero_grad()
  loss.backward(Tensor(np.ones_like(loss.data)))

  

  # Apply the gradient-descent update to every weight tensor.
  sgd.step()

  print(loss.data)



[2.00105624]
[1.33741065]
[1.00490479]
[0.82498041]
[0.72220759]
[0.66086417]
[0.62270955]
[0.5979335]
[0.58106149]
[0.56895512]


## Adding support for layers

In [101]:
class Layer:
  """Base class for network layers: owns a list of trainable parameters."""

  def __init__(self):
    # Subclasses append their parameter tensors to this list.
    self.parameters = list()

  def get_parameters(self):
    """Return the layer's parameter list (the list itself, not a copy)."""
    params = self.parameters
    return params

class Linear(Layer):
  """Fully connected layer computing x.mm(weights) + bias."""

  def __init__(self,num_inputs,num_outputs):
    super().__init__()

    # Weights are uniform random in [0, 1); the bias starts at zero.
    self.weights = Tensor(np.random.rand(num_inputs,num_outputs),autograd=True)
    self.bias = Tensor(np.zeros(num_outputs),autograd=True)

    self.parameters.extend([self.weights, self.bias])

  def forward(self,x):
    """Apply the layer to x, one output row per row of x.data."""
    projected = x.mm(self.weights)
    # Repeat the bias once per input row so it can be added element-wise.
    row_count = len(x.data)
    tiled_bias = self.bias.expand(0,row_count)
    return projected + tiled_bias


  #def backward(self):



## Layers that contain layers

In [131]:
class Sequential(Layer):
  """Container layer that applies its sub-layers one after another."""

  def __init__(self,layers=None):
    """layers -- optional list of layers; a fresh empty list when omitted.

    A list that is passed in is stored as-is (not copied), so add() also
    appends to the caller's list.
    """
    super().__init__()
    # A `layers=[]` default would be one list shared by every Sequential
    # built without arguments, and add() would mutate that shared list.
    self.layers = [] if layers is None else layers

  def add(self,layer):
    """Append layer to the end of the pipeline."""
    self.layers.append(layer)

  def forward(self,x):
    """Feed x through every layer in order and return the final output."""
    output = x
    for layer in self.layers:
      output = layer.forward(output)
    return output

  def get_parameters(self):
    """Return a new list with the parameters of all sub-layers, in order."""
    params = []
    for layer in self.layers:
      params.extend(layer.get_parameters())
    return params

## Loss Function Layers

In [133]:
class MSELoss(Layer):
  """Squared-error loss summed over axis 0; invoked by calling the instance."""

  def __init__(self):
    super().__init__()

  def __call__(self,pred,targets):
    # The difference is computed twice on purpose: each subtraction becomes
    # its own node in the autograd graph, exactly as in the one-line form.
    first_diff = pred - targets
    second_diff = pred - targets
    squared = first_diff * second_diff
    return squared.sum(0)
  #def forward(self,pred,targets):
    

  

In [136]:
# Train a Sequential model of two Linear layers with MSELoss and SGD.
# Fix the global seed. Tensor ids are drawn from the same generator, so the
# order in which tensors are created below affects the random weights.
np.random.seed(0)

# Four 2-feature samples and one target value per sample.
data = Tensor(np.array([[0,0],[0,1],[1,1],[1,1]]),autograd=True) 
target = Tensor(np.array([[0],[1],[0],[1]]),autograd=True)


# 2 inputs -> 3 hidden units -> 1 output, with no non-linearity in between.
model = Sequential([Linear(2,3),Linear(3,1)])
#model.add()

optimizer = SGD(model.get_parameters(),lr=0.01)

loss = MSELoss()

for i in range(10):

  pred = model.forward(data)

  loss_value = loss(pred,target)

  # Clear the previous iteration's gradients before backpropagating.
  optimizer.zero_grad()

  # Seed backpropagation with a gradient of ones shaped like the loss.
  loss_value.backward(Tensor(np.ones_like(loss_value.data)))

  optimizer.step()



  print(loss_value.data)


[7.37534399]
[3.39089963]
[2.3662581]
[1.97188109]
[1.78049273]
[1.66799105]
[1.59038736]
[1.53015942]
[1.47971884]
[1.43552872]
