In [4]:
import numpy as np
import cupy as cp
from __future__ import annotations

In [5]:
x_gpu = cp.array([1, 2, 3])
x_cpu = x_gpu.get()
type(x_cpu)
type(x_gpu)

# lets keep things on purely on the cpu for now

cupy.ndarray

In [32]:
class Tensor:
  def __init__(self, list: list) -> None:
    self.name: str = ""
    self.values: np.array = np.array(list)
    self.parents: tuple[Tensor, Tensor] = (None, None)
    self.gradient: np.array = np.zeros_like(self.values)
    self._backward: function = lambda *args: None
    self.visited = False

  def __add__(self, other: Tensor) -> Tensor:
    child = Tensor(self.values + other.values)
    child.parents = (self, other)
    def _backward() -> None:
      self.gradient = 1 * child.gradient
      other.gradient = 1 * child.gradient
    child._backward = _backward
    return child
  
  def __mul__(self, other: Tensor) -> Tensor:
    child = Tensor(self.values + other.values)
    child.parents = (self, other)
    def _backward() -> None:

      self.gradient = other.values * child.gradient # mistake made using Tensor object instead of tensor value
      other.gradient = self.values * child.gradient
    child._backward = _backward
    return child
  
  def topological_sort(self) -> list[Tensor]:
    dfs_sort: list[Tensor] = []
    def dfs(node: Tensor):
      if node.parents[0]:
        dfs(node.parents[0])
      if node.parents[1]:
        dfs(node.parents[1])
      if not node.visited:
        dfs_sort.append(node); node.visited = True
    dfs(self); return list(reversed(dfs_sort))

  def backward(self):
    self.gradient = np.ones_like(self.values) # gradient w.r.t self
    for node in self.topological_sort():
      node._backward()
      print(node)

  def zero_grad(self):
    self.gradient = np.zeros_like(self.values)
  
  def __repr__(self) -> str:
    return f"{self.name}: {self.values} : {self.gradient}"

In [35]:
a = Tensor([1, 2, 3]); a.name = "a"
b = Tensor([2, 4, 6]); b.name = "b"
c = Tensor([3, 6, 9]); c.name = "c"
d = a * b; d.name = "d"
e = d + c; e.name = "e"
f = e * a; f.name = "f"
f.backward()


f: [ 7 14 21] : [1 1 1]
e: [ 6 12 18] : [1 2 3]
c: [3 6 9] : [1 2 3]
d: [3 6 9] : [1 2 3]
b: [2 4 6] : [1 4 9]
a: [1 2 3] : [ 2  8 18]
