In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

from yra_grad.tensor import Tensor

In [3]:
class Optimizer:
    def __init__(self, parameters):
        for param in parameters:
            if not isinstance(param, Tensor):
                raise ValueError("all parameters should be Tensor instances")
                
        self.parameters = parameters
        
    def zero_grad(self):
        for param in self.parameters:
            param.zero_grad()

In [4]:
class SGD(Optimizer):
    def __init__(self, parameters, lr):
        super(SGD, self).__init__(parameters)
        self.lr = lr
        
    def step(self):
        for param in self.parameters:
            #print("before step", param.prev)
            param.step(param.data * self.lr)
            #print("after step", param.prev)

In [5]:
x_init = np.random.randn(3,3).astype(np.float32)
W_init = np.random.randn(3,3).astype(np.float32)

x = Tensor(x_init)
W = Tensor(W_init)


#print(x, W)
optim = SGD([W], 0.5)

for _ in range(10):

    out = ((x @ W) ** 2).sum()
    out.backward()

    optim.step()
    optim.zero_grad()
    
    print(out.data)

39.14371
9.785928
2.446482
0.6116205
0.15290512
0.03822628
0.00955657
0.0023891425
0.00059728563
0.00014932141
