# Gradients without backpropagation

Here I test the forward-gradient approach from the paper *Gradients without Backpropagation* on a simple linear regression model $h(\theta,X) = \theta_0 + \theta_1x_0 + \theta_2x_1$.

In [1]:
# Simple script that runs my forward gradient

import numpy as np


def hypothesis(X, theta):
    """
    Linear model prediction: theta[0] + X @ theta[1:].

    The first entry of theta acts as the intercept; the remaining entries
    are multiplied against X with the matrix-product operator.
    """
    intercept = theta[0]
    weights = theta[1:]
    return intercept + X @ weights


def mse_loss(X, y, theta):
    """
    Halved mean squared error: mean((h(x) - y)**2) / 2.

    Predictions come from hypothesis(X, theta).
    """
    residuals = hypothesis(X, theta) - y
    squared_error = residuals ** 2
    return np.mean(squared_error) / 2


def mse_loss_derivative(X, y, theta):
    """
    Analytic gradient of mse_loss with respect to theta.

    Returns a 1-D array: the first entry is the mean residual (derivative
    with respect to theta[0]); the remaining entries are X.T @ residual
    divided by len(X) (derivatives with respect to theta[1:]).
    """
    residuals = hypothesis(X, theta) - y
    n_samples = len(X)

    grad_intercept = np.mean(residuals)
    grad_weights = (X.T @ residuals) / n_samples
    return np.concatenate(([grad_intercept], grad_weights))
    

def mse_estimated_derivative(X,y,theta, h=1e-10, n_v=100000):
    """
    Monte Carlo estimate of the gradient of mse_loss using loss evaluations only.

    For each coordinate i of theta, n_v random perturbations v ~ N(0, 1) are
    applied to that coordinate alone, and the scaled forward differences
        ((L(theta + h*v*e_i) - L(theta)) / h) * v
    are averaged. To first order in h each term is dL/dtheta_i * v**2, and
    E[v**2] = 1, so the average approaches the partial derivative as n_v grows.
    See https://en.wikipedia.org/wiki/Partial_derivative

    Parameters
    ----------
    X, y :
        Passed unchanged to mse_loss.
    theta :
        1-D sequence of parameters. It is converted to a float array, so
        integer arrays and plain lists are accepted; the caller's object is
        never modified.
    h :
        Finite-difference step that scales every perturbation.
    n_v :
        Number of random perturbations. The same draws are reused for every
        coordinate.

    Returns
    -------
    list
        One estimated partial derivative per entry of theta, in order.
    """
    # Float copy-free view when possible; avoids a casting error on the
    # in-place addition below when theta has an integer dtype.
    theta = np.asarray(theta, dtype=float)

    V = np.random.normal(0, 1, n_v)  # ~N(0,1)
    h_V = h * V

    # The unperturbed loss is the same for every sample and coordinate,
    # so evaluate it once instead of inside the innermost loop.
    base_loss = mse_loss(X, y, theta)

    estimated_theta = []
    for i in range(theta.shape[0]):
        # One row per sample; only column i is perturbed:
        # | t_0, t_1+h*v_0, t_2
        # | t_0, t_1+h*v_1, t_2
        # | t_0, t_1+h*v_n, t_2
        perturbed = np.tile(theta, (n_v, 1))
        perturbed[:, i] += h_V

        perturbed_losses = np.array([mse_loss(X, y, row) for row in perturbed])

        # Definition of the derivative (forward difference), weighted by v
        # and averaged over all samples.
        estimated_theta.append(np.mean((perturbed_losses - base_loss) / h * V))

    return estimated_theta



# Demo: compare the analytic gradient with the sampled estimate on random data.
# The global NumPy RNG is seeded so the data (and the perturbations drawn inside
# mse_estimated_derivative) are reproducible; statement order matters because
# every draw below consumes the same random stream.
np.random.seed(0)
X = np.random.rand(10,2)  # 10 samples, 2 features, uniform in [0, 1)
y = np.random.rand(10)  # 10 targets, uniform in [0, 1)
theta = np.array([1,0.5,0.2])  # [intercept, weight for x_0, weight for x_1]

print("True derivative")
print(mse_loss_derivative(X,y,theta))
print("*******")
print("Estimated derivative")
print(mse_estimated_derivative(X,y,theta, n_v=100000))


True derivative
[0.80854047 0.49895263 0.55537556]
*******
Estimated derivative
[0.8041822168887169, 0.49626311208377466, 0.5523819027735966]
