In [7]:
import random

def dot(v1, v2):
    """Return the sum of element-wise products of ``v1`` and ``v2``.

    Elements are paired with ``zip``, so if one sequence is longer than the
    other its surplus elements are ignored.
    """
    products = (left * right for left, right in zip(v1, v2))
    return sum(products)

def fuzz(k):
    """Return one sample from a standard normal distribution, scaled by ``k``.

    Draws from the module-level ``random`` generator, so the result depends
    on (and advances) that shared state.
    """
    standard_normal = random.gauss(0, 1)
    return standard_normal * k

def make_data(p, n=100, k=0.7):
    """Generate a noisy linear data set from the coefficient vector ``p``.

    Args:
        p: Coefficients. Every generated row ends with a constant ``1``, so
            the last coefficient acts as the intercept.
        n: Number of rows to generate.
        k: Noise scale handed to ``fuzz`` for each target value.

    Returns:
        A tuple ``(x, y)``: ``x`` is a list of ``n`` rows, each holding
        ``len(p) - 1`` values drawn uniformly from [-3, 3] followed by ``1``;
        ``y`` holds ``dot(row, p) + fuzz(k)`` for each row.
    """
    n_random_features = len(p) - 1

    # Build every input row first so that all uniform draws happen before
    # any noise draw.
    x = []
    for _row in range(n):
        row = [random.uniform(-3, 3) for _col in range(n_random_features)]
        row.append(1)
        x.append(row)

    y = []
    for row in x:
        y.append(dot(row, p) + fuzz(k))

    return x, y

def vector_subtract(v1, v2):
    """Return a new list with ``v1[i] - v2[i]`` for each paired position.

    Pairing uses ``zip``, so the result is as long as the shorter input.
    """
    difference = []
    for left, right in zip(v1, v2):
        difference.append(left - right)
    return difference

def scalar_multiply(a, v):
    """Return a new list in which every element of ``v`` is multiplied by ``a``."""
    scaled = []
    for component in v:
        scaled.append(a * component)
    return scaled

def mse(x, y, params):
    """Mean squared error of the linear predictions ``dot(row, params)``.

    Args:
        x: Iterable of input rows.
        y: Target values, paired with the predictions via ``zip``.
        params: Coefficient vector applied to every row.

    Returns:
        The sum of squared prediction errors divided by ``len(y)``.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    predictions = [dot(row, params) for row in x]
    total_squared_error = sum(
        (predicted - target) ** 2 for predicted, target in zip(predictions, y)
    )
    return total_squared_error / len(y)

def grad_est_i(x, y, params, loss_fn, ix, h=0.00001):
    """Estimate one partial derivative of ``loss_fn`` by a forward difference.

    Args:
        x, y: Data forwarded unchanged to ``loss_fn``.
        params: Current parameter vector; it is not modified.
        loss_fn: Callable ``loss_fn(x, y, params)`` returning a number.
        ix: Position of the parameter to differentiate with respect to. A
            position that matches no index leaves every parameter unnudged.
        h: Step added to the selected parameter.

    Returns:
        ``(loss(nudged params) - loss(params)) / h``.
    """
    nudged = []
    for position, value in enumerate(params):
        step = h if position == ix else 0
        nudged.append(value + step)

    # Evaluate the nudged point first, then the baseline.
    loss_nudged = loss_fn(x, y, nudged)
    loss_base = loss_fn(x, y, params)
    return (loss_nudged - loss_base) / h
    
def grad_est(x, y, params, loss_fn):
    """Estimate the full gradient of ``loss_fn`` at ``params``.

    Calls ``grad_est_i`` once per parameter position (with its default step
    size) and returns the estimates as a list in parameter order.
    """
    gradient = []
    for ix, _ in enumerate(params):
        gradient.append(grad_est_i(x, y, params, loss_fn, ix))
    return gradient

def sgd(x, y, params_0, loss_fn, grad_fn, lr_0, patience=100, lr_decay=0.9):
    """Minimise ``loss_fn`` by gradient descent and return the best parameters seen.

    Each iteration evaluates ``loss_fn(x, y, params)`` on the whole of ``x``
    and ``y`` (no mini-batch sampling happens here, despite the name), then
    moves ``params`` against ``grad_fn(x, y, params, loss_fn)`` scaled by the
    current learning rate. Whenever the loss fails to improve on the best
    value so far, the learning rate is multiplied by ``lr_decay``; after
    ``patience`` consecutive non-improving iterations the search stops.

    Args:
        x: Inputs, forwarded unchanged to ``loss_fn`` and ``grad_fn``.
        y: Targets, forwarded unchanged to ``loss_fn`` and ``grad_fn``.
        params_0: Starting parameter vector (any sequence; it is copied and
            never mutated).
        loss_fn: Callable ``loss_fn(x, y, params)`` returning a comparable loss.
        grad_fn: Callable ``grad_fn(x, y, params, loss_fn)`` returning one
            gradient component per parameter.
        lr_0: Initial learning rate.
        patience: Number of consecutive non-improving iterations that ends
            the search.
        lr_decay: Factor applied to the learning rate after every
            non-improving iteration.

    Returns:
        A list holding the parameters with the lowest loss that was actually
        evaluated. If no evaluated loss ever compares below infinity (for
        example the loss is NaN from the start), a copy of ``params_0`` is
        returned.
    """
    # Work on a copy so the result is always a list and never aliases the
    # caller's sequence.
    params = list(params_0)
    lr = lr_0
    min_loss = float('inf')
    min_params = params  # fallback when no loss ever beats +inf
    iterations_without_improvement = 0
    while iterations_without_improvement < patience:
        loss = loss_fn(x, y, params)
        if loss < min_loss:
            min_loss = loss
            # Safe to keep a reference: each step below rebinds ``params``
            # to a fresh list instead of mutating this one.
            min_params = params
            iterations_without_improvement = 0
        else:
            iterations_without_improvement += 1
            lr *= lr_decay
        grad = grad_fn(x, y, params, loss_fn)
        params = vector_subtract(params, scalar_multiply(lr, grad))
    # ``params`` now holds a step whose loss was never evaluated (taken after
    # the final non-improving check), so return the best evaluated point.
    return min_params


In [9]:
# Ground-truth coefficients used to synthesise the data. make_data ends every
# input row with a constant 1, so the last entry acts as the intercept.
true_params = (23, 3, -5)
x, y = make_data(true_params)

# Random starting point: one value in [0, 1) per true coefficient.
params_0 = [random.random() for _ in true_params]
lr_0 = 0.1
params = sgd(x, y, params_0, mse, grad_est, lr_0)

# Show the start, the fitted result and the target for comparison.
print(
    f"init params: {params_0}",
    f"final params: {params}",
    f"true params: {true_params}",
    sep="\n",
)

init params: [0.24605286925659586, 0.15282021901362186, 0.6188040976320138]
final params: [23.01139574672979, 2.9706558181732357, -5.047527921846479]
true params: (23, 3, -5)
