In [1]:
import random

def fuzz(k):
    """Return one sample of zero-mean Gaussian noise scaled by ``k``.

    Args:
        k: Scale factor applied to a single ``random.gauss(0, 1)`` draw.

    Returns:
        ``k`` multiplied by the drawn value.
    """
    standard_normal_draw = random.gauss(0, 1)
    return k * standard_normal_draw

def dot(x, y):
    """Return the dot product of two sequences.

    Elements are paired positionally with ``zip``, so any surplus elements in
    the longer input are ignored.

    Args:
        x: First sequence of numbers.
        y: Second sequence of numbers.

    Returns:
        The sum of the pairwise products (``0`` when either input is empty).
    """
    products = []
    for left, right in zip(x, y):
        products.append(left * right)
    # Keep the built-in sum so the accumulation matches the original exactly.
    return sum(products)

def vector_subtract(x1, x2):
    """Return the element-wise difference ``x1 - x2`` as a new list.

    Elements are paired positionally with ``zip``, so the result is as long as
    the shorter input.

    Args:
        x1: Sequence of minuends.
        x2: Sequence of subtrahends.

    Returns:
        A new list holding ``x1[i] - x2[i]`` for each paired position.
    """
    difference = []
    for minuend, subtrahend in zip(x1, x2):
        difference.append(minuend - subtrahend)
    return difference

def scalar_multiply(a, v):
    """Return a new list with every element of ``v`` multiplied by ``a``.

    Args:
        a: Scalar factor (used as the left operand of each product).
        v: Sequence of values to scale.

    Returns:
        A new list holding ``a * v[i]`` for each element of ``v``.
    """
    scaled = []
    for component in v:
        scaled.append(a * component)
    return scaled

def make_data(true_params, n=100, k=0.7):
    """Generate a noisy synthetic data set from a linear model.

    Each row holds ``len(true_params) - 1`` features drawn with
    ``random.uniform(-3, 3)`` followed by a constant ``1``, so the last entry
    of ``true_params`` multiplies that constant column. Each target is
    ``dot(row, true_params)`` plus ``fuzz(k)``.

    Args:
        true_params: Coefficients of the generating model.
        n: Number of rows to generate.
        k: Noise scale passed to ``fuzz``.

    Returns:
        A tuple ``(x, y)``: ``x`` is a list of ``n`` rows and ``y`` the list
        of ``n`` matching targets.
    """
    # Build every feature row first so all uniform draws precede the noise draws.
    x = []
    for _ in range(n):
        row = [random.uniform(-3, 3) for _ in range(len(true_params) - 1)]
        row.append(1)
        x.append(row)

    y = []
    for row in x:
        y.append(dot(row, true_params) + fuzz(k))
    return x, y

def mse(x, y, params):
    """Return the mean squared error of a linear model on ``(x, y)``.

    Args:
        x: Iterable of feature rows; each prediction is ``dot(row, params)``.
        y: Sequence of targets; its length is the divisor.
        params: Model coefficients.

    Returns:
        The sum of squared differences between targets and predictions,
        divided by ``len(y)``.
    """
    # Predict for every row up front, then pair predictions with targets.
    predictions = []
    for row in x:
        predictions.append(dot(row, params))

    squared_errors = []
    for target, predicted in zip(y, predictions):
        squared_errors.append((target - predicted) ** 2)
    # Keep the built-in sum so the accumulation matches the original exactly.
    return sum(squared_errors) / len(y)

def grad_est_i(x, y, params, loss_fn, i, h=0.0001):
    """Estimate one partial derivative of the loss with a forward difference.

    A copy of ``params`` is built in which entry ``i`` is increased by ``h``
    (every other entry has ``0`` added); the estimate is the change in loss
    divided by ``h``. ``params`` itself is not modified.

    Args:
        x: Passed through to ``loss_fn`` unchanged.
        y: Passed through to ``loss_fn`` unchanged.
        params: Point at which to estimate the derivative.
        loss_fn: Callable invoked as ``loss_fn(x, y, params)``.
        i: Index of the parameter to nudge.
        h: Step size of the finite difference.

    Returns:
        ``(loss_fn(x, y, nudged) - loss_fn(x, y, params)) / h``.
    """
    shifted = []
    for position, value in enumerate(params):
        step = h if position == i else 0
        shifted.append(value + step)
    # Evaluate the nudged point first, then the base point.
    loss_shifted = loss_fn(x, y, shifted)
    loss_base = loss_fn(x, y, params)
    return (loss_shifted - loss_base) / h

def grad_est(x, y, params, loss_fn):
    """Estimate the full gradient of ``loss_fn`` at ``params``.

    Args:
        x: Passed through to ``grad_est_i`` unchanged.
        y: Passed through to ``grad_est_i`` unchanged.
        params: Point at which to estimate the gradient.
        loss_fn: Loss callable forwarded to ``grad_est_i``.

    Returns:
        A list with one ``grad_est_i`` estimate per index of ``params``,
        each using that function's default step size.
    """
    gradient = []
    for index in range(len(params)):
        gradient.append(grad_est_i(x, y, params, loss_fn, index))
    return gradient

def sgd(x, y, params_0, loss_fn, grad_fn, lr_0, patience=100, lr_decay=0.9):
    """Minimise ``loss_fn`` by gradient descent with a decaying learning rate.

    Each iteration evaluates the loss at the current parameters, records them
    if the loss is a new minimum, and then takes one gradient step. Whenever
    the loss fails to improve on the best value so far, the learning rate is
    multiplied by ``lr_decay``. The loop stops after ``patience`` consecutive
    iterations without improvement.

    This function passes the whole of ``x`` and ``y`` to ``loss_fn`` and
    ``grad_fn`` on every step; it does no sampling of its own.

    Args:
        x: Passed through to ``loss_fn`` and ``grad_fn`` unchanged.
        y: Passed through to ``loss_fn`` and ``grad_fn`` unchanged.
        params_0: Starting parameters. Not modified.
        loss_fn: Callable invoked as ``loss_fn(x, y, params)``.
        grad_fn: Callable invoked as ``grad_fn(x, y, params, loss_fn)``,
            returning one gradient component per parameter.
        lr_0: Initial learning rate.
        patience: Consecutive non-improving iterations tolerated before
            stopping.
        lr_decay: Factor applied to the learning rate after each
            non-improving iteration.

    Returns:
        A new list holding the parameters with the lowest loss observed. If
        no loss ever compares below infinity (for example every loss is NaN),
        a copy of ``params_0`` is returned.
    """
    params = params_0
    lr = lr_0
    # Start from params_0 so there is always a best-so-far value to return.
    min_params = params_0
    min_loss = float('inf')
    iterations_without_improvement = 0
    while iterations_without_improvement < patience:
        loss = loss_fn(x, y, params)
        if loss < min_loss:
            min_loss = loss
            min_params = params
            iterations_without_improvement = 0
        else:
            iterations_without_improvement += 1
            # Shrink the step after a non-improving iteration.
            lr *= lr_decay
        grad = grad_fn(x, y, params, loss_fn)
        params = vector_subtract(params, scalar_multiply(lr, grad))
    # Return the best parameters seen, not the last iterate: by the time the
    # loop exits, `params` has taken `patience` steps that never beat the minimum.
    return list(min_params)


In [2]:
# Coefficients of the generating model. make_data appends a constant 1 to
# every row, so the last entry (-5) multiplies that constant column.
true_params = [23, 3, -5]
x, y = make_data(true_params)
# Start the search from random values in [0, 1), one per true parameter.
params_0 = [random.random() for _ in range(len(true_params))]
# Fit using the finite-difference gradient of the MSE, initial learning rate 0.01.
params = sgd(x, y, params_0, mse, grad_est, 0.01)
# Show the generating coefficients, the starting point and the fitted values.
# NOTE(review): no random seed is set in the visible cells, so the printed
# numbers differ from run to run.
print(f"true params: {true_params}")
print(f"init params: {params_0}")
print(f"final params: {params}")

true params: [23, 3, -5]
init params: [0.2402337590410445, 0.8891349140682585, 0.7511619774585981]
final params: [23.038714006582534, 2.936773512916273, -4.997901258062224]
