In [1]:
import numpy as np

In [3]:
import random

In [2]:
# Toy dataset generated from the exact rule  y = 2*x1 + 3*x2 + 4.
# Each row of X is one sample (x1, x2); y holds the matching targets.
X = np.array([[1, 1], [1, 2], [2, 2], [3, 4], [5, 6]])
y = np.array([9, 12, 14, 22, 32])

In [4]:
# Seed the generator before drawing the initial parameters so that a fresh
# kernel always starts from the same (t1, t2, t3); without it every run of
# the notebook begins from a different random point.
random.seed(42)
t1, t2, t3 = random.random(), random.random(), random.random()

In [5]:
def lin_reg(x1, x2, t1, t2, t3):
    """Evaluate the two-feature linear model ``x1*t1 + x2*t2 + t3``.

    Args:
        x1: Value of the first feature.
        x2: Value of the second feature.
        t1: Coefficient applied to ``x1``.
        t2: Coefficient applied to ``x2``.
        t3: Constant (intercept) term.

    Returns:
        The model's prediction for the given features.
    """
    weighted_features = x1 * t1 + x2 * t2
    return weighted_features + t3

In [6]:
# Prediction for the first sample: unpack its row into (x1, x2).
lin_reg(*X[0], t1, t2, t3)

1.4341087335335772

In [7]:
def mse_one_example(x1, x2, t1, t2, t3, y):
    """Squared error of the linear model on a single sample.

    Args:
        x1: Value of the first feature.
        x2: Value of the second feature.
        t1: Coefficient applied to ``x1``.
        t2: Coefficient applied to ``x2``.
        t3: Constant (intercept) term.
        y: Target value for this sample.

    Returns:
        ``(lin_reg(x1, x2, t1, t2, t3) - y) ** 2``.
    """
    prediction = lin_reg(x1, x2, t1, t2, t3)
    residual = prediction - y
    return residual ** 2

In [8]:
# Squared error on the first sample: unpack its row into (x1, x2).
mse_one_example(*X[0], t1, t2, t3, y[0])

57.242710655992894

In [10]:
def mse(X, y, t1, t2, t3):
    """Mean squared error of the linear model over all samples.

    Args:
        X: Feature table indexed as ``X[i, 0]`` / ``X[i, 1]`` for sample ``i``.
        y: Targets; ``len(y)`` determines how many samples are used.
        t1: Coefficient applied to the first feature.
        t2: Coefficient applied to the second feature.
        t3: Constant (intercept) term.

    Returns:
        The sum of per-sample squared errors divided by ``len(y)``.
    """
    n = len(y)
    squared_errors = [
        mse_one_example(X[i, 0], X[i, 1], t1, t2, t3, y[i])
        for i in range(n)
    ]
    # Accumulate left to right from 0.0, matching a plain running sum.
    running = 0.
    for err in squared_errors:
        running += err
    return running / n

In [12]:
# Loss over the whole dataset at the current parameters.
mse(X=X, y=y, t1=t1, t2=t2, t3=t3)

233.9825936564401

In [14]:
def der_mse_t1(X, y, t1, t2, t3):
    """Partial derivative of the mean squared error with respect to ``t1``.

    Computes ``2/n * sum((prediction_i - y_i) * x1_i)`` with ``n = len(y)``.

    Args:
        X: Feature table indexed as ``X[i, 0]`` / ``X[i, 1]`` for sample ``i``.
        y: Targets; ``len(y)`` determines how many samples are used.
        t1: Coefficient applied to the first feature.
        t2: Coefficient applied to the second feature.
        t3: Constant (intercept) term.

    Returns:
        The derivative of the loss along ``t1``.
    """
    n = len(y)
    acc = 0.
    for i in range(n):
        x1, x2 = X[i, 0], X[i, 1]
        residual = lin_reg(x1, x2, t1, t2, t3) - y[i]
        # Chain rule: d(prediction)/d(t1) is the first feature.
        acc += residual * x1
    return 2 * acc / n

In [15]:
def der_mse_t2(X, y, t1, t2, t3):
    """Partial derivative of the mean squared error with respect to ``t2``.

    Computes ``2/n * sum((prediction_i - y_i) * x2_i)`` with ``n = len(y)``.

    Args:
        X: Feature table indexed as ``X[i, 0]`` / ``X[i, 1]`` for sample ``i``.
        y: Targets; ``len(y)`` determines how many samples are used.
        t1: Coefficient applied to the first feature.
        t2: Coefficient applied to the second feature.
        t3: Constant (intercept) term.

    Returns:
        The derivative of the loss along ``t2``.
    """
    n = len(y)
    acc = 0.
    for i in range(n):
        x1, x2 = X[i, 0], X[i, 1]
        residual = lin_reg(x1, x2, t1, t2, t3) - y[i]
        # Chain rule: d(prediction)/d(t2) is the second feature.
        acc += residual * x2
    return 2 * acc / n

In [16]:
def der_mse_t3(X, y, t1, t2, t3):
    """Partial derivative of the mean squared error with respect to ``t3``.

    Computes ``2/n * sum(prediction_i - y_i)`` with ``n = len(y)``.

    Args:
        X: Feature table indexed as ``X[i, 0]`` / ``X[i, 1]`` for sample ``i``.
        y: Targets; ``len(y)`` determines how many samples are used.
        t1: Coefficient applied to the first feature.
        t2: Coefficient applied to the second feature.
        t3: Constant (intercept) term.

    Returns:
        The derivative of the loss along ``t3``.
    """
    n = len(y)
    acc = 0.
    for i in range(n):
        prediction = lin_reg(X[i, 0], X[i, 1], t1, t2, t3)
        # The intercept enters the prediction with factor 1, so the
        # residual itself is the per-sample contribution.
        acc += prediction - y[i]
    return 2 * acc / n

In [20]:
def do_gradiend_update(t1, t2, t3, dt1, dt2, dt3, lr):
    """Move each parameter against its gradient by one step of size ``lr``.

    Args:
        t1, t2, t3: Current parameter values.
        dt1, dt2, dt3: Gradient components matching ``t1``, ``t2``, ``t3``.
        lr: Step size multiplying every gradient component.

    Returns:
        A 3-tuple ``(t1 - lr*dt1, t2 - lr*dt2, t3 - lr*dt3)``.
    """
    params = (t1, t2, t3)
    grads = (dt1, dt2, dt3)
    return tuple(theta - lr * grad for theta, grad in zip(params, grads))

In [21]:
def do_gradiend_step(X, y, t1, t2, t3, lr):
    """Run one full gradient-descent step on the mean squared error.

    Evaluates all three partial derivatives at the current parameters and
    then applies the update with step size ``lr``.

    Args:
        X: Feature table passed through to the derivative functions.
        y: Targets passed through to the derivative functions.
        t1, t2, t3: Current parameter values.
        lr: Step size.

    Returns:
        The updated ``(t1, t2, t3)`` as returned by ``do_gradiend_update``.
    """
    params = (t1, t2, t3)
    gradient = (
        der_mse_t1(X, y, *params),
        der_mse_t2(X, y, *params),
        der_mse_t3(X, y, *params),
    )
    return do_gradiend_update(*params, *gradient, lr)

In [52]:
# Step size used for each gradient-descent update.
lr = 1e-3

In [56]:
# --- Where we are now: parameters and loss before the step ---
print("BEFORE UPDATE.\nT1: {}, \nT2: {}, \nT3:{}".format(t1, t2, t3))
print("LOSS: {}".format(mse(X, y, t1, t2, t3)))
print()

# --- Gradient of the loss at the current parameters ---
dt1, dt2, dt3 = [
    grad_fn(X, y, t1, t2, t3)
    for grad_fn in (der_mse_t1, der_mse_t2, der_mse_t3)
]
print("GRADIENTS. \nDT1: {}, \nDT2: {}, \nDT3:{}".format(dt1, dt2, dt3))
print()

# --- Take one step. This rebinds t1, t2, t3, so running the cell again
# --- continues descending from the updated parameters.
t1, t2, t3 = do_gradiend_update(t1, t2, t3, dt1, dt2, dt3, lr)
print("AFTER UPDATE.\nT1: {}, \nT2: {}, \nT3:{}".format(t1, t2, t3))
print("LOSS: {}".format(mse(X, y, t1, t2, t3)))

BEFORE UPDATE.
T1: 2.4969107388079412, 
T2: 3.3307229371525535, 
T3:0.8922746834381965
LOSS: 2.609551771234181

GRADIENTS. 
DT1: -0.4843401303795559, 
DT2: -0.8372617522128764, 
DT3:-1.8459414639301688

AFTER UPDATE.
T1: 2.497395078938321, 
T2: 3.3315601989047665, 
T3:0.8941206249021266
LOSS: 2.6052440281170335
