In [5]:
import random


class LinearRegression:
    """Linear regression trained with per-sample gradient-descent updates.

    The hypothesis is h(x) = coeff[0] + coeff[1]*x[0] + coeff[2]*x[1] + ...
    ``coeff`` starts as three zeros (intercept plus two features) and is
    extended with zeros whenever a training row carries more features.
    """

    def __init__(self, l_rate, n_epoch, n_train_size):
        """
        Store the training hyper-parameters and zero the coefficients.

        l_rate:       learning rate multiplied into every update.
        n_epoch:      number of passes over the data made by ``fit``.
        n_train_size: divisor applied to every update and to the reported
                      loss (the ``m`` in J(θ)); the caller passes the number
                      of training samples.
        """
        self.l_rate = l_rate
        self.n_epoch = n_epoch
        self.n_train_size = n_train_size
        self.coeff = [0.0, 0.0, 0.0]

    def fit(self, X, y):
        """
        Fit linear model with SGD.

        Runs ``n_epoch`` passes of ``get_loss`` over (X, y) -- each pass both
        updates the coefficients and yields that pass's loss -- and prints
        the loss every 100th epoch. Returns None.
        """
        for epoch in range(self.n_epoch):
            sum_loss = self.get_loss(X, y)
            if epoch % 100 == 0:
                print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, self.l_rate, sum_loss))

    def get_coeff(self):
        """Return a new list holding the feature weights (everything but the intercept)."""
        return self.coeff[1:]

    def get_intercept(self):
        """Return the intercept term, coeff[0]."""
        return self.coeff[0]

    # get the loss based on input X and label data y
    # J(θ) = 1/(2m) ∑ (hθ(x(i)) − y(i))^2
    def get_loss(self, X, y):
        """
        Run one training pass over (X, y) and return its loss.

        Despite the name, this method mutates ``self.coeff``: after each
        sample is predicted, every coefficient is stepped against that
        sample's gradient. The returned value therefore accumulates squared
        errors measured with the coefficients as they stood just before each
        sample's update, divided by 2 * n_train_size.
        """
        sum_loss = 0.0
        for X_row, y_row in zip(X, y):
            # Grow the weight vector when a row has more features than the
            # initial two, instead of failing with IndexError.
            missing = len(X_row) + 1 - len(self.coeff)
            if missing > 0:
                self.coeff.extend([0.0] * missing)

            yhat = self.predict(X_row, self.coeff)
            loss = yhat - y_row
            sum_loss += loss**2

            # Use the instance's n_train_size; relying on a module-level
            # variable of the same name breaks outside the notebook session.
            step = self.l_rate * loss
            self.coeff[0] = self.coeff[0] - step / self.n_train_size
            for i in range(len(X_row)):
                self.coeff[i+1] = self.coeff[i+1] - step * X_row[i] / self.n_train_size
        return sum_loss / 2 / self.n_train_size

    # make a prediction using coeff
    # hθ(x) = θ^T x = θ0 + θ1*x1 + θ2*x2 + ...
    def predict(self, X_row, coeff):
        """
        Return coeff[0] + Σ coeff[i+1] * X_row[i] for a single sample.

        ``coeff`` is passed explicitly, so any weight vector can be evaluated;
        it must hold at least len(X_row) + 1 entries or IndexError is raised.
        """
        yhat = coeff[0]
        for i, value in enumerate(X_row):
            yhat += coeff[i+1] * value
        return yhat

In [15]:
if __name__ == '__main__':
    # Training hyper-parameters and the tolerance used by the checks below.
    l_rate = 0.005
    n_epoch = 8000
    delta = 0.1

    # Synthetic data: 20 rows of two integer features drawn from [1, 30],
    # labelled exactly by y = x1 + 5*x2 + 7. No seed is set in this cell,
    # so the rows differ from run to run.
    X = [[random.randint(1, 30) for _ in range(2)] for _ in range(20)]
    y = [7 + 5 * row[1] + row[0] for row in X]

    # Sample count; handed to the model as its update/loss divisor.
    n_train_size = len(X)

    linear_reg = LinearRegression(l_rate, n_epoch, n_train_size)
    linear_reg.fit(X, y)

    intercept, coeff = linear_reg.get_intercept(), linear_reg.get_coeff()
    print("intercept: {}".format(intercept))
    print("coeff: {}".format(coeff))

    # The fitted parameters should land within delta of the generating
    # plane: weights (1, 5) and intercept 7.
    assert delta > abs(coeff[0] - 1)
    assert delta > abs(coeff[1] - 5)
    assert delta > abs(intercept - 7)

>epoch=0, lrate=0.005, error=1356.723
>epoch=100, lrate=0.005, error=3.139
>epoch=200, lrate=0.005, error=2.703
>epoch=300, lrate=0.005, error=2.327
>epoch=400, lrate=0.005, error=2.004
>epoch=500, lrate=0.005, error=1.726
>epoch=600, lrate=0.005, error=1.486
>epoch=700, lrate=0.005, error=1.279
>epoch=800, lrate=0.005, error=1.102
>epoch=900, lrate=0.005, error=0.949
>epoch=1000, lrate=0.005, error=0.817
>epoch=1100, lrate=0.005, error=0.703
>epoch=1200, lrate=0.005, error=0.606
>epoch=1300, lrate=0.005, error=0.521
>epoch=1400, lrate=0.005, error=0.449
>epoch=1500, lrate=0.005, error=0.387
>epoch=1600, lrate=0.005, error=0.333
>epoch=1700, lrate=0.005, error=0.287
>epoch=1800, lrate=0.005, error=0.247
>epoch=1900, lrate=0.005, error=0.213
>epoch=2000, lrate=0.005, error=0.183
>epoch=2100, lrate=0.005, error=0.158
>epoch=2200, lrate=0.005, error=0.136
>epoch=2300, lrate=0.005, error=0.117
>epoch=2400, lrate=0.005, error=0.101
>epoch=2500, lrate=0.005, error=0.087
>epoch=2600, lrate=0.