This notebook performs the following two major operations:

1.   Generate single variable (+ bias) sample data with linear relationship 
2.   Perform gradient descent Linear Regression training.

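The main purpose of this notebook is to demonstrate the working principle of linear regression for the single-variable (+ bias) case, which can easily be extended to the n-variable case (multiple linear regression; polynomial regression is the special case in which the extra variables are powers of x).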



In [7]:
import numpy as np
import pandas as pd
import plotly.express as px

**Class to generate single variable (+ bias) linear data with random Gaussian noise**

In [17]:
class LinearDataWithRandomNoise:
  """Synthetic single-variable data set ``y = slope * x + intercept + e``.

  ``x`` is drawn uniformly from [0, 1) and ``e`` is zero-mean Gaussian noise.
  The sample is generated once, at construction time, and kept in ``self.data``
  as a DataFrame with the columns ``x``, ``e`` and ``y``.

  Args:
    intercept: bias term of the generating line.
    slope: slope of the generating line.
    nb_of_sample: number of rows to generate.
    noise_scale: standard deviation of the Gaussian noise ``e``.
    seed: optional seed for a private random generator. When ``None`` the
      global ``np.random`` state is used, so ``np.random.seed(...)`` still
      controls the sample.
  """

  def __init__(self, intercept=0, slope=1, nb_of_sample=100,
               noise_scale=0.15, seed=None):
    self.slope = slope
    self.intercept = intercept
    self.nb_of_sample = nb_of_sample
    self.noise_scale = noise_scale
    self.seed = seed
    self.data = self._generate_data()

  def _linear(self, x):
    """Evaluate the noise-free line at ``x`` (scalar, array or Series)."""
    return self.slope * x + self.intercept

  def _generate_data(self):
    """Draw the noise and the x values, then build the ``x``/``e``/``y`` frame."""
    # RandomState offers the same normal()/uniform() calls as the np.random
    # module, so both sources can be used interchangeably below.
    rng = np.random if self.seed is None else np.random.RandomState(self.seed)
    # Draw order (noise first, then x) is kept so globally seeded runs are
    # unchanged.
    errors = rng.normal(scale=self.noise_scale, size=self.nb_of_sample)
    x = rng.uniform(size=self.nb_of_sample)
    df = pd.DataFrame({'x': x, 'e': errors})
    # Vectorised column arithmetic instead of a row-wise apply().
    df['y'] = self._linear(df['x']) + df['e']

    return df

  def get_data(self):
    """Return the generated sample as a new DataFrame sorted by ``x``."""
    return self.data.sort_values('x')

**Gradient descent Linear Regression Model**

In [18]:
class LinearRegressionGradientDescent:
    """Single-variable linear regression trained by batch gradient descent.

    The model is ``y_hat = beta_0 + beta_1 * x`` and the minimised loss is
    ``0.5 * sum((y_hat - y) ** 2)``. The gradients are sums over all samples
    (not means), so a suitable ``learning_rate`` depends on the sample size.

    Args:
        intercept: initial value of ``beta_0``.
        slope: initial value of ``beta_1``.
        learning_rate: step size applied to both gradients.
        nb_iterations: number of gradient steps performed by ``fit``.
        log_results: when true, ``fit`` prints the loss and stores the loss
            and the predictions after every step.
    """

    def __init__(self, intercept=0, slope=1, learning_rate=0.01, 
                 nb_iterations=100, log_results=True):
        self.beta_0 = intercept
        self.beta_1 = slope
        self.learning_rate = learning_rate
        self.nb_iterations = nb_iterations
        self.log_results = log_results
        # 'start' -> predictions before training, i -> predictions after step i
        self.intermediate_y = {}
        # i -> [loss after step i]
        self.training_error = {}

    def _linear(self, x):
        """Return the current model's prediction for ``x``."""
        return self.beta_0 + self.beta_1 * x

    def fit(self, X, y):
        """Run ``nb_iterations`` gradient-descent steps on ``(X, y)``.

        ``X`` and ``y`` must support element-wise arithmetic with each other
        (e.g. NumPy arrays or pandas Series). Training continues from the
        current parameters, so calling ``fit`` again resumes from where the
        previous call stopped.
        """
        self.intermediate_y['start'] = self._linear(X)
        for i in range(self.nb_iterations):
            # Both gradients use the same residual, computed before either
            # parameter is updated (simultaneous update).
            residual = self._linear(X) - y
            gradient_beta_0 = np.sum(residual)
            gradient_beta_1 = np.sum(residual * X)
            # we are minimizing, so we step against the gradients
            self.beta_0 -= self.learning_rate * gradient_beta_0
            self.beta_1 -= self.learning_rate * gradient_beta_1
            if self.log_results:
                y_hat = self._linear(X)
                loss = 0.5 * np.sum((y_hat - y) ** 2)
                self.training_error[i] = [loss]
                self.intermediate_y[i] = y_hat
                print('loss: {}'.format(loss))

    def get_model_params(self):
        """Return the current ``(intercept, slope)`` pair."""
        return self.beta_0, self.beta_1

    def get_intermediate_y(self):
        """Return the dict of logged predictions ('start' plus one per step)."""
        return self.intermediate_y

    def get_training_loss(self):
        """Return the logged losses as a DataFrame with a ``squared_loss`` column.

        The index holds the iteration numbers. The frame is empty (but still
        has the ``squared_loss`` column) when nothing has been logged, e.g.
        before ``fit`` or with ``log_results=False``.
        """
        iterations = list(self.training_error)
        losses = [self.training_error[i][0] for i in iterations]
        # The column is named explicitly so an empty log does not fail on a
        # positional column lookup.
        return pd.DataFrame({'squared_loss': losses}, index=iterations,
                            dtype=float)

In [19]:
def display_training_progression(train_data, intermediate_y, 
                                 initial_iterations=2):
    """Plot the training data together with a few intermediate model fits.

    Shown traces: the model before training, the first ``initial_iterations``
    iterations, the middle iteration and the last iteration. Each iteration is
    drawn at most once, and only iterations that were actually logged are
    drawn.

    Args:
        train_data: object exposing ``x`` and ``y`` (e.g. a DataFrame with
            those columns); used for the scatter points and as the x axis of
            every model trace.
        intermediate_y: dict with the key ``'start'`` plus one entry per
            logged iteration, keyed ``0 .. n-1``.
        initial_iterations: how many of the first iterations to draw.
    """
    # Every key except 'start' is one logged iteration.
    nb_iterations = len(intermediate_y) - 1
    fig = px.scatter(x=train_data.x, y=train_data.y)
    fig.add_scatter(x=train_data.x, y=intermediate_y['start'], name="start")

    # Never ask for more early iterations than were logged.
    first_iterations = range(min(initial_iterations, nb_iterations))
    for i in first_iterations:
        fig.add_scatter(x=train_data.x, y=intermediate_y[i], 
                        name="model iter_{}".format(i))

    # Middle and last iteration, skipping anything already drawn above or
    # not logged, and avoiding a duplicate when both coincide.
    not_yet_drawn = range(len(first_iterations), nb_iterations)
    later_iterations = []
    for i in (nb_iterations // 2, nb_iterations - 1):
        if i in not_yet_drawn and i not in later_iterations:
            later_iterations.append(i)
    for i in later_iterations:
        fig.add_scatter(x=train_data.x, y=intermediate_y[i],
                        name="model (iter_{})".format(i))
    fig.show()

**Generate some sample training data**

In [20]:
# Parameters of the generating line and size of the sample.
RANDOM_SEED = 42
NB_OF_SAMPLE_DATA_POINTS = 100
PARAM_INTERCEPT = 0
PARAM_SLOPE = 1

# Seed the global NumPy generator so that Restart & Run All regenerates the
# same sample (and therefore the same training losses) on every run.
np.random.seed(RANDOM_SEED)
data = LinearDataWithRandomNoise(intercept=PARAM_INTERCEPT, slope=PARAM_SLOPE,
                              nb_of_sample=NB_OF_SAMPLE_DATA_POINTS).get_data()
fig = px.scatter(x=data.x, y=data.y)
fig.show()

**Gradient descent training**

In [21]:
# Starting values for the model; they differ from the generating parameters
# (PARAM_INTERCEPT, PARAM_SLOPE) so that training has something to learn.
PARAM_INTERCEPT_TEST = 2
PARAM_SLOPE_TEST = -1

my_lr = LinearRegressionGradientDescent(
    intercept=PARAM_INTERCEPT_TEST,
    slope=PARAM_SLOPE_TEST,
    nb_iterations=100,
)
my_lr.fit(data.x, data.y)

# Loss per training iteration, indexed by iteration number.
loss = my_lr.get_training_loss().squared_loss
fig = px.scatter(x=loss.index, y=loss)
fig.update_layout(xaxis_title="training iteration", yaxis_title="loss")
fig.show()

loss: 24.432761833891426
loss: 18.967818843504
loss: 16.483670052769398
loss: 14.506884428350562
loss: 12.797323380048914
loss: 11.306303318368222
loss: 10.004823881767368
loss: 8.868701137452572
loss: 7.876918557558195
loss: 7.011137633774864
loss: 6.255350351958403
loss: 5.595582501353821
loss: 5.019635237561379
loss: 4.516859525791662
loss: 4.077959244049647
loss: 3.69481930151885
loss: 3.3603555937721383
loss: 3.068384020511577
loss: 2.8135061440251823
loss: 2.591009374230955
loss: 2.396779834769502
loss: 2.2272262990741405
loss: 2.0792137900268055
loss: 1.9500056154839311
loss: 1.83721276793236
loss: 1.7387497526951115
loss: 1.6527960279682141
loss: 1.5777623437302735
loss: 1.5122613571446732
loss: 1.4550819811449316
loss: 1.4051669919188225
loss: 1.3615934812625692
loss: 1.3235557923769743
loss: 1.2903506235951538
loss: 1.2613640246157731
loss: 1.2360600448073211
loss: 1.2139708236951101
loss: 1.1946879404080628
loss: 1.177854862140024
loss: 1.1631603520006473
loss: 1.15033271436

**Display intermediate + final models**

In [22]:
# Report the fitted parameters, then plot the logged intermediate fits
# on top of the training data.
learned_params = my_lr.get_model_params()
print('learned model (intercept, slope): ', learned_params)
display_training_progression(data, my_lr.get_intermediate_y())

learned model (intercept, slope):  (0.041780301255991324, 0.9450948386598924)
