This notebook performs the following two major operations:

1.   Generate single-variable (+ bias) sample data with a linear relationship.
2.   Train a linear regression model on that data using gradient descent.

The main purpose of this notebook is to demonstrate the working principle of linear regression for the single-variable (+ bias) case, which can easily be extended to the n-variable case (e.g. polynomial regression).



In [3]:
import numpy as np
import pandas as pd
import plotly.express as px

**Class to generate single variable (+ bias) linear data with random Gaussian noise**

In [4]:
class LinearDataWithRandomNoise:
    """Synthetic single-variable linear data with additive Gaussian noise.

    On construction, ``nb_of_sample`` rows are generated with columns:

    * ``x`` -- drawn uniformly from [0, 1)
    * ``e`` -- Gaussian noise with mean 0 and standard deviation ``noise_scale``
    * ``y`` -- ``slope * x + intercept + e``

    Parameters
    ----------
    intercept : float
        Bias term of the underlying line.
    slope : float
        Slope of the underlying line.
    nb_of_sample : int
        Number of rows to generate.
    noise_scale : float
        Standard deviation of the Gaussian noise added to ``y``.
    seed : int or None
        When given, a private ``np.random.RandomState(seed)`` is used so the
        generated data is reproducible. When ``None`` (default) the global
        numpy random state is used, so ``np.random.seed`` still applies.
    """

    def __init__(self, intercept=0, slope=1, nb_of_sample=100, noise_scale=0.15, seed=None):
        self.slope = slope
        self.intercept = intercept
        self.nb_of_sample = nb_of_sample
        self.noise_scale = noise_scale
        self.seed = seed
        self.data = self._generate_data()

    def _linear(self, x):
        """Evaluate the noise-free line at ``x`` (scalar, array or Series)."""
        return self.slope * x + self.intercept

    def _generate_data(self):
        """Draw the noise and the x values, then build the x/e/y DataFrame."""
        # Fall back to the module-level RNG when no seed is requested so that
        # callers who seed numpy globally keep getting the same stream.
        rng = np.random if self.seed is None else np.random.RandomState(self.seed)
        # Draw order (noise first, then x) is kept stable on purpose: it
        # determines which data a given seed produces.
        errors = rng.normal(scale=self.noise_scale, size=self.nb_of_sample)
        x = rng.uniform(size=self.nb_of_sample)
        df = pd.DataFrame({'x': x, 'e': errors})
        # Vectorized instead of a row-wise apply: faster, and it also works
        # when nb_of_sample == 0.
        df['y'] = self._linear(df['x']) + df['e']
        return df

    def get_data(self):
        """Return a copy of the generated data sorted by ascending ``x``."""
        return self.data.sort_values('x')

**Gradient descent Linear Regression Model**

In [5]:
class LinearRegressionGradientDescent:
    """Single-variable linear regression trained with batch gradient descent.

    The model is ``y_hat = beta_0 + beta_1 * x`` and the loss being minimized
    is ``0.5 * sum((y_hat - y) ** 2)``. Gradients are summed (not averaged)
    over the samples, so a suitable ``learning_rate`` depends on the number of
    training points.

    Parameters
    ----------
    intercept : float
        Initial value of ``beta_0``.
    slope : float
        Initial value of ``beta_1``.
    learning_rate : float
        Step size applied to the summed gradients.
    nb_iterations : int
        Number of full-batch update steps performed by ``fit``.
    log_results : bool
        When true, each iteration's predictions and loss are stored and the
        loss is printed.
    """

    def __init__(self, intercept=0, slope=1, learning_rate=0.01, nb_iterations=100, log_results=True):
        self.beta_0 = intercept
        self.beta_1 = slope
        self.learning_rate = learning_rate
        self.nb_iterations = nb_iterations
        self.log_results = log_results
        # 'start' -> predictions before training; i -> predictions after step i
        self.intermediate_y = {}
        # i -> single-element list holding the loss after step i
        self.training_error = {}

    def _linear(self, x):
        """Predict with the current parameters."""
        return self.beta_0 + self.beta_1 * x

    def fit(self, X, y):
        """Run ``nb_iterations`` gradient-descent steps on ``(X, y)``.

        ``X`` and ``y`` must support element-wise arithmetic (numpy arrays or
        pandas Series). Parameters are updated in place; training continues
        from the current ``beta_0`` / ``beta_1``.
        """
        self.intermediate_y['start'] = self._linear(X)
        for i in range(self.nb_iterations):
            # Both gradients share the same residual, so compute it once.
            residual = self._linear(X) - y
            gradient_beta_0 = np.sum(residual)
            gradient_beta_1 = np.sum(residual * X)
            # Descent step: move against the gradient to reduce the loss.
            # Both gradients were taken before either parameter changes.
            self.beta_0 -= self.learning_rate * gradient_beta_0
            self.beta_1 -= self.learning_rate * gradient_beta_1
            if self.log_results:
                y_hat = self._linear(X)
                self.training_error[i] = [0.5 * np.sum((y_hat - y) ** 2)]
                self.intermediate_y[i] = y_hat
                print('loss: {}'.format(self.training_error[i][0]))

    def get_model_params(self):
        """Return the current ``(beta_0, beta_1)`` tuple."""
        return self.beta_0, self.beta_1

    def get_intermediate_y(self):
        """Return the dict of logged predictions (key ``'start'`` plus one per iteration)."""
        return self.intermediate_y

    def get_training_loss(self):
        """Return the logged losses as a DataFrame with a ``squared_loss`` column.

        The index is the iteration number. When nothing has been logged
        (``log_results=False`` or ``fit`` not called yet) an empty frame with
        the same column is returned instead of raising.
        """
        losses = pd.Series(
            {iteration: error[0] for iteration, error in self.training_error.items()},
            dtype=float,
            name="squared_loss",
        )
        return losses.to_frame()

In [6]:
def display_training_progression(train_data, intermediate_y, initial_iterations=2):
    """Plot the training points together with the model at several training stages.

    Parameters
    ----------
    train_data : object with ``x`` and ``y`` attributes (e.g. a DataFrame)
        The points the model was trained on.
    intermediate_y : dict
        Predictions keyed by ``'start'`` (before training) and by iteration
        number ``0 .. n-1``, each aligned with ``train_data.x``.
    initial_iterations : int
        How many of the first iterations to draw individually. Capped at the
        number of logged iterations.

    Besides the first iterations, the midpoint and the last logged iteration
    are drawn when they lie beyond ``initial_iterations``.
    """
    # Every key except 'start' is one logged iteration.
    nb_data_points = len(intermediate_y) - 1
    mid_point_index = nb_data_points // 2
    # Never request more early iterations than were actually logged.
    initial_iterations = min(initial_iterations, nb_data_points)
    # Use the data passed in rather than a notebook-level global.
    fig = px.scatter(x=train_data.x, y=train_data.y)
    fig.add_scatter(x=train_data.x, y=intermediate_y['start'], name="start")
    for i in range(initial_iterations):
        fig.add_scatter(x=train_data.x, y=intermediate_y[i], name="model iter_{}".format(i))
    if mid_point_index > initial_iterations:
        fig.add_scatter(x=train_data.x, y=intermediate_y[mid_point_index], name="model (iter_{})".format(mid_point_index))
    if nb_data_points > initial_iterations:
        fig.add_scatter(x=train_data.x, y=intermediate_y[nb_data_points - 1], name="model (iter_{})".format(nb_data_points - 1))
    fig.show()

**Generate some sample training data**

In [7]:
NB_OF_SAMPLE_DATA_POINTS = 100
PARAM_INTERCEPT = 0
PARAM_SLOPE = 1
RANDOM_SEED = 42

# Seed numpy's global RNG so that Restart & Run All regenerates the same
# sample data (and therefore the same training curve) every time.
np.random.seed(RANDOM_SEED)
data = LinearDataWithRandomNoise(intercept=PARAM_INTERCEPT, slope=PARAM_SLOPE, nb_of_sample=NB_OF_SAMPLE_DATA_POINTS).get_data()
fig = px.scatter(x=data.x, y=data.y)
fig.show()

**Gradient descent training**

In [8]:
# Deliberately poor starting parameters so the descent is visible.
PARAM_INTERCEPT_TEST = 2
PARAM_SLOPE_TEST = -1

my_lr = LinearRegressionGradientDescent(
    intercept=PARAM_INTERCEPT_TEST,
    slope=PARAM_SLOPE_TEST,
    nb_iterations=100,
)
my_lr.fit(data.x, data.y)

# Plot the logged loss against the iteration number.
loss = my_lr.get_training_loss()["squared_loss"]
fig = px.scatter(x=loss.index, y=loss)
fig = fig.update_layout(xaxis_title="training iteration", yaxis_title="loss")
fig.show()

loss: 25.961613287478674
loss: 20.168942456342812
loss: 17.414308859370763
loss: 15.226388811182717
loss: 13.346625402971162
loss: 11.717872870839642
loss: 10.305382908403212
loss: 9.080331441330957
loss: 8.017835560187585
loss: 7.096324440919414
loss: 6.2970904496825035
loss: 5.6039083864313195
loss: 5.002706011958481
loss: 4.481278360739022
loss: 4.029039968138746
loss: 3.6368099756103645
loss: 3.2966257507499805
loss: 3.0015812384783014
loss: 2.7456867617213008
loss: 2.523747425413826
loss: 2.3312576553146576
loss: 2.1643097306732186
loss: 2.0195144538763055
loss: 1.8939323465951075
loss: 1.7850139756505465
loss: 1.6905481971566938
loss: 1.608617268250387
loss: 1.537557915133562
loss: 1.4759275670734804
loss: 1.4224750708797382
loss: 1.3761152913348191
loss: 1.3359070819433727
loss: 1.3010341787860147
loss: 1.270788629605176
loss: 1.2445564217180758
loss: 1.2218050169901367
loss: 1.202072540817247
loss: 1.184958405643189
loss: 1.1701151786609445
loss: 1.1572415286046802
loss: 1.1460

**Display intermediate + final models**

In [9]:
# Report the fitted parameters, then overlay the logged models on the data.
learned_params = my_lr.get_model_params()
print('learned model (intercept, slope): ', learned_params)
display_training_progression(train_data=data, intermediate_y=my_lr.get_intermediate_y())

learned model (intercept, slope):  (0.024013567881356786, 0.9797909740457063)
