# Linear Regression using Gradient Descent from scratch
##### By: Ravikumar Patel

In [1]:
import numpy as np
import pandas as pd

class Linear_Regression:
    """Linear regression fitted by batch gradient descent on the mean squared error.

    The model keeps a single weight column ``w`` of shape ``(n_features + 1, 1)``.
    Row 0 is the bias (it multiplies the column of ones that is prepended to the
    data); the remaining rows are the per-feature weights.

    Parameters
    ----------
    learning_rate_b : float
        Step size applied to the bias term.
    learning_rate_w : float
        Step size applied to every feature weight.
    max_iter : int
        Number of gradient-descent updates performed by ``train``.
    """

    def __init__(self, learning_rate_b=0.1, learning_rate_w=0.1, max_iter=10000):
        self.learning_rate_b = learning_rate_b
        self.learning_rate_w = learning_rate_w
        self.max_iter = max_iter
        # Placeholder until train() succeeds; replaced by an (n_features + 1, 1) array.
        self.w = [0]

    def get_weights(self):
        """Return the current weights.

        This is the placeholder ``[0]`` before a successful ``train`` call and an
        ``(n_features + 1, 1)`` array afterwards, with the bias in row 0.
        """
        return self.w

    def train(self, data, target, initialization=None):
        """Fit the weights to ``data`` / ``target`` with gradient descent.

        Parameters
        ----------
        data : array-like, shape (n_samples, n_features)
            Feature matrix; anything ``np.asarray`` can convert.
        target : array-like with n_samples values
            Target values; flattened into a single column internally.
        initialization : sequence of n_features + 1 numbers, optional
            Starting weights, bias first. Defaults to all zeros.

        Returns
        -------
        None
            Validation problems (NaN in ``data``, wrong ``initialization``
            length) are reported with ``print`` and leave the current weights
            untouched; no exception is raised for them.
        """
        data = np.asarray(data)

        if np.isnan(data).any():
            print("Error: The data contains null/NaN value/s.")
            return

        # One weight per feature plus one for the bias.
        features = data.shape[1] + 1

        # Compare against None by identity: `!= None` on an ndarray is evaluated
        # element-wise and cannot be used as a condition.
        if initialization is not None and len(initialization) != features:
            print("Error: initialization has wrong number of values!")
            return

        if initialization is None:
            self.w = np.zeros((features, 1))
        else:
            self.w = np.asarray(initialization, dtype=float).reshape(features, 1)

        # Accept lists / pandas Series as well as ndarrays for the target.
        target = np.asarray(target).reshape(-1, 1)
        n = target.shape[0]

        # Prepend a column of ones so the bias is learned like any other weight.
        data = np.hstack((np.ones((n, 1)), data))

        # Per-weight step sizes: the bias rate in row 0, the feature rate elsewhere.
        learning_rates = np.full((features, 1), self.learning_rate_w, dtype=float)
        learning_rates[0, 0] = self.learning_rate_b

        # Loop-invariant pieces of the update, computed once.
        step = 2 * learning_rates / n
        data_t = data.T

        # Exactly max_iter updates.
        for _ in range(self.max_iter):
            # Difference between the predicted values and the targets.
            residuals = np.dot(data, self.w) - target

            # Gradient of the mean squared error is (2 / n) * X^T (Xw - y).
            self.w = self.w - step * np.dot(data_t, residuals)

    def predict(self, test):
        """Return predictions for ``test`` as an ``(n_samples, 1)`` array.

        ``test`` must be two-dimensional with the same number of feature
        columns that ``train`` was given.
        """
        test = np.asarray(test)
        n = test.shape[0]

        # Same column of ones that train() prepends, so row 0 of w acts as the bias.
        test = np.hstack((np.ones((n, 1)), test))

        return np.dot(test, self.w)

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import pandas as pd

# Generate a synthetic regression dataset with scikit-learn (nothing is read
# from disk). coef=True makes the call return a third value, kept as real_coef
# so the fitted weights can be compared against it later.
X, y, real_coef = make_regression(
    n_samples=500,
    n_features=2,
    noise=0.3,
    random_state=10,
    coef=True,
)

# Split into train and test sets; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Fit the from-scratch model with its default learning rates and iteration count.
lr = Linear_Regression()
lr.train(X_train, y_train)

# Predictions for the held-out split.
y_pred_lr = lr.predict(X_test)

# Drop row 0 of the weight column (the bias) to keep only the feature weights.
y_pred_coef = lr.get_weights()[1:]

# Square root of the test-set mean squared error (printed under the label "RSE").
test_error = np.sqrt(mean_squared_error(y_test, y_pred_lr))
print("Model's RSE: {:.8f}".format(test_error))

# Mean absolute gap between the coefficients returned with the data and the learned ones.
coef_gap = mean_absolute_error(real_coef, y_pred_coef)
print("Absolute Difference between real coef and predicted: {:.3f}".format(coef_gap))

# References

https://en.wikipedia.org/wiki/Linear_regression

https://machinelearningmastery.com/linear-regression-for-machine-learning/