# lets implement linear regression from scratch


## import section

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error

In [3]:
reg_dataset = datasets.make_regression(n_samples=1000, n_features=100, n_informative=10, n_targets=1, shuffle=True, random_state=42)
X = reg_dataset[0]
y = reg_dataset[1]

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size = .80,shuffle= True, random_state = 42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(800, 100)
(200, 100)
(800,)
(200,)


In [5]:
standardscalar = StandardScaler()
standardscalar.fit(X_train)
standardscalar.transform(X_train)
standardscalar.transform(X_test)


array([[ 0.75070067, -0.3481925 , -1.17378933, ..., -1.90940973,
         0.46984207,  0.62394679],
       [ 0.84896602,  0.31452426, -0.21710923, ...,  1.1251983 ,
         1.96654319,  0.40548378],
       [ 0.66993726,  1.23443248,  0.99119684, ..., -0.464584  ,
         1.02210749,  1.26164339],
       ...,
       [-0.62975098,  0.57138191,  1.33530621, ..., -1.29090787,
        -1.27575008,  0.22965224],
       [-0.84372868, -1.52121785,  0.47398936, ..., -0.86898062,
         1.74069389, -0.06974126],
       [ 0.29565289,  0.92703245,  0.36029547, ..., -0.17267686,
         0.77579379, -1.19959866]])

# lets write code for linear regression

In [6]:
class LinearRegression:

	""" Linear Regression with multiple features """

	def __init__(self, learning_rate = 1e-3, max_iter = 1000):

		self.num_feats = int
		self.train_size = int
		self.weights = np.array 
		self.y_train = np.array 
		self.input_matrix = np.array

		self.learning_rate = learning_rate   #Learning rate for gradient descent
		self.max_iter = max_iter 	#Number of iterations to run gradient descent
		self.cost_threshold = 0.1 * learning_rate  #stopping criterion for gradien descent

	def fit(self, X, y):

		"""
			Adjust weights to training data
		"""

		self.train_size = X.shape[0]
		self.num_feats = X.shape[1]
		self.input_matrix = np.append(X, np.ones(self.train_size).reshape(-1, 1), axis = 1)   #Add Column with Ones for intercept term 
		self.y_train = y
		self.weights = np.zeros(self.num_feats + 1) #Extra +1 for the intercept


		#optimize weights
		prev_cost = float("inf")
		for i in range(self.max_iter):
			cost = self._update_weights()

			if i%100 ==0 or i == self.max_iter:
				print("Cost after {} iterations is: {}".format(i, cost))
			if abs(prev_cost -cost) < self.cost_threshold*prev_cost:
				print("Cost after {} iterations is: {}".format(i, cost))
				break
			prev_cost = cost

	def _update_weights(self):

		"""
			Cost Function:
				l(w) = (1/n) * (((y - wX)^2) 
			Gradient:
				delta_w = dl/dw = (2/n)*( ((y - wX)*(-X))
							
							 (or)
				delta_w = dl/dw = (2/n)*( ((wX - y)*(X)) 
			Gradient Descent:
				w = w - (learning_rate * delta_w)
		"""

		y_pred = (self.weights * self.input_matrix).sum(axis = 1)  # y_pred = wX

		cost = (1/self.train_size) * (((self.y_train - y_pred) ** 2).sum(axis = 0))  

		err = (y_pred - self.y_train).reshape(-1, 1)  # err = wX - y

		delta_w = (2/self.train_size) * ((err * self.input_matrix).sum(axis = 0)) #delta_w = (2/n)*( (wX - y)*(X)) 

		self.weights = self.weights - (self.learning_rate * delta_w) 

		return cost


	def predict(self, X):

		""" Make predictions on given X using trained model """

		size = X.shape[0]
		X = np.append(X, np.ones(size).reshape(-1, 1), axis = 1)

		y_pred = (self.weights * X).sum(axis = 1)

		return y_pred 

# lets fit and predict on the model


In [7]:
lin_reg = LinearRegression(learning_rate = 1e-3, max_iter = 5000)
lin_reg.fit(X_train, y_train)

Cost after 0 iterations is: 24752.939027679462
Cost after 100 iterations is: 16089.58998020282
Cost after 200 iterations is: 10649.843615559808
Cost after 300 iterations is: 7174.625456539455
Cost after 400 iterations is: 4915.864065326695
Cost after 500 iterations is: 3422.6335649062535
Cost after 600 iterations is: 2419.055491024841
Cost after 700 iterations is: 1733.7712692324474
Cost after 800 iterations is: 1258.7036832302888
Cost after 900 iterations is: 924.6368822719228
Cost after 1000 iterations is: 686.5657787657841
Cost after 1100 iterations is: 514.7883539620626
Cost after 1200 iterations is: 389.4157356216081
Cost after 1300 iterations is: 296.9421014814182
Cost after 1400 iterations is: 228.0720258439568
Cost after 1500 iterations is: 176.32545810187213
Cost after 1600 iterations is: 137.12990189314425
Cost after 1700 iterations is: 107.22181827319972
Cost after 1800 iterations is: 84.24683015180845
Cost after 1900 iterations is: 66.48936497415818
Cost after 2000 iteratio

In [8]:
print('Linear Regression Model Coefficients (W): {}'.format(lin_reg.weights[:-1]))
print('Linear Regression Model Intercept (b): {}'.format(lin_reg.weights[-1]))

Linear Regression Model Coefficients (W): [ 4.82293823e+01 -3.73555364e-02  2.02705048e-02 -1.35986598e-01
  8.32801336e-02  1.12663535e-02 -1.15921617e-02  1.71066426e-02
  9.14474060e-03 -2.02269526e-02 -5.83827922e-02 -2.12861391e-02
 -7.05958943e-02  1.94315579e-02  8.27687196e+01  2.12640114e-03
  5.59259880e-02  7.82890440e+01  3.55943304e-02  4.02046537e-02
  9.66546200e-02 -5.36181117e-03  3.53885758e-02 -2.18655896e-02
 -2.76309478e-02 -7.30671940e-02  1.87158956e+00  2.79897754e-02
  1.14003261e-02  3.54715059e-02 -3.56067973e-03  5.50174271e-02
 -7.33437331e-02 -1.27835124e-02 -3.34085679e-02 -6.03063045e-02
  2.12262517e-02 -7.65986660e-02 -3.95743055e-04  1.41059099e-02
 -2.45605421e-02 -1.90800305e-02  5.69584117e+01  2.80131646e-02
 -4.79975328e-03  1.10451373e-02  2.82987803e-02 -1.53223315e-02
  8.33374862e-02  2.94664079e-02  1.60562138e-02  9.11381723e-03
  2.81750116e-02 -5.21291707e-02 -3.48727420e-02  6.55683730e-02
  3.20355072e-02  2.94640966e+01 -1.48969099e-02

# lets see the MAPE of the model

In [9]:
#Evaluating Model through MAPE
print("\nMean Absolute Percentage Error(for train data): {}".format(mean_absolute_percentage_error(y_train, lin_reg.predict(X_train))))
print("Mean Absolute Percentage Error(for test data): {}".format(mean_absolute_percentage_error(y_test, lin_reg.predict(X_test))))


Mean Absolute Percentage Error(for train data): 0.007574896963898376
Mean Absolute Percentage Error(for test data): 0.010813259071298898


### yes we got the best model because the data is simulated on real dataset we have lot of optimisation to do.