# lets implement Lasso regression from scratch


## import section

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
reg_dataset = datasets.make_regression(n_samples=1000, n_features=100, n_informative=10, n_targets=1, shuffle=True, random_state=42)
X = reg_dataset[0]
y = reg_dataset[1]

In [3]:
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size = .80,shuffle= True, random_state = 42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(800, 100)
(200, 100)
(800,)
(200,)


In [4]:
standardscalar = StandardScaler()
standardscalar.fit(X_train)
standardscalar.transform(X_train)
standardscalar.transform(X_test)


array([[ 0.75070067, -0.3481925 , -1.17378933, ..., -1.90940973,
         0.46984207,  0.62394679],
       [ 0.84896602,  0.31452426, -0.21710923, ...,  1.1251983 ,
         1.96654319,  0.40548378],
       [ 0.66993726,  1.23443248,  0.99119684, ..., -0.464584  ,
         1.02210749,  1.26164339],
       ...,
       [-0.62975098,  0.57138191,  1.33530621, ..., -1.29090787,
        -1.27575008,  0.22965224],
       [-0.84372868, -1.52121785,  0.47398936, ..., -0.86898062,
         1.74069389, -0.06974126],
       [ 0.29565289,  0.92703245,  0.36029547, ..., -0.17267686,
         0.77579379, -1.19959866]])

# Lasso Regression from scratch

In [8]:
class Lasso:

	def __init__(self, learning_rate = 1e-3, alpha = 1.0, max_iter = 1000):

		self.num_feats = int
		self.train_size = int
		self.weights = np.array 
		self.y_train = np.array 
		self.input_matrix = np.array

		self.learning_rate = learning_rate   #Learning rate for gradient descent
		self.alpha = alpha 	 #Regularization parameter, to control bias-variance tradeoff
		self.max_iter = max_iter 	#Number of iterations to run gradient descent
		self.cost_threshold = 0.1 * learning_rate  #stopping criterion for gradient descent

	def fit(self, X, y):

		"""
			Adjust weights to training data
		"""
		X=pd.DataFrame(X)
		self.train_size = X.shape[0]
		self.num_feats = X.shape[1]
		self.input_matrix = np.append(X, np.ones(self.train_size).reshape(-1, 1), axis = 1)   #Add Column with Ones for intercept term 
		self.y_train = y
		self.weights = np.zeros(self.num_feats + 1) #Extra +1 for the intercept


		#optimize weights
		prev_cost = float("inf")
		for i in range(self.max_iter):
			cost = self._update_weights()

			if i%100 ==0 or i == self.max_iter:
				print("Cost after {} iterations is: {}".format(i, cost))
			if abs(prev_cost -cost) < self.cost_threshold*prev_cost:
				print("Cost after {} iterations is: {}".format(i, cost))
				break
			prev_cost = cost

	def _update_weights(self):

		"""
			Cost Function:
				l(w) = (1/n) * (((y - wX)^2) + alpha * |W|))
			Gradient:
				delta_w = dl/dw = (1/n)*( 2*((y - wX)*(-X)) + alpha * (sign(w))) 
							
							 (or)
				delta_w = dl/dw = (1/n)*( 2*((wX - y)*(X)) + alpha * (sign(w)))
			Gradient Descent:
				w = w - (learning_rate * delta_w)
		"""

		y_pred = (self.weights * self.input_matrix).sum(axis = 1)  # y_pred = wX

		cost = (1/self.train_size) * (((self.y_train - y_pred) ** 2).sum(axis = 0) + (self.alpha * np.abs(self.weights)).sum(axis = 0)) 

		err = (y_pred - self.y_train).reshape(-1, 1)  # err = wX - y

		delta_w = (1/self.train_size) * (2 * ((err * self.input_matrix).sum(axis = 0)) + (self.alpha * np.sign(self.weights))) #delta_w = (1/n)*(2*((wX - y)*(X)) + alpha * sign(w))

		self.weights = self.weights - (self.learning_rate * delta_w) 

		return cost

	def predict(self, X):

		""" Make predictions on given X using trained model """

		size = X.shape[0]
		X = np.append(X, np.ones(size).reshape(-1, 1), axis = 1)

		y_pred = (self.weights * X).sum(axis = 1)

		return y_pred 
		

# lets fit and predict on the model


In [22]:
lasso_reg = Lasso(learning_rate = 1e-3, alpha = 40.0,max_iter = 2500)
lasso_reg.fit(X_train, y_train)

Cost after 0 iterations is: 24752.939027679462
Cost after 100 iterations is: 16101.750905126275
Cost after 200 iterations is: 10668.328639335172
Cost after 300 iterations is: 7196.236558002814
Cost after 400 iterations is: 4938.885802906496
Cost after 500 iterations is: 3446.156694609389
Cost after 600 iterations is: 2442.6218081251404
Cost after 700 iterations is: 1757.1687823774691
Cost after 800 iterations is: 1281.8595190902665
Cost after 900 iterations is: 947.5374658716821
Cost after 1000 iterations is: 709.2337246867888
Cost after 1100 iterations is: 537.2345621559493
Cost after 1200 iterations is: 411.65212938906575
Cost after 1300 iterations is: 318.98682727478234
Cost after 1400 iterations is: 249.9508236453371
Cost after 1500 iterations is: 198.06233461121383
Cost after 1600 iterations is: 158.7586488442248
Cost after 1700 iterations is: 128.75842730794838
Cost after 1800 iterations is: 105.70630790250594
Cost after 1900 iterations is: 87.87923601645005
Cost after 2000 itera

In [23]:
print('lasso Regression Model Coefficients (W): {}'.format(lasso_reg.weights[:-1]))
print('lasso Regression Model Intercept (b): {}'.format(lasso_reg.weights[-1]))

lasso Regression Model Coefficients (W): [ 4.77875140e+01 -5.77987665e-02  1.94015076e-01 -1.06873602e+00
  6.92965787e-01  3.72610675e-05 -6.23920851e-02  5.35606252e-03
  2.17785175e-01 -4.23569431e-01 -5.90784686e-01 -1.98120393e-01
 -6.95145527e-01  4.27326174e-02  8.15292189e+01  2.18042029e-01
  6.17920313e-01  7.60099687e+01  3.15401956e-01  3.52101251e-01
  5.65225268e-01  1.21285886e-01  2.89956777e-01 -2.43195152e-01
 -2.70882801e-01 -5.32560623e-01  1.89134558e+00  3.29921602e-01
  4.24822450e-01  4.33376634e-01 -1.65640735e-01  4.29236836e-01
 -6.30715945e-01 -1.67472869e-01 -1.89598480e-01 -5.91277672e-01
 -1.65577756e-01 -5.83966219e-01  5.63181574e-02  8.06094859e-02
 -3.43428707e-02 -1.49413937e-01  5.61775749e+01  1.95974163e-01
 -1.01453351e-01  3.19822712e-01 -6.33548933e-02 -3.51967296e-02
  7.99147318e-01  8.45923016e-02  1.51299757e-01 -5.30358990e-02
  3.31609778e-01 -6.20342824e-01 -5.43934888e-01  5.35077747e-01
  3.18852382e-01  2.86479146e+01  1.81707135e-01 

# lets see the MAPE of the model

In [24]:
#Evaluating Model through MAPE
print("\nMean Absolute Percentage Error(for train data): {}".format(mean_absolute_percentage_error(y_train, lasso_reg.predict(X_train))))
print("Mean Absolute Percentage Error(for test data): {}".format(mean_absolute_percentage_error(y_test, lasso_reg.predict(X_test))))


Mean Absolute Percentage Error(for train data): 0.07380197276792119
Mean Absolute Percentage Error(for test data): 0.10455231401496565
