# lets implement ElasticNet regression from scratch


## import section

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
reg_dataset = datasets.make_regression(n_samples=1000, n_features=100, n_informative=10, n_targets=1, shuffle=True, random_state=42)
X = reg_dataset[0]
y = reg_dataset[1]

In [3]:
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size = .80,shuffle= True, random_state = 42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)


(800, 100)
(200, 100)
(800,)
(200,)


In [4]:
standardscalar = StandardScaler()
standardscalar.fit(X_train)
standardscalar.transform(X_train)
standardscalar.transform(X_test)


array([[ 0.75070067, -0.3481925 , -1.17378933, ..., -1.90940973,
         0.46984207,  0.62394679],
       [ 0.84896602,  0.31452426, -0.21710923, ...,  1.1251983 ,
         1.96654319,  0.40548378],
       [ 0.66993726,  1.23443248,  0.99119684, ..., -0.464584  ,
         1.02210749,  1.26164339],
       ...,
       [-0.62975098,  0.57138191,  1.33530621, ..., -1.29090787,
        -1.27575008,  0.22965224],
       [-0.84372868, -1.52121785,  0.47398936, ..., -0.86898062,
         1.74069389, -0.06974126],
       [ 0.29565289,  0.92703245,  0.36029547, ..., -0.17267686,
         0.77579379, -1.19959866]])

# lets write code for ElasticNet Regression regression

In [6]:
class ElasticNet:

	def __init__(self, learning_rate = 1e-3, alpha_ridge = 1.0,alpha_lasso=1.0, max_iter = 1000):

		self.num_feats = int
		self.train_size = int
		self.weights = np.array 
		self.y_train = np.array 
		self.input_matrix = np.array

		self.learning_rate = learning_rate   #Learning rate for gradient descent
		self.alpha_ridge = alpha_ridge 	 #Regularization parameter, to control bias-variance tradeoff for l2 regulariser
		self.alpha_lasso = alpha_lasso 	 #Regularization parameter, to control bias-variance tradeoff for l1 regulariser
		self.max_iter = max_iter 	#Number of iterations to run gradient descent
		self.cost_threshold = 0.1 * learning_rate  #stopping criterion for gradien descent

	def fit(self, X, y):

		"""
			Adjust weights to training data
		"""
		X = pd.DataFrame(X)
		self.train_size = X.shape[0]
		self.num_feats = X.shape[1]
		self.input_matrix = np.append(X, np.ones(self.train_size).reshape(-1, 1), axis = 1)   #Add Column with Ones for intercept term 
		self.y_train = y
		self.weights = np.zeros(self.num_feats + 1) #Extra +1 for the intercept


		#optimize weights
		prev_cost = float("inf")
		for i in range(self.max_iter):
			cost = self._update_weights()

			if i%100 ==0 or i == self.max_iter:
				print("Cost after {} iterations is: {}".format(i, cost))
			if abs(prev_cost -cost) < self.cost_threshold*prev_cost:
				print("Cost after {} iterations is: {}".format(i, cost))
				break
			prev_cost = cost

	def _update_weights(self):

		"""
			Cost Function:
				l(w) = (1/n) * (((y - wX)^2) + alpha_ridge * (w^2) + alpha_lasso * |w|)
			Gradient:
				delta_w = dl/dw = (2/n)*( ((y - wX)*(-X)) + alpha_ridge * w + (alpha_lasso/2) * (sign(w)))
							
							 (or)
				delta_w = dl/dw = (2/n)*( ((wX - y)*(X)) + alpha_ridge * w + (alpha_lasso/2) * (sign(w)))
			Gradient Descent:
				w = w - (learning_rate * delta_w)
		"""

		y_pred = (self.weights * self.input_matrix).sum(axis = 1)  # y_pred = wX

		cost = (1/self.train_size) * (((self.y_train - y_pred) ** 2).sum(axis = 0) + (self.alpha_ridge * (self.weights ** 2)).sum(axis = 0)  + (self.alpha_lasso * np.abs(self.weights)).sum(axis = 0))

		err = (y_pred - self.y_train).reshape(-1, 1)  # err = wX - y

		delta_w = (2/self.train_size) * (((err * self.input_matrix).sum(axis = 0)) + (self.alpha_ridge * self.weights) + ((self.alpha_lasso/2) * np.sign(self.weights)))  #delta_w = (2/n)*( ((wX - y)*(X)) + alpha_ridge * w + (alpha_lasso/2) * (sign(w))))

		self.weights = self.weights - (self.learning_rate * delta_w) 

		return cost


	def predict(self, X):

		""" Make predictions on given X using trained model """

		size = X.shape[0]
		X = np.append(X, np.ones(size).reshape(-1, 1), axis = 1)

		y_pred = (self.weights * X).sum(axis = 1)

		return y_pred 
		


## Lets fit the model and predict 

In [9]:
Elasticnet = ElasticNet(learning_rate = 1e-3, alpha_ridge = 20.0,alpha_lasso = 20.0,max_iter = 3000)
Elasticnet.fit(X_train, y_train)


Cost after 0 iterations is: 24752.939027679462
Cost after 100 iterations is: 16135.309690010261
Cost after 200 iterations is: 10777.663588795513
Cost after 300 iterations is: 7388.626369299648
Cost after 400 iterations is: 5207.570592743641
Cost after 500 iterations is: 3779.895351264939
Cost after 600 iterations is: 2829.811587995618
Cost after 700 iterations is: 2187.436910880633
Cost after 800 iterations is: 1746.4990276656276
Cost after 900 iterations is: 1439.4846770480196
Cost after 1000 iterations is: 1222.8524585007826
Cost after 1100 iterations is: 1068.079245696955
Cost after 1200 iterations is: 956.222664673718
Cost after 1300 iterations is: 874.524159534554
Cost after 1400 iterations is: 814.2728643101177
Cost after 1500 iterations is: 769.4457822808116
Cost after 1600 iterations is: 735.8240527059278
Cost after 1700 iterations is: 710.42502024405
Cost after 1800 iterations is: 691.1027743793537
Cost after 1900 iterations is: 676.3137210991154
Cost after 2000 iterations is:

In [10]:
print('Elasticnet Regression Model Coefficients (W): {}'.format(Elasticnet.weights[:-1]))
print('Elasticnet Regression Model Intercept (b): {}'.format(Elasticnet.weights[-1]))

Elasticnet Regression Model Coefficients (W): [ 4.65155976e+01  5.75050661e-02  3.41833143e-01 -1.54308690e+00
  1.03093960e+00 -2.58203998e-02 -1.32440842e-01 -9.39234217e-02
  4.41306129e-01 -7.94262260e-01 -8.95735471e-01 -3.32236278e-01
 -1.08324155e+00  9.26830083e-03  7.88910564e+01  4.88100917e-01
  1.01296878e+00  7.29243581e+01  4.82850510e-01  4.10187025e-01
  6.46123934e-01  2.49645690e-01  3.93472493e-01 -4.66033101e-01
 -4.33042578e-01 -8.30478543e-01  1.85417945e+00  5.74803436e-01
  7.70610925e-01  7.21875981e-01 -3.70957312e-01  5.96262187e-01
 -9.24495880e-01 -3.35963504e-01 -2.66326605e-01 -9.43175708e-01
 -4.60329705e-01 -8.31914591e-01  1.26846335e-01  1.05525056e-01
 -1.53481460e-04 -2.87263957e-01  5.43897641e+01  2.54519809e-01
 -2.30825325e-01  5.86464707e-01 -2.22323286e-01 -9.33964447e-03
  1.15612689e+00  7.33518571e-02  3.09894180e-01 -2.06417581e-01
  5.12719167e-01 -1.06492128e+00 -9.00675865e-01  7.86153732e-01
  5.28975332e-01  2.74479001e+01  4.62668842

# lets see the MAPE of the model

In [11]:
#Evaluating Model through MAPE
print("\nMean Absolute Percentage Error(for train data): {}".format(mean_absolute_percentage_error(y_train, Elasticnet.predict(X_train))))
print("Mean Absolute Percentage Error(for test data): {}".format(mean_absolute_percentage_error(y_test, Elasticnet.predict(X_test))))


Mean Absolute Percentage Error(for train data): 0.12746869977914257
Mean Absolute Percentage Error(for test data): 0.17788286349341875
