In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

from sklearn.metrics import r2_score

# Common Regression Class

In [2]:
class Regression:
    def __init__(self, learning_rate, iteration, regularization):
        """
        :param learning_rate: A samll value needed for gradient decent, default value id 0.1.
        :param iteration: Number of training iteration, default value is 10,000.
        """
        self.m = None
        self.n = None
        self.w = None
        self.b = None
        self.regularization = regularization # will be the l1/l2 regularization class according to the regression model.
        self.lr = learning_rate
        self.it = iteration

    def cost_function(self, y, y_pred):
        """
        :param y: Original target value.
        :param y_pred: predicted target value.
        """
        return (1 / (2*self.m)) * np.sum(np.square(y_pred - y)) + self.regularization(self.w)
    
    def hypothesis(self, weights, bias, X):
        """
        :param weights: parameter value weight.
        :param X: Training samples.
        """
        return np.dot(X, weights) #+ bias

    def train(self, X, y):
        """
        :param X: training data feature values ---> N Dimentional vector.
        :param y: training data target value -----> 1 Dimentional array.
        """
        # Insert constant ones for bias weights.
        X = np.insert(X, 0, 1, axis=1)

        # Target value should be in the shape of (n, 1) not (n, ).
        # So, this will check that and change the shape to (n, 1), if not.
        try:
            y.shape[1]
        except IndexError as e:
            # we need to change it to the 1 D array, not a list.
            print("ERROR: Target array should be a one dimentional array not a list"
                  "----> here the target value not in the shape of (n,1). \nShape ({shape_y_0},1) and {shape_y} not match"
                  .format(shape_y_0 = y.shape[0] , shape_y = y.shape))
            return 
        
        # m is the number of training samples.
        self.m = X.shape[0]
        # n is the number of features.
        self.n = X.shape[1]

        # Set the initial weight.
        self.w = np.zeros((self.n , 1))

        # bias.
        self.b = 0

        for it in range(1, self.it+1):
            # 1. Find the predicted value through the hypothesis.
            # 2. Find the Cost function value.
            # 3. Find the derivation of weights.
            # 4. Apply Gradient Decent.
            y_pred = self.hypothesis(self.w, self.b, X)
            #print("iteration",it)
            #print("y predict value",y_pred)
            cost = self.cost_function(y, y_pred)
            #print("Cost function",cost)
            # fin the derivative.
            dw = (1/self.m) * np.dot(X.T, (y_pred - y)) + self.regularization.derivation(self.w)
            #print("weights derivation",dw)
            #db = -(2 / self.m) * np.sum((y_pred - y))

            # change the weight parameter.
            self.w = self.w - self.lr * dw
            #print("updated weights",self.w)
            #self.b = self.b - self.lr * db


            if it % 10 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))
    def predict(self, test_X):
        """
        :param test_X: feature values to predict.
        """
        # Insert constant ones for bias weights.
        test_X = np.insert(test_X, 0, 1, axis=1)

        y_pred = self.hypothesis(self.w, self.b, test_X)
        return y_pred

# Regularization class for elastic net

In [7]:
class l1_l2_regularization:
    def __init__(self, lamda = 0.1, l_ratio = 0.5):
        self.lamda = lamda 
        self.l_ratio = l_ratio

    def __call__(self, weights):
        l1_contribution = self.l_ratio * self.lamda * np.sum(np.abs(weights))
        l2_contribution = (1 - self.l_ratio) * self.lamda * 0.5 * np.sum(np.square(weights))
        return (l1_contribution + l2_contribution)

    def derivation(self, weights):
        l1_derivation = self.lamda * self.l_ratio * np.sign(weights)
        l2_derivation = self.lamda * (1 - self.l_ratio) * weights
        return (l1_derivation + l2_derivation)

# Data Creation

In [4]:
# Define the traning data.
X, y = make_regression(n_samples=50000, n_features=8)

# Chnage the shape of the target to 1 dimentional array.
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))

Number of training data samples-----> 50000
Number of training features --------> 8
Shape of the target value ----------> (50000, 1)


In [5]:
# display the data.
data = pd.DataFrame(X)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.867789,-0.101748,-1.880689,0.29781,2.738893,-0.505242,0.434876,-0.55995
1,-0.770503,-0.931669,1.308884,0.389382,1.49571,-0.105214,-0.000994,-0.002724
2,-1.400998,-0.615129,-0.454939,-0.463922,0.726401,-0.275039,-0.093223,2.472022
3,0.225891,-0.845367,0.57486,1.833177,-1.292813,0.846228,-1.296938,-1.71716
4,-0.558011,0.452136,-1.667938,-0.677074,0.493884,-0.087129,0.007175,-0.688758


In [6]:
# display the data.
data_y = pd.DataFrame(y)
data_y.head()

Unnamed: 0,0
0,131.922335
1,60.788398
2,83.642388
3,-212.055816
4,-104.694715


# Elastic net from Scratch

Elastica Net is the combination of Lasso and Ridge regression. It has the advanage of both Lasso and Ridge Regression.
- **Lasso** --> Will completely eliminates the correlated/unwanted features from model. So, the model has less complexity and becomes easy to learn.
- **Ridge** --> Will shrink the parameter close to zero , but never eliminate the variables.

Usage:
- We can use the **Lasso** model --> when we have a dataset with more correlated/unwanted features.
- We can use the **Ridge** model --> When we have a dataset with more useful features.

### We can use the Elastic Net when we don't know the correlation between the features and having lots of (1000s<) features.

In [17]:
class ElasticNetRegression(Regression):
    """
    Elastic Regression class
    """
    def __init__(self, lamda, l_ratio, learning_rate, iteration):
        """
        Define the hyperparameters we are going to use in this model.
        :param lamda: Regularization factor.
        :param l_ratio: The ratio between lasso and ridge regression--> default is 0.5.
        :param learning_rate: A samll value needed for gradient decent, default value id 0.1.
        :param iteration: Number of training iteration, default value is 10,000.
        """
        self.regularization = l1_l2_regularization(lamda,l_ratio)
        super(ElasticNetRegression, self).__init__(learning_rate, iteration, self.regularization)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N Dimentional vector.
        :param y: training data target value -----> 1 Dimentional array.
        """
        return super(ElasticNetRegression, self).train(X, y)
    def predict(self, test_X):
        """
        parma test_X: Value need to be predicted.
        """
        return super(ElasticNetRegression, self).predict(test_X)

In [16]:
#define the parameters
param = {
    "l_ratio" : 0.5,
    "lamda" : 0.1,
    "learning_rate" : 0.1,
    "iteration" : 100
}
print("="*100)
elastic_net_reg = ElasticNetRegression(**param)

# Train the model.
elastic_net_reg.train(X, y) 

# Predict the values.
y_pred = elastic_net_reg.predict(X)

#Root mean square error.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)

The Cost function for the iteration 10----->1976.4900563834374 :)
The Cost function for the iteration 20----->701.1758775240576 :)
The Cost function for the iteration 30----->563.2582248687778 :)
The Cost function for the iteration 40----->548.3351232529961 :)
The Cost function for the iteration 50----->546.7197090314678 :)
The Cost function for the iteration 60----->546.5445700049715 :)
The Cost function for the iteration 70----->546.5256016500784 :)
The Cost function for the iteration 80----->546.5236497045239 :)
The Cost function for the iteration 90----->546.523290811168 :)
The Cost function for the iteration 100----->546.5233321859182 :)
The r2_score of the trained model 0.9976669791861328


# Elastic Net using scikit-learn for comparition

In [14]:
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score

# data is already defined, going to use the same data for comparision.
print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))


Number of training data samples-----> 50000
Number of training features --------> 8


In [15]:
elastic_net_sklearn = ElasticNet()
elastic_net_sklearn.fit(X, y)

# predict the value
y_pred_sklearn = elastic_net_sklearn.predict(X)
score = r2_score(y, y_pred_sklearn)
print("="*100)
print("R2 score of the model is {}".format(score))

R2 score of the model is 0.8855865841875906


# Conclution

Our model perofrming well with the default parameters. We can tune the parameter littele bit to get some more good results without the tension of pverfitting :)

# Supervised Machine Learning models scratch series....
you can also check....

- 1) Linear Regression     ---> https://www.kaggle.com/ninjaac/linear-regression-from-scratch
- 2) Lasso Regression      ---> https://www.kaggle.com/ninjaac/lasso-ridge-regression 
- 3) Ridge Regression      ---> https://www.kaggle.com/ninjaac/lasso-ridge-regression 
- 4) ElasticNet Regression ---> https://www.kaggle.com/ninjaac/elasticnet-regression (Same Notebook you are looking now)