In [3]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

from sklearn.metrics import r2_score

In [None]:
class LinearRegression:
    def __init__(self, learning_rate, iteration):
        """
        :param learning_rate: A samll value needed for gradient decent, default value id 0.1.
        :param iteration: Number of training iteration, default value is 10,000.
        """
        self.m = None
        self.n = None
        self.w = None
        self.lr = learning_rate
        self.it = iteration

    def cost_function(self, y, y_pred):
        """
        :param y: Original target value.
        :param y_pred: predicted target value.
        """
        return (1 / (2*self.m)) * np.sum(np.square(y_pred - y))
    
    def hypothesis(self, weights, X):
        """
        :param weights: parameter value weight.
        :param X: Training samples.
        """
        return np.dot(X, weights)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N Dimentional vector.
        :param y: training data target value -----> 1 Dimentional array.
        """
        # Insert constant ones for bias weights.
        X = np.insert(X, 0, 1, axis=1)
        # Target value should be in the shape of (n, 1) not (n, ).
        # So, this will check that and change the shape to (n, 1), if not.
        try:
            y.shape[1]
        except IndexError as e:
            # we need to change it to the 1 D array, not a list.
            print("ERROR: Target array should be a one dimentional array not a list"
                  "----> here the target value not in the shape of (n,1). \nShape ({shape_y_0},1) and {shape_y} not match"
                  .format(shape_y_0 = y.shape[0] , shape_y = y.shape))
            return 
        
        # m is the number of training samples.
        self.m = X.shape[0]
        # n is the number of features.
        self.n = X.shape[1]

        # Set the initial weight.
        self.w = np.zeros((self.n , 1))

        for it in range(1, self.it+1):
            # 1. Find the predicted value through the hypothesis.
            # 2. Find the Cost function value.
            # 3. Find the derivation of weights.
            # 4. Apply Gradient Decent.
            y_pred = self.hypothesis(self.w, X) # shape - (m, 1)
            print("y_pred", y_pred)
            cost = self.cost_function(y, y_pred) # (1,1)
            print("cost", cost)
            # fin the derivative.
            dw = (1/self.m) * np.dot(X.T, (y_pred - y)) # # shape - (1, n)
            print("dw", dw)

            # change the weight parameter.
            self.w = self.w - self.lr * dw

            if it % 1000 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))
    def predict(self, test_X):
        """
        :param test_X: feature values to predict.
        """
        # Insert constant ones for bias weights
        test_X = np.insert(test_X, 0, 1, axis=1)
        y_pred = self.hypothesis(self.w, test_X)
        return y_pred
    


In [26]:
arr = np.random.randint(1,10, (2,5))
arr = np.insert(arr, obj=0, values=1, axis=1)
arr

array([[1, 7, 4, 5, 5, 2],
       [1, 6, 1, 1, 9, 1]])

In [27]:
# Define the traning data.
X, y = make_regression(n_samples=50000, n_features=8)

# Chnage the shape of the target to 1 dimentional array.
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))

Number of training data samples-----> 50000
Number of training features --------> 8
Shape of the target value ----------> (50000, 1)


In [28]:
# display the data.
data = pd.DataFrame(X)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.872834,-0.091394,-0.449172,-2.2151,-0.183212,-0.470947,-0.553001,0.224764
1,-1.118741,2.639315,-1.841063,-1.153871,-2.127522,-0.402972,0.634471,-0.464345
2,0.021896,0.893358,-0.476948,-0.355454,-1.417674,-1.424461,0.494902,-1.502958
3,0.503519,1.297099,-0.959694,0.1866,0.040936,1.379175,0.452849,-0.363203
4,0.412848,1.366433,-1.369333,0.293715,0.727459,1.513542,2.105256,-0.63006


In [29]:
# display the data.
data_y = pd.DataFrame(y)
data_y.head()

Unnamed: 0,0
0,-33.873602
1,-131.985132
2,-285.771903
3,213.878566
4,275.364738


In [30]:
#define the parameters
param = {
    "learning_rate" : 0.1,
    "iteration" : 10000
}
print("="*100)
linear_reg = LinearRegression(**param)

# Train the model.
linear_reg.train(X[:2], y[:2]) 

# Predict the values.
y_pred = linear_reg.predict(X)

#Root mean square error.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)

y_pred [[0.]
 [0.]]
cost 4641.874014840553
dw [[  82.92936725]
 [ -59.04556264]
 [ 172.62726069]
 [-129.1040082 ]
 [-113.66360446]
 [-143.50368017]
 [ -34.56950914]
 [  32.50427332]
 [ -26.83653348]]
y_pred [[ -34.39466912]
 [-132.57677706]]
cost 0.1553885695367132
dw [[-0.55635584]
 [ 0.1035462 ]
 [-0.75695738]
 [ 0.66165207]
 [ 0.91844851]
 [ 0.67710165]
 [ 0.24190558]
 [-0.04361534]
 [ 0.07880508]]
y_pred [[ -34.10220983]
 [-131.92175148]]
cost 0.014069634123270326
dw [[-0.08261341]
 [-0.13522153]
 [ 0.09408759]
 [-0.00700189]
 [ 0.21662775]
 [-0.04648014]
 [ 0.04106063]
 [ 0.08331674]
 [-0.04040667]]
y_pred [[ -34.02701749]
 [-131.94514024]]
cost 0.006283897224988749
dw [[-0.05671162]
 [-0.08932334]
 [ 0.05978636]
 [-0.00235895]
 [ 0.14684226]
 [-0.02848816]
 [ 0.02806735]
 [ 0.05510627]
 [-0.02652616]]
y_pred [[ -33.97613892]
 [-131.95839009]]
cost 0.0028072263331800585
dw [[-0.03789726]
 [-0.05970749]
 [ 0.03997609]
 [-0.00158867]
 [ 0.09813599]
 [-0.01905427]
 [ 0.01875647]
 [ 0

# Lienar Regression using Skicit-Learn

In [7]:
from sklearn.linear_model import LinearRegression as LinearRegression_sklearn
from sklearn.metrics import r2_score

In [8]:
# data is already defined, going to use the same data for comparision.
print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))

Number of training data samples-----> 50000
Number of training features --------> 8


In [9]:
linear_reg_sklearn = LinearRegression_sklearn()
linear_reg_sklearn.fit(X, y)

# predict the value
y_pred_sklearn = linear_reg_sklearn.predict(X)
score = r2_score(y, y_pred_sklearn)
print("="*100)
print("R2 score of the model is {}".format(score))

R2 score of the model is 1.0


In [10]:
# Conclution:
# Our model works well as the scikit learn on speed and accuracy.