In [5]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

In [24]:
class LinearRegression:
    def __init__(self, learning_rate, iteration):
        """
        :param learning_rate: A samll value needed for gradient decent, default value id 0.1.
        :param iteration: Number of training iteration, default value is 10,000.
        """
        self.m = None
        self.n = None
        self.w = None
        self.lr = learning_rate
        self.it = iteration

    def cost_function(self, y, y_pred):
        """
        :param y: Original target value.
        :param y_pred: predicted target value.
        """
        return (1 / (2*self.m) * ((y_pred - y) * (y_pred - y)))
    
    def hypothesis(self, weights, X):
        """
        :param weights: parameter value weight.
        :param X: Training samples.
        """
        return np.dot(X, weights)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N Dimentional vector.
        :param y: training data target value -----> 1 Dimentional array.
        """
        # Insert constant ones for bias weights.
        X = np.insert(X, 0, 1, axis=1)
        # Target value should be in the shape of (n, 1) not (n, ).
        # So, this will check that and change the shape to (n, 1), if not.
        try:
            y.shape[1]
        except IndexError as e:
            # we need to change it to the 1 D array, not a list.
            print("ERROR: Target array should be a one dimentional array not a list"
                  "----> here the target value not in the shape of (n,1). \nShape ({shape_y_0},1) and {shape_y} not match"
                  .format(shape_y_0 = y.shape[0] , shape_y = y.shape))
            return 
        
        # m is the number of training samples.
        self.m = X.shape[0]
        # n is the number of features.
        self.n = X.shape[1]

        # Set the initial weight.
        self.w = np.zeros((self.n , 1))

        for it in range(1, self.it+1):
            # 1. Find the predicted value through the hypothesis.
            # 2. Find the Cost function value.
            # 3. Find the derivation of weights.
            # 4. Apply Gradient Decent.
            y_pred = self.hypothesis(self.w, X)

            cost = self.cost_function(y, y_pred)

            dw = (y_pred - y).dot(X)

            # change the weight parameter.
            self.w -= self.lr * dw

            if it % 1000 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))
    def predict(self, test_X):
        """
        """
        # Insert constant ones for bias weights
        test_X = np.insert(test_X, 0, 1, axis=1)
        y_pred = self.hypothesis(self.w, test_X)
        return y_pred
    


In [16]:
# Define the traning data.
X, y = make_regression(n_samples=5000, n_features=4)

# Chnage the shape of the target to 1 dimentional array.
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))

Number of training data samples-----> 5000
Number of training features --------> 4
Shape of the target value ----------> (5000, 1)


In [17]:
# display the data.
data = pd.DataFrame(X)
data.head()

Unnamed: 0,0,1,2,3
0,-0.627118,-0.171318,-2.35706,0.090441
1,0.716848,-0.221724,1.504886,-0.5771
2,1.141414,-0.135924,0.603236,-0.736772
3,-1.372347,1.072938,-0.174622,-1.952037
4,0.939622,1.308123,1.144487,-0.342836


In [18]:
# display the data.
data_y = pd.DataFrame(y)
data_y.head()

Unnamed: 0,0
0,-6.962879
1,-2.190701
2,-0.653876
3,-32.987431
4,12.809097


In [25]:
#define the parameters
param = {
    "learning_rate" : 0.1,
    "iteration" : 10000
}
print("="*100)
linear_reg = LinearRegression(**param)

# Train the model.
linear_reg.train(X, y) 

# Predict the values.
y_pred = linear_reg.predict(X)

#Root mean square error.




ValueError: shapes (5000,1) and (5000,5) not aligned: 1 (dim 1) != 5000 (dim 0)