In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_regression

from sklearn.metrics import r2_score

In [None]:
class LinearRegression:
    def __init__(self, learning_rate, iteration):
        """
        :param learning_rate: A samll value needed for gradient decent, default value id 0.1.
        :param iteration: Number of training iteration, default value is 10,000.
        """
        self.m = None
        self.n = None
        self.w = None
        self.lr = learning_rate
        self.it = iteration

    def cost_function(self, y, y_pred):
        """
        :param y: Original target value.
        :param y_pred: predicted target value.
        """
        return (1 / (2*self.m)) * np.sum(np.square(y_pred - y))
    
    def hypothesis(self, weights, X):
        """
        :param weights: parameter value weight.
        :param X: Training samples.
        """
        return np.dot(X, weights)

    def train(self, X, y):
        """
        :param X: training data feature values ---> N Dimentional vector.
        :param y: training data target value -----> 1 Dimentional array.
        """
        # Insert constant ones for bias weights.
        X = np.insert(X, 0, 1, axis=1)
        # Target value should be in the shape of (n, 1) not (n, ).
        # So, this will check that and change the shape to (n, 1), if not.
        try:
            y.shape[1]
        except IndexError as e:
            # we need to change it to the 1 D array, not a list.
            print("ERROR: Target array should be a one dimentional array not a list"
                  "----> here the target value not in the shape of (n,1). \nShape ({shape_y_0},1) and {shape_y} not match"
                  .format(shape_y_0 = y.shape[0] , shape_y = y.shape))
            return 
        
        # m is the number of training samples.
        self.m = X.shape[0]
        # n is the number of features.
        self.n = X.shape[1]

        # Set the initial weight.
        self.w = np.zeros((self.n , 1))

        for it in range(1, self.it+1):
            # 1. Find the predicted value through the hypothesis.
            # 2. Find the Cost function value.
            # 3. Find the derivation of weights.
            # 4. Apply Gradient Decent.
            y_pred = self.hypothesis(self.w, X) # shape - (m, 1)
            print("y_pred", y_pred)
            cost = self.cost_function(y, y_pred) # (1,1)
            print("cost", cost)
            # fin the derivative.
            dw = (1/self.m) * np.dot(X.T, (y_pred - y)) # # shape - (1, n)
            print("dw", dw)

            # change the weight parameter.
            self.w = self.w - self.lr * dw

            if it % 1000 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))
    def predict(self, test_X):
        """
        :param test_X: feature values to predict.
        """
        # Insert constant ones for bias weights
        test_X = np.insert(test_X, 0, 1, axis=1)
        y_pred = self.hypothesis(self.w, test_X)
        return y_pred
    


In [None]:
class LinearRegression:
    def __init__(self, learning_rate=0.1, iteration=1000):
        self.X = None
        self.y = None
        self.m = None 
        self.n = None 
        self.weights = None 
        self.max_iteration = iteration
        self.learning_rate = learning_rate
    
    def cost_func(self, y_true, y_pred):
        """
        Cost function is the mean squared error.

        """
        return (1/ (2 * self.m)) * np.sum(np.square(y_pred - y_true))


    def hypothesis_fun(self, X):
        return np.dot(X, self.weights) # X = (m, n) w = (n,1)

    def train(self, X: np.ndarray , y: np.ndarray):
        # basic checks does the X and y have the same number of rows.
        if X.shape[0] != y.shape[0]:
            return "X and Y should havethe same number of samples."
        
        # y should have the shape of (m, 1).
        try:
            y.shape[1]
        except IndexError as e:
            # we need to change it to the 1 D array, not a list.
            print("ERROR: Target array should be a one dimentional array not a list"
                  "----> here the target value not in the shape of (n,1). \nShape ({shape_y_0},1) and {shape_y} not match"
                  .format(shape_y_0 = y.shape[0] , shape_y = y.shape))
            y = y.reshape((y.shape[0], 1))
        
        # add the bias to the X itself.
        X = np.insert(X, 0, 1, axis=1)
        
        self.m, self.n = X.shape 
        self.X = X
        self.y = y

        

        # initialize the weights with the random values.
        # self.weights = np.random.randint(0,1, size=(self.n, 1)) # this always returns 0.
        self.weights = np.random.randn(self.n, 1) * 0.01 

        for it in range(self.max_iteration):
            # Find the y_pred using the hypothesis function.
            y_pred = self.hypothesis_fun(self.X)

            # Find the erorr value for this y_pred using the cost function.
            cost = self.cost_func(y, y_pred)

            # do gradiant decent.
            dw = (1/self.m) * np.dot(self.X.T, (y_pred - y)) # X=(m,n) y = (m,1) so Transport and do it
            self.weights = self.weights - self.learning_rate * dw

            if it % 100 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(it, cost))



    def predict(self, X_test):
        "return the predict value for each samples"
        X_test = np.insert(X_test, 0, 1, axis=1)
        y_pred = self.hypothesis_fun(X_test)
        return y_pred



In [44]:
arr = np.random.randint(1,10, (2,5))
arr = np.insert(arr, obj=0, values=1, axis=1)
arr

array([[1, 8, 1, 2, 7, 2],
       [1, 8, 9, 1, 9, 6]])

In [45]:
# Define the traning data.
X, y = make_regression(n_samples=50000, n_features=8)

# Chnage the shape of the target to 1 dimentional array.
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))

Number of training data samples-----> 50000
Number of training features --------> 8
Shape of the target value ----------> (50000, 1)


In [46]:
# display the data.
data = pd.DataFrame(X)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.545664,0.794003,-1.942561,-0.183487,-1.38261,0.083553,-0.209177,-0.070043
1,-0.103853,0.70074,-0.142098,1.099007,0.272149,-0.820587,-0.387234,-0.065239
2,0.507029,1.544404,-0.627961,0.158206,-1.773021,-1.503908,-0.256909,0.014677
3,0.888649,-1.933505,-0.423925,-0.476943,0.184528,-1.479792,-0.496063,-0.292087
4,-1.008542,-1.416029,0.720718,-1.277667,0.720664,-0.911296,-0.393592,0.243522


In [47]:
# display the data.
data_y = pd.DataFrame(y)
data_y.head()

Unnamed: 0,0
0,-75.104239
1,-42.705731
2,-212.686407
3,-137.908248
4,-86.251873


In [49]:
#define the parameters
param = {
    "learning_rate" : 0.1,
    "iteration" : 1000
    
}
print("="*100)
linear_reg = LinearRegression(**param)

# Train the model.
linear_reg.train(X[:2], y[:2]) 

# Predict the values.
y_pred = linear_reg.predict(X)

#Root mean square error.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)

The Cost function for the iteration 0----->1866.1065421370427 :)
The Cost function for the iteration 100----->2.548668243109092e-14 :)
The Cost function for the iteration 200----->1.262177448353619e-29 :)
The Cost function for the iteration 300----->1.262177448353619e-29 :)
The Cost function for the iteration 400----->1.262177448353619e-29 :)
The Cost function for the iteration 500----->1.262177448353619e-29 :)
The Cost function for the iteration 600----->1.262177448353619e-29 :)
The Cost function for the iteration 700----->1.262177448353619e-29 :)
The Cost function for the iteration 800----->1.262177448353619e-29 :)
The Cost function for the iteration 900----->1.262177448353619e-29 :)
The r2_score of the trained model 0.08229539696040045


# Lienar Regression using Skicit-Learn

In [7]:
from sklearn.linear_model import LinearRegression as LinearRegression_sklearn
from sklearn.metrics import r2_score

In [8]:
# data is already defined, going to use the same data for comparision.
print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))

Number of training data samples-----> 50000
Number of training features --------> 8


In [9]:
linear_reg_sklearn = LinearRegression_sklearn()
linear_reg_sklearn.fit(X, y)

# predict the value
y_pred_sklearn = linear_reg_sklearn.predict(X)
score = r2_score(y, y_pred_sklearn)
print("="*100)
print("R2 score of the model is {}".format(score))

R2 score of the model is 1.0


In [10]:
# Conclution:
# Our model works well as the scikit learn on speed and accuracy.

# MISTAKES:
- do ranodm initialization of the weights.
- have a cost functions just to print the output

In [33]:
class LinearRegresssion:
    def __init__(self,  learning_rate, iteration):
        self.m = None
        self.n = None 
        self.X = None
        self.y = None
        self.weight = None
        self.it = iteration
        self.lr = learning_rate
    
    def hypothesis(self):
        return np.dot(self.X,self.W) # x=(m,n) , w = [n,1]

    def cost_function(self, y_pred):
        return (1/ (2 * self.m)) * np.sum(np.square(y_pred - self.y))
    
    def train(self, X: np.ndarray, y:np.ndarray):
        self.m = X.shape[0]
        self.n = X.shape[1] # number of features
        print(X.shape)

        # insert the 0 in the first index, for the bias.
        np.insert(X, obj=0, values=1, axis=1)
        self.X = X
        self.y = y

        if X.shape[0] != y.shape[0]:
            return "The number of features must match"
        
        self.W = np.random.randn(self.n, 1) # NOTE: this should be ranodm not  ones
        print(self.W.shape)
        for _ in range(self.it):
            # We need to find the hypothesis 
            # find the cost
            # find the gradients
            # update the weights
            y_pred = self.hypothesis()
            print("y_pred", y_pred.shape)

            cost = self.cost_function(y_pred)
            dw = (1/self.m) * np.dot(self.X.T, (y_pred - y), )
            self.W = self.W - self.lr * dw

            if self.it % 100 == 0:
                print("The Cost function for the iteration {}----->{} :)".format(self.it, cost))


    def predict(self, ):
        pass 

In [34]:
a = np.array([[2,2,2],[1,3,3]])
print(a.shape)
np.insert(a, obj=0, values=1, axis=1)

(2, 3)


array([[1, 2, 2, 2],
       [1, 1, 3, 3]])

In [35]:
# Define the traning data.
X, y = make_regression(n_samples=50000, n_features=8)

# Chnage the shape of the target to 1 dimentional array.
y = y[:, np.newaxis]

print("="*100)
print("Number of training data samples-----> {}".format(X.shape[0]))
print("Number of training features --------> {}".format(X.shape[1]))
print("Shape of the target value ----------> {}".format(y.shape))

#define the parameters
param = {
    "learning_rate" : 0.1,
    "iteration" : 1000
    
}
print("="*100)
linear_reg = LinearRegresssion(**param)

# Train the model.
linear_reg.train(X[:2], y[:2]) 

# Predict the values.
y_pred = linear_reg.predict(X[:2])

#Root mean square error.
score = r2_score(y, y_pred)
print("The r2_score of the trained model", score)

Number of training data samples-----> 50000
Number of training features --------> 8
Shape of the target value ----------> (50000, 1)
(2, 8)
(8, 1)
y_pred (2, 1)
The Cost function for the iteration 1000----->28445.90041614725 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->5963.99497147058 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->1342.0407033390554 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->341.68178680197667 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->103.01159632917182 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->36.771150613974015 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->14.847488551486986 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->6.433171138313389 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->2.885915859103728 :)
y_pred (2, 1)
The Cost function for the iteration 1000----->1.3152731338281693 :)
y_pred (2, 1)
The Cost function for th

TypeError: LinearRegresssion.predict() takes 1 positional argument but 2 were given