In [42]:
import numpy as np
import math

In [43]:
class LinearRegression2():
    '''
    Linear regression fitted with mini-batch gradient descent.
    After fitting, self.params is a (n_features + 1, 1) column of weights whose
    last entry is the constant (intercept) term.
    '''
    def __init__(self):
        '''
        Initialize the model.
        Store the self.params variable, which will be the weights that the model returns.
        '''
        self.params = None

    def init(self):
        '''
        Backward-compatible alias for the constructor logic.
        The constructor used to be (mis)named init, so it is kept for code that
        called it by hand; it resets self.params to None.
        '''
        self.__init__()

    def gradientDescent( self, X, y, learning_rate = 0.00001, iterations = 500, batch_size = 16, random_state = None):
        '''
        This function applies the Gradient Descent model on the dataset
        :param X: numpy.ndarray
            The X matrix containing the independent variable columns, shape (n_samples, n_features).
        :param y: numpy.ndarray
            The target vector, either flat (n_samples,) or a column (n_samples, 1).
        :param learning_rate: float
            Step size applied to the (summed, not averaged) batch gradient.
        :param iterations: int
            Number of full passes over the shuffled data.
        :param batch_size: int
            Number of rows per batch; the last batch of a pass may be smaller.
        :param random_state: int or None
            Seed for a private random generator. With None (default) the global
            numpy random state is used, exactly as before.
        :return: self, with self.params set to the fitted weights.
        :raises ValueError: if X is not 2-D, if X and y disagree on the number of
            rows, or if batch_size is smaller than 1.
        '''
        X = np.asarray(X)
        #Accept a flat target as well as a column vector
        y = np.asarray(y).reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of rows")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        #np.random and RandomState expose the same random_sample/shuffle functions
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        rows = X.shape[0]
        #One weight per feature plus one for the constant term
        cols = X.shape[1] + 1
        #Stack features, a column of ones for the constant term and the target,
        #so that a single shuffle keeps every row aligned
        data = np.concatenate([X, np.ones((rows, 1)), y], axis = 1)
        #Make the initial random guess for w
        w = rng.random_sample((cols, 1))
        #Go through all the iterations
        for _ in range(iterations):
            #Shuffle the rows of the data (in place, on our private copy)
            rng.shuffle(data)
            #Define X and y again
            x_it = data[:, :-1]
            y_it = data[:, -1:]
            #Iterate through the batches; slicing clips the last, shorter batch
            for batch_start in range(0, rows, batch_size):
                x_batch = x_it[batch_start: batch_start + batch_size]
                y_batch = y_it[batch_start: batch_start + batch_size]
                #Subtract the gradient from our previous estimation
                w -= learning_rate * np.matmul(x_batch.transpose(), np.matmul(x_batch, w) - y_batch)

        self.params = w
        return self

    def predict( self, X):
        '''
        Predict targets for the rows of X with the fitted weights.
        :param X: numpy.ndarray
            Feature matrix of shape (n_samples, n_features).
        :return: numpy.ndarray of shape (n_samples, 1).
        :raises RuntimeError: if the model has not been fitted yet.
        '''
        if self.params is None:
            raise RuntimeError("Model is not fitted yet; call gradientDescent() first.")
        X = np.asarray(X)
        #Add the column of ones that multiplies the constant term
        X = np.concatenate([X, np.ones((X.shape[0], 1))], axis = 1)
        return np.matmul(X, self.params)


In [44]:
#Create an instance of the hand-written gradient descent model defined above.
gr = LinearRegression2()

In [45]:
#Fit the model with mini-batch gradient descent.
#NOTE: X and y are only created in the mock-up data cell further down, so on a fresh kernel that cell has to be run before this one.
gr.gradientDescent(X, y, learning_rate = 0.00001, iterations = 500, batch_size = 16)

<__main__.LinearRegression2 at 0x251035e8c88>

In [53]:
#Show the model's prediction for the row at index 99 of X.
gr.predict(X)[99]

array([299.89385246])

In [52]:
#Show the feature value at row index 99 (presumably to compare it with the prediction for the same row).
X[99]

array([100.])

In [29]:
#Same model but from Sklearn library
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression #!!

In [8]:
#Define some mock-up data.
#Build a 100x1 column of evenly spaced values covering the range 0-100.
X = np.linspace(start = 0, stop = 100, num = 100).reshape(-1, 1)
#Draw one uniform noise value from [-5, 5) for every row of X.
e = np.random.uniform(low = -5, high = 5, size = X.shape)
#The target vector follows y = 3*x + e.
y = 3 * X + e

In [22]:
#Hold out 30% of the rows as the test set; random_state fixes the seed used for the shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.3,
    random_state = 42,
)

In [30]:
#Create an instance of the Linear Regression class from sklearn.
lr = LinearRegression()
#Call the fit function, which fits the Linear Regression model on the training split.
#As the last expression of the cell, the returned estimator is what gets displayed.
lr.fit( X_train, y_train)

LinearRegression()

In [31]:
#Define a variable that stores the predictions of our model.
y_pred = lr.predict(X_test)
#Show the mean squared error and mean absolute error to get an idea of how well the model has done.
#Both metrics are documented as (y_true, y_pred); they are symmetric, so this order gives the same values.
mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred)

(7.839801252768398, 2.3977675746920863)