### Importing the Libraries

In [28]:
import numpy as np  # For creating nd arrays
from sklearn.model_selection import train_test_split # For random spliting of dataset
from sklearn import linear_model # For using inbuilt LinearRegression in the Library

### Fetching data from Train and Test Set

In [10]:
# Read the comma-separated training data into a 2-D float array
train_set = np.loadtxt(fname='boston_train.csv', delimiter=',')
# Bare last expression so the notebook renders the loaded array
train_set

array([[-0.40784991, -0.48772236, -1.2660231 , ...,  0.41057102,
        -1.09799011, 37.9       ],
       [-0.40737368, -0.48772236,  0.24705682, ...,  0.29116915,
        -0.52047412, 21.4       ],
       [ 0.1251786 , -0.48772236,  1.01599907, ..., -3.79579542,
         0.89107588, 12.7       ],
       ...,
       [-0.40831101, -0.48772236,  0.24705682, ...,  0.33206621,
        -0.33404299, 20.8       ],
       [-0.41061997, -0.48772236, -1.15221381, ...,  0.203235  ,
        -0.74475218, 22.6       ],
       [ 0.34290895, -0.48772236,  1.01599907, ...,  0.38787479,
        -1.35871335, 50.        ]])

In [11]:
# Read the comma-separated test data into a 2-D float array
test_set = np.loadtxt(fname='boston_test.csv', delimiter=',')
# Bare last expression so the notebook renders the loaded array
test_set

array([[ 2.91816626, -0.48772236,  1.01599907, ...,  0.80657583,
        -1.59755122,  1.04106182],
       [-0.40339151, -0.48772236,  0.40609801, ..., -1.13534664,
         0.44105193, -0.89473812],
       [-0.4131781 , -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.50084979],
       ...,
       [-0.41001449,  2.08745172, -1.37837329, ..., -0.0719129 ,
         0.39094481, -0.68167397],
       [-0.40317611, -0.48772236, -0.37597609, ...,  1.13022958,
         0.34007019,  0.20142086],
       [-0.13356344, -0.48772236,  1.2319449 , ..., -1.73641788,
        -2.93893082,  0.48877712]])

In [12]:
# Every column except the last holds the input features; the last column is the output
x, y = train_set[:, :-1], train_set[:, -1]

### Creating Class for Linear Regression

In [25]:
# Creating class of Linear Regression for n features using Gradient Descent
class LinearRegression:
    """Linear regression for n features, fitted with batch gradient descent.

    The model is ``y_hat = x @ constants`` with one weight per column of ``x``.
    No bias column is added internally: ``fit`` exposes the LAST fitted weight
    as ``intercept`` and the remaining ones as ``coeff``, so the caller is
    expected to append a column of ones as the last column of ``x``.

    Attributes set by ``fit``:
        coeff: 1-D array of weights for every column except the last.
        intercept: weight of the last column.
    """

    def __init__(self):
        print('Algorithm Initiated')

    # Defining the cost function
    def __cost(self, x, y):
        """Return the mean squared error of the current weights on (x, y)."""
        residuals = y - x @ self.__constants
        return (residuals ** 2).mean()

    # Defining function for gradient descent for minimizing the cost
    def __step_gradient_descent(self, x, y, alpha):
        """Move the weights one step of size ``alpha`` against the MSE gradient.

        The gradient of mean((y - x @ w)**2) with respect to w is
        (-2 / rows) * x.T @ (y - x @ w). Computing it as one matrix product
        evaluates the residuals once per step instead of once per feature.
        """
        residuals = y - x @ self.__constants
        costderivative = (-2.0 / x.shape[0]) * (x.T @ residuals)
        self.__constants = self.__constants - alpha * costderivative

    # Training the algorithm to get coefficients having minimum cost
    def __gradient_descent(self, x, y, alpha, iterations):
        """Run ``iterations`` gradient-descent steps on (x, y)."""
        for _ in range(iterations):
            self.__step_gradient_descent(x, y, alpha)

    def fit(self, x, y, alpha, iterations):
        """Fit the weights to the training data.

        Parameters:
            x: 2-D array-like of shape (rows, features) holding the feature values.
            y: array-like with one output value per row of ``x``.
            alpha: learning rate (step size of each gradient-descent update).
            iterations: number of gradient-descent updates to perform.

        Raises:
            ValueError: if ``x`` is not a non-empty 2-D array or ``y`` does not
                have exactly one value per row of ``x``.
        """
        x = np.asarray(x, dtype=float)
        # Flatten so a column-vector y cannot broadcast against the predictions
        y = np.asarray(y, dtype=float).ravel()
        if x.ndim != 2 or x.shape[0] == 0:
            raise ValueError('x must be a non-empty 2-D array of shape (rows, features)')
        if y.shape[0] != x.shape[0]:
            raise ValueError('x and y must have the same number of rows')

        self.__M = x.shape[0] # No of rows or values in the dataset
        self.__N = x.shape[1] # No of columns or features in the dataset
        self.__constants = np.zeros(self.__N) # Weights start at zero, one per column
        self.__gradient_descent(x, y, alpha, iterations)
        self.coeff = self.__constants[:-1]    # Separating coefficients from intercept
        self.intercept = self.__constants[-1] # Y-intercept (weight of the last column)

    def predict(self, test):
        """Return the predicted output for every row of ``test``.

        Parameters:
            test: 2-D array-like of shape (rows, features) with the same column
                layout that was passed to ``fit``. The number of rows is
                independent of the size of the training set.

        Returns:
            1-D float array with one prediction per row of ``test``.
        """
        test = np.asarray(test, dtype=float)
        return test @ self.__constants

    def score(self, x, y):
        """Return 1 - u/v for the predictions on ``x`` against ``y``.

        ``u`` is the residual sum of squares and ``v`` the total sum of squares
        of ``y`` around its mean (the coefficient of determination R^2).
        """
        y = np.asarray(y, dtype=float).ravel()
        y_pred = self.predict(x)
        u = ((y - y_pred) ** 2).sum()
        v = ((y - y.mean()) ** 2).sum()
        return 1 - u / v

### Testing the algorithm

In [33]:
# Appending the column containing 1s to the end of the features.
# The design matrix is rebuilt from train_set (not from x itself) so that
# re-running this cell cannot append a second column of ones.
ones = np.ones((train_set.shape[0], 1))
x = np.append(train_set[:, :-1], ones, axis = 1)
# Splitting the x,y into x_train,x_test,y_train,y_test for testing the algorithm;
# random_state is fixed so the split is reproducible
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state = 1)
print(x_train.shape)
print(y_train.shape)
algo = LinearRegression()
# Learning rate 0.02, 500 gradient-descent iterations
algo.fit(x_train,y_train,0.02,500)
print('Self Made Algo Score:', algo.score(x_train,y_train))
print('Self Made Algo Predictions:')
print(algo.predict(x_train))

(284, 20)
(284,)
Algorithm Initiated
Self Made Algo Score: 0.7448277559664804
Self Made Algo Predictions:
[17.01376972 39.77341829  7.21502911 29.95814045 44.12417081 26.03397684
 12.71600264 23.3289187  24.46286108 36.76850664 27.85221999 28.24144943
 20.8295491  28.85656205 11.10281496 27.24211072 26.66391027 25.37115466
 15.04746837 21.67135288  9.99034285 33.34813426 27.93158657 27.13110696
 15.39893695 19.42604732 17.90022595 15.98461315 27.93260812 28.60329119
 20.26537215 23.56503518 26.44995916 30.53349846 29.78303994 21.78615168
 20.01524848 19.87793957 36.2071457  24.63252826 23.28143064 20.96787847
 34.04727517 12.8664889  31.59650207 21.56918913 19.88423377 35.15633582
 33.18717585 18.98006632 10.50218086 25.0285985  23.11391723 32.94185867
 29.40089823 22.15481356 18.52958612 24.74309997 17.83087044 20.1668128
 21.19997475 20.90957127 34.49486463 22.43892752 24.72539225 31.25205226
 -5.79998101 16.76537866 32.76426334 30.92723815 14.92696465 18.37924289
 14.00835735 17.670

### Comparing with Linear Regression in SKlearn

In [34]:
# Fit scikit-learn's LinearRegression on the same training split for comparison
inbuilt_algo = linear_model.LinearRegression().fit(x_train, y_train)
inbuilt_score = inbuilt_algo.score(x_train, y_train)
print('Inbuilt algo score:', inbuilt_score)
print('Inbuilt algo predictions:')
inbuilt_predictions = inbuilt_algo.predict(x_train)
print(inbuilt_predictions)

Inbuilt algo score: 0.7450606468058916
Inbuilt algo predictions:
[17.0996055  39.77186499  7.1792253  30.1181784  44.10073796 26.08142335
 12.78157538 23.36219083 24.4804286  36.77265792 27.73795027 28.18965494
 20.84401057 28.860753   11.02562881 27.26140374 26.19980977 25.34798721
 14.98818858 21.52165885 10.02596738 33.21378262 27.98432967 27.15829294
 15.03695254 19.27843799 17.92090662 15.90825807 28.25905734 28.6744422
 20.36439081 23.56716958 26.4037441  30.57211125 29.8656989  21.61619623
 19.83666148 19.96024649 36.13776961 24.71995042 23.31931143 21.09045665
 34.12241696 12.84565843 31.85959556 21.66066654 19.79098133 35.1939373
 33.3703234  18.99754128 10.53137669 25.1242361  23.24916593 32.80132352
 29.45604496 22.22713581 18.57233934 24.72479949 18.00182356 20.07040416
 21.37351491 20.81265895 34.73805114 22.5160857  24.79859629 31.51890219
 -5.80547202 16.8377151  33.03161214 30.71002951 14.83164488 18.25359269
 13.94595193 17.57454095 18.30808755  9.28593099 33.20879209 