### Importing the Libraries

In [1]:
import numpy as np  # For creating nd arrays
from sklearn.model_selection import train_test_split # For random splitting of dataset
from sklearn import linear_model # For using inbuilt LinearRegression in the Library

### Fetching data from Train and Test Set

In [2]:
# Read the comma-separated training file into a 2-D float array;
# it is the data the algorithm is fitted on.
train_set = np.loadtxt('boston_train.csv', delimiter=',')
train_set

array([[-0.40784991, -0.48772236, -1.2660231 , ...,  0.41057102,
        -1.09799011, 37.9       ],
       [-0.40737368, -0.48772236,  0.24705682, ...,  0.29116915,
        -0.52047412, 21.4       ],
       [ 0.1251786 , -0.48772236,  1.01599907, ..., -3.79579542,
         0.89107588, 12.7       ],
       ...,
       [-0.40831101, -0.48772236,  0.24705682, ...,  0.33206621,
        -0.33404299, 20.8       ],
       [-0.41061997, -0.48772236, -1.15221381, ...,  0.203235  ,
        -0.74475218, 22.6       ],
       [ 0.34290895, -0.48772236,  1.01599907, ...,  0.38787479,
        -1.35871335, 50.        ]])

In [3]:
# Read the comma-separated test file; these are the rows the fitted
# algorithm is later asked to predict.
test_set = np.loadtxt('boston_test.csv', delimiter=',')
test_set

array([[ 2.91816626, -0.48772236,  1.01599907, ...,  0.80657583,
        -1.59755122,  1.04106182],
       [-0.40339151, -0.48772236,  0.40609801, ..., -1.13534664,
         0.44105193, -0.89473812],
       [-0.4131781 , -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.50084979],
       ...,
       [-0.41001449,  2.08745172, -1.37837329, ..., -0.0719129 ,
         0.39094481, -0.68167397],
       [-0.40317611, -0.48772236, -0.37597609, ...,  1.13022958,
         0.34007019,  0.20142086],
       [-0.13356344, -0.48772236,  1.2319449 , ..., -1.73641788,
        -2.93893082,  0.48877712]])

In [4]:
# Column-wise split of the training array: everything except the last
# column is a feature (x), the last column is the output (y).
x, y = train_set[:, :-1], train_set[:, -1]

### Creating Class for Linear Regression

In [5]:
# Linear Regression for n features, trained with batch Gradient Descent
class LinearRegression:
    """Least-squares linear model fitted by batch gradient descent.

    The prediction is ``x @ constants`` with no separate bias term, so the
    caller has to supply any constant column itself. ``fit`` exposes the
    weight of the LAST column of ``x`` as ``intercept`` and the remaining
    weights as ``coeff``; that split is only meaningful when the last column
    of ``x`` is a column of 1s.

    Attributes set by ``fit``:
        coeff: weights of every column of ``x`` except the last.
        intercept: weight of the last column of ``x``.
    """

    def __init__(self):
        # None marks the model as "not fitted yet" (checked in predict)
        self.__constants = None
        print('Algorithm Initiated')

    # Defining the cost function
    def __cost(self, x, y):
        """Return the mean squared error of the current weights on (x, y).

        Not called by ``fit``; available for inspecting convergence.
        """
        residuals = y - x @ self.__constants
        return (residuals ** 2).mean()

    # Defining function for gradient descent for minimizing the cost
    def __step_gradient_descent(self, x, y, alpha):
        """Move the weights one step of size ``alpha`` against the MSE gradient."""
        # d(MSE)/d(w_j) = (-2/M) * sum_i (y_i - x_i . w) * x_ij, for all j at once
        residuals = y - x @ self.__constants
        costderivative = (-2 / self.__M) * (x.T @ residuals)
        self.__constants = self.__constants - alpha * costderivative

    # Training the algorithm to get coefficients having minimum cost
    def __gradient_descent(self, x, y, alpha, iterations):
        """Run ``iterations`` consecutive gradient-descent steps."""
        for _ in range(iterations):
            self.__step_gradient_descent(x, y, alpha)

    def fit(self, x, y, alpha, iterations):
        """Learn the weights from the training data.

        Args:
            x: 2-D array-like of shape (rows, features) holding the feature
                values; append a column of 1s as the last column to get an
                intercept.
            y: array-like with one output per row of ``x`` (a column vector
                is flattened).
            alpha: learning rate, i.e. the step size of each update.
            iterations: number of gradient-descent steps to run, starting
                from all-zero weights.

        Raises:
            ValueError: if ``x`` is not 2-D, has no rows or no columns, or if
                ``y`` does not contain exactly one value per row of ``x``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)
        if x.ndim != 2:
            raise ValueError('x must be a 2-D array of shape (rows, features)')
        if x.shape[0] == 0 or x.shape[1] == 0:
            raise ValueError('x must contain at least one row and one column')
        if y.shape[0] != x.shape[0]:
            raise ValueError('y must contain exactly one value per row of x')

        self.__M = x.shape[0] # No of rows or values in the dataset
        self.__N = x.shape[1] # No of columns or features in the dataset
        self.__constants = np.zeros(self.__N) # One weight per column, starting at zero
        self.__gradient_descent(x, y, alpha, iterations)
        self.coeff = self.__constants[:-1]    # Separating coefficients from intercept
        self.intercept = self.__constants[-1] # Y-intercept (weight of the last column)

    # Creating Predict Function to predict the values of test inputs
    def predict(self, test):
        """Return one predicted output per row of ``test``.

        Args:
            test: 2-D array-like with the same number of columns as the
                ``x`` passed to ``fit``.

        Returns:
            1-D float array of length ``test.shape[0]``.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
            ValueError: if ``test`` is not 2-D or its column count differs
                from the training data.
        """
        if self.__constants is None:
            raise RuntimeError('fit must be called before predict')
        test = np.asarray(test, dtype=float)
        if test.ndim != 2 or test.shape[1] != self.__N:
            raise ValueError('test must be a 2-D array with %d columns' % self.__N)
        return test @ self.__constants

    # Score function
    def score(self, x, y):
        """Return the coefficient of determination (R^2) of the predictions.

        Computed as ``1 - u/v`` where ``u`` is the residual sum of squares and
        ``v`` the total sum of squares of ``y`` around its mean. There is no
        guard for ``v == 0`` (constant ``y``); the division is then undefined.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
            ValueError: if ``x`` has the wrong shape or ``y`` does not contain
                exactly one value per row of ``x``.
        """
        y = np.asarray(y, dtype=float).reshape(-1)
        y_pred = self.predict(x)
        if y.shape[0] != y_pred.shape[0]:
            raise ValueError('y must contain exactly one value per row of x')
        u = ((y - y_pred) ** 2).sum()
        v = ((y - y.mean()) ** 2).sum()
        return 1 - u / v

In [6]:
# Appending the column containing 1s to the end of the features, so that the
# weight learned for that last column acts as the intercept.
# x is rebuilt from train_set and test_set is only extended while it still
# lacks the column, so re-running this cell does not stack extra columns of 1s.
ones1 = np.ones((train_set.shape[0], 1))
x = np.append(train_set[:, :-1], ones1, axis=1)
ones2 = np.ones((test_set.shape[0], 1))
if test_set.shape[1] == x.shape[1] - 1: # True only while the column of 1s is still missing
    test_set = np.append(test_set, ones2, axis=1)
print(x.shape)
print(test_set.shape)

(379, 14)
(127, 14)


### Testing the algorithm

In [7]:
# Split x, y into train and test parts (fixed random_state so the split is repeatable)
# and fit the hand-written algorithm on the training part.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
algo = LinearRegression()
algo.fit(x_train, y_train, alpha=0.02, iterations=500)
# NOTE(review): the score and predictions below are computed on the training part;
# x_test / y_test are not used in this cell.
print('Self Made Algo Score:', algo.score(x_train, y_train))
print('Self Made Algo Predictions:')
print(algo.predict(x_train))

Algorithm Initiated
Self Made Algo Score: 0.7448201427106371
Self Made Algo Predictions:
[17.01567138 39.77760426  7.21601189 29.94682599 44.1253906  26.03476934
 12.71312931 23.33630762 24.46265181 36.76500889 27.85060724 28.23212382
 20.83306722 28.85416055 11.10487519 27.2359065  26.66683838 25.37606074
 15.04731412 21.66845249  9.98829608 33.33763536 27.93216052 27.12893247
 15.39832196 19.42441584 17.90067411 15.98611379 27.92306391 28.60100743
 20.26344586 23.56992979 26.45386343 30.53002221 29.78305366 21.78152239
 20.01178474 19.87650208 36.2053684  24.62992161 23.27785042 20.96930602
 34.04140061 12.86943163 31.59175287 21.56297334 19.88730759 35.15340512
 33.17849616 18.98290989 10.5079858  25.02322229 23.10973811 32.96023701
 29.39168725 22.15130216 18.52931904 24.74327198 17.83231634 20.17095569
 21.19306686 20.91387007 34.4842739  22.43872465 24.7241418  31.23949566
 -5.80267153 16.76222424 32.75222428 30.92552987 14.93033682 18.37474175
 14.00803611 17.67470535 18.2787822

### Comparing with Linear Regression in scikit-learn

In [8]:
# Fit scikit-learn's LinearRegression on the same training part so its score
# and predictions can be compared with the hand-written algorithm.
inbuilt_algo = linear_model.LinearRegression().fit(x_train, y_train)
print('Inbuilt algo score:', inbuilt_algo.score(x_train, y_train))
print('Inbuilt algo predictions:')
print(inbuilt_algo.predict(x_train))

Inbuilt algo score: 0.7450606468058916
Inbuilt algo predictions:
[17.0996055  39.77186499  7.1792253  30.1181784  44.10073796 26.08142335
 12.78157538 23.36219083 24.4804286  36.77265792 27.73795027 28.18965494
 20.84401057 28.860753   11.02562881 27.26140374 26.19980977 25.34798721
 14.98818858 21.52165885 10.02596738 33.21378262 27.98432967 27.15829294
 15.03695254 19.27843799 17.92090662 15.90825807 28.25905734 28.6744422
 20.36439081 23.56716958 26.4037441  30.57211125 29.8656989  21.61619623
 19.83666148 19.96024649 36.13776961 24.71995042 23.31931143 21.09045665
 34.12241696 12.84565843 31.85959556 21.66066654 19.79098133 35.1939373
 33.3703234  18.99754128 10.53137669 25.1242361  23.24916593 32.80132352
 29.45604496 22.22713581 18.57233934 24.72479949 18.00182356 20.07040416
 21.37351491 20.81265895 34.73805114 22.5160857  24.79859629 31.51890219
 -5.80547202 16.8377151  33.03161214 30.71002951 14.83164488 18.25359269
 13.94595193 17.57454095 18.30808755  9.28593099 33.20879209 

### Predicting Output for test data

In [9]:
# Fit a fresh instance of the hand-written LinearRegression on all of x, y
# (same learning rate and number of iterations as before)
algo1 = LinearRegression()
algo1.fit(x, y, alpha=0.02, iterations=500)
# Predict the output for test_set, rounded off to 5 decimal places
y_pred = np.round(algo1.predict(test_set), 5)
# Save the predictions into the csv file
np.savetxt('ans.csv', y_pred, delimiter=',')

Algorithm Initiated
