In [None]:
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LinearRegression

In [None]:
class MyLinearRegression:
    """Multivariate linear regression fitted with full-batch gradient descent.

    The fitted model is ``y = X @ coef_ + intercept_``. The intercept is
    learned by appending a constant column of ones to ``X`` while training,
    so the last learned weight is the intercept.

    Parameters
    ----------
    learning_rate : float, default=0
        Step size applied to the gradient on every update.
    num_of_iterations : int, default=0
        Number of full-batch updates performed by ``fit``.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Learned feature weights (empty until ``fit`` is called).
    intercept_ : float
        Learned bias term.
    min_cost : float
        Mean squared error on the training data after the last update.
    M : int
        Number of training samples seen by the last ``fit``.
    n : int
        Number of features seen by the last ``fit``.
    """

    def __init__(self, learning_rate=0, num_of_iterations=0):
        self.learning_rate = learning_rate
        self.num_of_iterations = num_of_iterations
        self.min_cost = 0
        self.M = 0
        self.n = 0
        self.coef_ = np.zeros(self.n)
        self.intercept_ = 0

    def __cost(self, X, Y, m):
        """Return the mean squared error of weights ``m`` on ``(X, Y)``.

        ``X`` is expected to already contain the trailing column of ones.
        """
        residuals = Y - X @ m
        return np.mean(residuals ** 2)

    def __step_gradient(self, X, Y, m):
        """Perform one gradient-descent update and return the new weights.

        The gradient of the mean squared error with respect to ``m`` is
        ``(-2 / M) * X.T @ (Y - X @ m)``; it is computed for all samples at
        once instead of accumulating it row by row.
        """
        residuals = Y - X @ m
        slope = (-2 / X.shape[0]) * (X.T @ residuals)
        return m - self.learning_rate * slope

    def __gradient_descent(self, points):
        """Run gradient descent on ``points = (X, Y)``.

        Returns
        -------
        (ndarray, float)
            Feature weights of shape ``(n_features,)`` and the intercept.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``Y`` does not have one value
            per row of ``X``. Errors are raised instead of being printed and
            replaced by an all-zero model, so a failed fit cannot go unnoticed.
        """
        # Convert to plain arrays so that indexing is positional: a pandas
        # Series with a non-default index would otherwise be indexed by label.
        X = np.asarray(points[0], dtype=float)
        Y = np.asarray(points[1], dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but Y has {Y.shape[0]} values"
            )

        self.M, self.n = X.shape
        # Extra column of ones lets the last weight act as the intercept.
        X_n = np.hstack([X, np.ones((self.M, 1))])
        m = np.zeros(self.n + 1)

        for i in range(self.num_of_iterations):
            m = self.__step_gradient(X_n, Y, m)
            if i % 100 == 0:
                print(f"Cost after {i}th iterations is: {self.__cost(X_n, Y, m)}.")

        self.min_cost = self.__cost(X_n, Y, m)
        return m[:-1], m[-1]

    def fit(self, X, Y):
        """Fit the model to ``X`` (n_samples, n_features) and targets ``Y``.

        Sets ``coef_``, ``intercept_``, ``min_cost``, ``M`` and ``n``.
        Returns ``None``. Raises ``ValueError`` on malformed input.
        """
        self.coef_, self.intercept_ = self.__gradient_descent((X, Y))

    def predict(self, X_test):
        """Return predictions for ``X_test`` of shape (n_samples, n_features).

        Raises
        ------
        ValueError
            If ``X_test`` is not 2-D or its column count differs from the
            number of learned coefficients.
        """
        X_test = np.asarray(X_test, dtype=float)
        if X_test.ndim != 2:
            raise ValueError(
                f"X_test must be 2-dimensional, got {X_test.ndim} dimension(s)"
            )
        if X_test.shape[1] != self.coef_.shape[0]:
            raise ValueError(
                f"X_test has {X_test.shape[1]} features but the model has "
                f"{self.coef_.shape[0]} coefficients"
            )
        return X_test @ self.coef_ + self.intercept_

    def score(self, Y_truth, Y_pred):
        """Return the coefficient of determination ``1 - u / v``.

        ``u`` is the residual sum of squares and ``v`` the total sum of
        squares of ``Y_truth``. Note the argument order: this takes the true
        and the predicted targets, not ``(X, y)``. Inputs are compared
        positionally; when ``Y_truth`` is constant ``v`` is zero and the
        result follows NumPy's division-by-zero semantics.
        """
        Y_truth = np.asarray(Y_truth, dtype=float).ravel()
        Y_pred = np.asarray(Y_pred, dtype=float).ravel()
        u = np.sum((Y_truth - Y_pred) ** 2)
        v = np.sum((Y_truth - Y_truth.mean()) ** 2)
        return 1 - (u / v)

In [None]:
# Load the two CSV files from the current working directory. The training
# file also carries the " Y" column that a later cell splits off; the test
# file is used as features only.
boston_train= pd.read_csv("training_boston_x_y_train.csv")
boston_test= pd.read_csv("test_boston_x_test.csv")

In [None]:
# Preview the first rows of the training frame as loaded.
boston_train.head()

In [None]:
# Detach the " Y" column (note the leading space in the label) to use as the
# regression target; pop() returns the column and removes it from the frame,
# so boston_train holds only features afterwards.
# NOTE: not re-runnable without reloading the CSV, since " Y" is gone after
# the first run.
Y_train = boston_train.pop(" Y")

In [None]:
# Snapshot the current column labels; the next cell squares exactly these
# columns. Re-running this cell after squared columns have been added would
# capture those as well.
columns = boston_train.columns

In [None]:
# Add a squared copy of every column in `columns` to both frames, named
# "<col>_<col>", so the linear model can also fit quadratic terms.
for col in columns:
    squared_name = f"{col}_{col}"
    boston_train[squared_name] = boston_train[col] ** 2
    boston_test[squared_name] = boston_test[col] ** 2

In [None]:
# Preview the training frame again to check the added "<col>_<col>" columns.
boston_train.head()

In [None]:
# Pull the underlying NumPy arrays out of both frames, then display their
# shapes as a quick check on the row and column counts.
X_train, X_test = boston_train.values, boston_test.values
X_train.shape, X_test.shape

In [None]:
# Train the hand-written gradient-descent model: step size 0.0002 for 6000
# full-batch updates (the cost is printed every 100 iterations).
alg1 = MyLinearRegression(learning_rate=0.0002, num_of_iterations=6000)
alg1.fit(X_train, Y_train)

In [None]:
# Display the learned feature weights and intercept of the custom model.
alg1.coef_, alg1.intercept_

In [None]:
# Display the training cost recorded after the final gradient-descent update.
alg1.min_cost

In [None]:
# Training set: predict, then report the score on the data the model was
# fitted to. MyLinearRegression.score takes (true values, predictions).
y_train_pred = alg1.predict(X_train)
print("Train Score: ", alg1.score(Y_train, y_train_pred))

# Test set: predictions only; no targets are available here to score against.
y_test_pred = alg1.predict(X_test)

In [None]:
# Write the custom model's test-set predictions to predictions_boston.csv in
# the working directory, formatted with 18 decimal places.
np.savetxt("predictions_boston.csv", y_test_pred, delimiter=",", fmt="%.18f")

# Sklearn Linear Regression

In [None]:
# Fit scikit-learn's LinearRegression on the same training arrays as a
# reference for the custom model.
alg2=LinearRegression()
alg2.fit(X_train, Y_train)

In [None]:
# Display scikit-learn's fitted weights and intercept for comparison with alg1.
alg2.coef_, alg2.intercept_

In [None]:
# Training set: predictions plus the score. Unlike MyLinearRegression.score,
# sklearn's score takes (X, y) and predicts internally.
# NOTE: this overwrites the y_train_pred produced by alg1 in an earlier cell.
y_train_pred = alg2.predict(X_train)
print("Train Score: ", alg2.score(X_train, Y_train))

# Test set: sklearn predictions, kept under a separate name for the comparison plot.
y_test_pred_skl = alg2.predict(X_test)

# Plotting Graph

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Compare the two models on the test set: each point is one test sample, with
# the custom model's prediction on the x-axis and sklearn's on the y-axis.
# Points on the red y = x line are samples where both models agree.
plt.scatter(y_test_pred, y_test_pred_skl, label="test samples")
# Reference diagonal; the 0-50 range is hard-coded and is not derived from
# the predictions themselves.
x= np.arange(0,50,0.1)
plt.plot(x,x,color="r", label="y = x (identical predictions)")
plt.xlabel("MyLinearRegression prediction")
plt.ylabel("sklearn LinearRegression prediction")
plt.title("Test-set predictions: custom gradient descent vs. sklearn")
plt.legend()
plt.show()