In [78]:
from sklearn import datasets
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [79]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older scikit-learn — confirm the pin.
boston = datasets.load_boston()

# One frame holding the feature columns plus the regression target as 'target'.
bostonDf = pd.DataFrame(
    boston.data,
    columns=boston.feature_names,
).assign(target=boston.target)

In [80]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
xtr, xte, ytr, yte = train_test_split(
    bostonDf.drop(columns='target'),
    bostonDf['target'],
    test_size=0.2,
    random_state=1,
)

In [93]:
class CustomLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit`` the instance exposes:

    * ``w``          -- the full weight vector, bias first
    * ``intercept_`` -- the bias term (``w[0]``)
    * ``coef_``      -- the per-feature weights (``w[1:]``)
    """

    @staticmethod
    def _design_matrix(features):
        """Return ``features`` as a 2-D float array with a leading column of ones."""
        x = np.asarray(features, dtype=float)
        return np.insert(x, 0, 1.0, axis=1)

    def fit(self, features, t):
        """Estimate the weights that minimise the squared error on the data.

        Parameters
        ----------
        features : 2-D array-like (rows are samples, columns are features)
        t : array-like of targets, one per row of ``features``

        Returns
        -------
        self, so the call can be chained.
        """
        design = self._design_matrix(features)
        targets = np.asarray(t, dtype=float)
        # Solve the least-squares problem directly instead of forming and
        # inverting X^T X: explicit inversion loses precision on
        # ill-conditioned data and breaks down when features are collinear,
        # whereas lstsq still returns a (minimum-norm) solution.
        self.w = np.linalg.lstsq(design, targets, rcond=None)[0]
        self.intercept_ = self.w[0]
        self.coef_ = self.w[1:]
        return self

    def predict(self, x):
        """Return the predicted target for every row of ``x`` as a NumPy array."""
        # A single matrix product replaces the per-row Python loop.
        return self._design_matrix(x) @ self.w

    def rmse_score(self, t, y):
        """Return the root-mean-squared error between targets ``t`` and predictions ``y``."""
        # Coerce to arrays so plain lists work and pandas objects are compared
        # by position rather than being aligned on their index labels.
        residuals = np.asarray(t, dtype=float) - np.asarray(y, dtype=float)
        return np.sqrt((residuals ** 2).mean())

    def score(self, y_true, y_predict):
        """Return the coefficient of determination (R^2) of the predictions.

        Computed as ``1 - u / v`` where ``u`` is the residual sum of squares
        and ``v`` is the total sum of squares of ``y_true`` about its mean.
        """
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_predict, dtype=float)
        u = ((actual - predicted) ** 2).sum()
        v = ((actual - actual.mean()) ** 2).sum()
        return 1 - (u / v)

In [96]:
# Fit the hand-written estimator and show the learned intercept and weights.
custom_model = CustomLinearRegression()
custom_model.fit(xtr, ytr)
print(custom_model.intercept_, custom_model.coef_, sep="\n")

# Predict both splits first, then report the metrics for each.
custom_ytrain_predict = custom_model.predict(xtr)
custom_ytest_predict = custom_model.predict(xte)

print(f"\nTraining Data: \nRMSE -> {custom_model.rmse_score(ytr, custom_ytrain_predict)}\nCoefficient of determination: {custom_model.score(ytr, custom_ytrain_predict)}")
print(f"\nTesting Data: \nRMSE -> {custom_model.rmse_score(yte, custom_ytest_predict)}\nCoefficient of determination: {custom_model.score(yte, custom_ytest_predict)}")


42.93352585337766
[-1.12386867e-01  5.80587074e-02  1.83593559e-02  2.12997760e+00
 -1.95811012e+01  3.09546166e+00  4.45265228e-03 -1.50047624e+00
  3.05358969e-01 -1.11230879e-02 -9.89007562e-01  7.32130017e-03
 -5.44644997e-01]

Training Data: 
RMSE -> 4.675766751547771
Coefficient of determination: 0.7293585058196337

Testing Data: 
RMSE -> 4.835373458200654
Coefficient of determination: 0.7634174432138358


In [101]:
def rmse_score(t, y):
    """Return the root-mean-squared error between targets ``t`` and predictions ``y``.

    The subtraction and ``.mean()`` are delegated to the operands, so the
    inputs must support elementwise arithmetic (e.g. NumPy arrays or pandas
    Series).
    """
    squared_error = (t - y) ** 2
    return np.sqrt(squared_error.mean())

In [103]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn's estimator, to compare against the custom model.
original_model = LinearRegression().fit(xtr, ytr)
print(original_model.intercept_, original_model.coef_, sep="\n")

# Predict both splits first, then report the metrics for each.
ytr_pred = original_model.predict(xtr)
yte_pred = original_model.predict(xte)

print(f"\nTraining Data: \nRMSE -> {rmse_score(ytr, ytr_pred)}\nCoefficient of determination: {original_model.score(xtr, ytr)}")
print(f"\nTesting Data: \nRMSE -> {rmse_score(yte, yte_pred)}\nCoefficient of determination: {original_model.score(xte, yte)}")


42.93352585337733
[-1.12386867e-01  5.80587074e-02  1.83593559e-02  2.12997760e+00
 -1.95811012e+01  3.09546166e+00  4.45265228e-03 -1.50047624e+00
  3.05358969e-01 -1.11230879e-02 -9.89007562e-01  7.32130017e-03
 -5.44644997e-01]

Training Data: 
RMSE -> 4.675766751547773
Coefficient of determination: 0.7293585058196337

Testing Data: 
RMSE -> 4.835373458200553
Coefficient of determination: 0.7634174432138456


In [140]:
# Build the table from plain Python rows. Routing the rows through np.array
# would coerce every numeric cell to a string (the rows also hold text
# labels), leaving the metric columns unusable for arithmetic or formatting.
data = [
    ['Custom Model', 'Training Data', custom_model.intercept_,
        custom_model.rmse_score(ytr, custom_ytrain_predict),
        custom_model.score(ytr, custom_ytrain_predict)],
    ['SKLEARN Model', 'Training Data', original_model.intercept_,
        rmse_score(ytr, ytr_pred),
        original_model.score(xtr, ytr)],
    ['Custom Model', 'Testing Data', custom_model.intercept_,
        custom_model.rmse_score(yte, custom_ytest_predict),
        custom_model.score(yte, custom_ytest_predict)],
    ['SKLEARN Model', 'Testing Data', original_model.intercept_,
        rmse_score(yte, yte_pred),
        original_model.score(xte, yte)],
]
# 'Intercept', 'RMSE Score' and 'Coefficient' are now real float columns;
# raise pandas' display precision if the tiny differences between the two
# models need to be visible in the rendered table.
result = pd.DataFrame(data, columns=['Model', 'Data', 'Intercept', 'RMSE Score', 'Coefficient'])
print(f"Linear Regression using custom model and sklearn built-in model: ")
result

Linear Regression using custom model and sklearn built-in model: 


Unnamed: 0,Model,Data,Intercept,RMSE Score,Coefficient
0,Custom Model,Training Data,42.93352585337766,4.675766751547771,0.7293585058196337
1,SKLEARN Model,Training Data,42.93352585337733,4.675766751547773,0.7293585058196337
2,Custom Model,Testing Data,42.93352585337766,4.835373458200654,0.7634174432138358
3,SKLEARN Model,Testing Data,42.93352585337733,4.835373458200553,0.7634174432138456
