In [69]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
import numpy as np

#                                           Multiple Linear Regression from Scratch
```js
        Work :
        ------
                We have N input columns and 1 output column. After training, the model PREDICTS the outcome for new inputs. A prediction will rarely equal the actual output exactly, but it should usually be close to it.

        We also compare our from-scratch model with the Sklearn Linear Regression model to check that our implementation is correct.
```

In [64]:
class My_MLR:
    """Multiple linear regression fitted by ordinary least squares.

    The handwritten derivation is in the "Multiple Linear Regression
    Calculation" pdf; only the final equations (second-to-last page) are
    implemented here.

    Attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coefficients = None # B1, B2, ...., Bn  (B = Beta = Slope)
        self.intercept = None    # B0

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """Estimate the intercept and coefficients from the training data.

        Args:
            x_train: 2-D array of shape (n_samples, n_features).
            y_train: Targets of shape (n_samples,) or (n_samples, 1). If more
                than one target column is given, only the first one is used.

        Raises:
            ValueError: If ``x_train`` is not 2-D or the number of rows in
                ``x_train`` and ``y_train`` differ.
        """
        features = np.asarray(x_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        if features.ndim != 2:
            raise ValueError("x_train must be a 2-D array of shape (n_samples, n_features).")
        if targets.ndim == 1:
            # Accept a flat target vector by treating it as a single column.
            targets = targets.reshape(-1, 1)
        if targets.shape[0] != features.shape[0]:
            raise ValueError("x_train and y_train must have the same number of rows.")

        # Prepend a column of ones so that the first beta is the intercept B0.
        design = np.column_stack((np.ones(features.shape[0]), features))

        # Least-squares solve of design @ betas = targets. This yields the same
        # betas as inv(X.T X) X.T y when X.T X is invertible, but it does not
        # form an explicit inverse and still returns a (minimum-norm) solution
        # when the feature columns are collinear.
        betas, *_ = np.linalg.lstsq(design, targets, rcond=None)

        self.intercept, self.coefficients = betas[0, 0], betas[1:, 0]
        print(f"Intercept = {self.intercept}.\n")
        print(f"Coefficients (for {features.shape[1]} input columns) = \n{self.coefficients}.\n")

    def predict(self, x_test: np.ndarray):
        """Return predictions for ``x_test``.

        Args:
            x_test: Array whose last dimension has n_features entries; for a
                2-D input of shape (m, n_features) the result has shape (m,).

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.coefficients is None or self.intercept is None:
            raise RuntimeError("My_MLR.predict() was called before fit().")
        return np.dot(x_test, self.coefficients) + self.intercept
    

def main():
    """Fit My_MLR and sklearn's LinearRegression on the diabetes data and print both.

    For each model the intercept, the coefficients and the prediction for the
    first test row are printed, so the two implementations can be compared.
    """
    X, Y = load_diabetes(return_X_y=True) # x.shape = (442, 10), y.shape = (442,).
    # Make the target a single column. The ndarray method is used because the
    # `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
    Y = Y.reshape(-1, 1) # y.shape = (442, 1)

    x_train, x_test, y_train, _y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

    # One-row slice: stays 2-D, which is what both predict() calls below expect.
    sample = x_test[0:1]

    #                                       Using our Custom MLR Model Object.
    print("                                 Our MLR Model")
    mlr_obj1 = My_MLR()
    mlr_obj1.fit(x_train=x_train, y_train=y_train)
    custom_prediction = mlr_obj1.predict(sample)[0]
    print(f"For {sample}, the predicted value = {custom_prediction}.")

    #                                       Using Sklearn LR Model.
    print("\n                               Sklearn Linear Regression Model")
    mlr_obj2 =  LinearRegression()
    mlr_obj2.fit(X=x_train, y=y_train)
    # y_train is a column, so intercept_ and coef_ carry a leading target axis.
    print(f"Intercept = {mlr_obj2.intercept_[0]}.\n")
    print(f"Coefficients (for {X.shape[1]} input columns) = \n{mlr_obj2.coef_[0]}.\n")

    sklearn_prediction = mlr_obj2.predict(sample)[0][0]
    print(f"For {sample}, the predicted value = {sklearn_prediction}.")

# Run the comparison only when this code is executed as the main module
# (which is the case when the cell is run in a notebook kernel).
if __name__ == "__main__":
    main()

# Free-text note stored in a variable; no code visible here reads it.
# It explains why predict() is given a one-row slice of x_test (still 2-D).
# NOTE(review): the call quoted inside the string lacks its closing parenthesis.
explanation = """
    mlr_obj_.predict(x_test[0:1][0:1]   =   [[154.1213881]], because ML Model only accepts 2D Data.
"""

                                 Our MLR Model
Intercept = 151.8833100525417.

Coefficients (for 10 input columns) = 
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238].

For [[ 0.06713621 -0.04464164  0.00349435  0.03564379  0.0493413   0.03125356
   0.07072993 -0.03949338 -0.00061174  0.01963284]], the predicted value = 154.12138809538385.

                               Sklearn Linear Regression Model
Intercept = 151.88331005254167.

Coefficients (for 10 input columns) = 
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238].

For [[ 0.06713621 -0.04464164  0.00349435  0.03564379  0.0493413   0.03125356
   0.07072993 -0.03949338 -0.00061174  0.01963284]], the predicted value = 154.1213880953837.


```js
    If you want to see what the regression "line" looks like in N dimensions (where it is a HYPERPLANE), the easiest case to visualize is 3 dimensions, i.e. 3 columns, where it is a plane. See the last graph of => https://colab.research.google.com/github/campusx-official/100-days-of-machine-learning/blob/main/day50-multiple-linear-regression/multiple_linear_regression.ipynb#scrollTo=NpAvnU-t3yV0
```

#                                   Evaluate the Performance of Linear Regression Model

In [68]:
#                                                 1) Train the Model

X, Y = load_diabetes(return_X_y=True) # x.shape = (442, 10), y.shape = (442,).
# Make the target a single column. The ndarray method is used because the
# `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
Y = Y.reshape(-1, 1) # y.shape = (442, 1)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

mlr_obj =  LinearRegression()
mlr_obj.fit(X=x_train, y=y_train)
y_predict = mlr_obj.predict(x_test)

#                                    2) Calculate the Performance Metrics.

# Compute the MSE once and reuse it for the RMSE.
mse = mean_squared_error(y_true=y_test, y_pred=y_predict)

print("mean_absolute_error =", mean_absolute_error(y_true=y_test, y_pred=y_predict)) # returns 'loss'. The less, the good.
print("mean_squared_error =",  mse) # returns 'loss'. The less, the good.
print("Root mean_squared_error =", np.sqrt(mse)) # same units as the target.

r2_scoree = r2_score(y_true=y_test, y_pred=y_predict)
print("\nR2 Score =", r2_scoree)

# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - 1 - k); it penalizes R2 for the number of input columns.
n = x_test.shape[0] # number of test rows (x_test, y_test and y_predict all have the same row count).
k = X.shape[1] # number of input columns.
adjusted_r2_score = 1 - ((1-r2_scoree) * (n-1)/(n-1-k))
print("adjusted_r2_score =", adjusted_r2_score) # always check both r2_score and adjusted_r2_score.

mean_absolute_error = 45.21303419046903
mean_squared_error = 3094.4566715660626
Root mean_squared_error = 55.627840795469155

R2 Score = 0.4399338661568968
adjusted_r2_score = 0.3681305156641912
