# Multiple Linear Regression from Scratch

<img src='images/1.jpg'>

#### Simplify:
<img src='images/2.jpg'>

#### Mean Squared Error:
<img src='images/MSE.jpg'>

#### Weights and Bias
<img src='images/4w_b.jpg'>


In [1]:
import pandas as pd
import numpy as np

In [2]:
class MultipleLinearRegression():
    ''' Ordinary least-squares linear regression solved with the normal equation.

    Attributes:
        b: Fitted intercept (0 until ``fit`` has been called).
        w: Fitted feature weights, one entry per feature column
           (empty until ``fit`` has been called).
    '''
    def __init__(self):
        self.b = 0
        self.w = []

    @staticmethod
    def _as_2d(X):
        ''' Return X as a float ndarray with one row per sample. '''
        X = np.asarray(X, dtype=float)
        # A 1-D input is treated as a single feature observed over many samples.
        # reshape() returns a new array, so the result has to be kept.
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X, y):
        ''' Estimate the intercept and the weights from training data.

        Args:
            X: Array-like of shape (n_samples, n_features), or (n_samples,)
               when there is only one feature.
            y: Array-like of targets with n_samples entries.

        Raises:
            numpy.linalg.LinAlgError: If X^T X is singular (for example when
                feature columns are perfectly collinear).
        '''
        X = self._as_2d(X)
        y = np.asarray(y, dtype=float)

        # Prepend a column of ones so the intercept b is estimated together
        # with the weights as the first coefficient.
        ones = np.ones((X.shape[0], 1))
        X = np.concatenate((ones, X), axis=1)

        # Normal equation (X^T X) beta = X^T y. Solving the linear system is
        # numerically more stable than forming the explicit inverse.
        coefficients = np.linalg.solve(X.T @ X, X.T @ y)
        self.b = coefficients[0]
        self.w = coefficients[1:]

    def predict(self, X):
        ''' Predict targets for new samples.

        Args:
            X: Array-like of shape (n_samples, n_features), or (n_samples,)
               when the model was fitted on a single feature.

        Returns:
            A list with one prediction per row of X.

        Raises:
            ValueError: If the number of feature columns in X differs from the
                number of fitted weights (this includes calling ``predict``
                before ``fit``).
        '''
        X = self._as_2d(X)
        w = np.asarray(self.w, dtype=float)

        if X.shape[1] != w.shape[0]:
            raise ValueError(
                f"X has {X.shape[1]} feature(s) but the model has "
                f"{w.shape[0]} fitted weight(s); call fit() with matching data first."
            )

        # One matrix product replaces the per-sample / per-feature Python loops
        # and never modifies self.b in place.
        return list(X @ w + self.b)

In [3]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
bstn_data = load_boston()
bstn_data.keys()  # not the last expression of the cell, so nothing is displayed
print(bstn_data.feature_names)
print(bstn_data.target[0:10])

# Build the feature table with named columns in one step, then append the
# dataset target as an extra 'target' column.
df = pd.DataFrame(bstn_data.data, columns=bstn_data.feature_names)
df['target'] = bstn_data.target

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9]


In [4]:
# Preview the first two rows of the feature table, including the 'target' column.
df.head(2)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6


In [5]:
from sklearn.model_selection import train_test_split

# Separate the feature matrix from the target vector (plain numpy arrays).
X = df.drop(columns='target').values
y = df['target'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Fit the from-scratch model and predict on the held-out rows.
model = MultipleLinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

pd.DataFrame({
    'Actual Value': y_test,
    'Vanilla Model Prediction': y_pred,
})

Unnamed: 0,Actual Value,Vanilla Model Prediction
0,15.0,16.003300
1,26.6,27.794474
2,45.4,39.267695
3,20.8,18.326136
4,34.9,30.454875
...,...,...
97,31.5,31.250833
98,23.3,26.610479
99,33.3,36.455957
100,17.5,16.524144


### Compare Vanilla Model with SciKit Learn

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [7]:
# Reference implementation: fit scikit-learn's LinearRegression on the same
# training split and predict on the same test split.
sk_model = LinearRegression().fit(X_train, y_train)
sk_y_pred = sk_model.predict(X_test)

In [8]:
# Side-by-side table: true targets next to both models' predictions.
pd.DataFrame({
    'Actual Value': y_test,
    'Vanilla Model Prediction': y_pred,
    'SciKit Model Prediction': sk_y_pred,
})

Unnamed: 0,Actual Value,Vanilla Model Prediction,SciKit Model Prediction
0,15.0,16.003300,16.003300
1,26.6,27.794474,27.794474
2,45.4,39.267695,39.267695
3,20.8,18.326136,18.326136
4,34.9,30.454875,30.454875
...,...,...,...
97,31.5,31.250833,31.250833
98,23.3,26.610479,26.610479
99,33.3,36.455957,36.455957
100,17.5,16.524144,16.524144


In [9]:
# r2_score returns the coefficient of determination (R^2) of the from-scratch
# model on the test split. It is a regression metric, not a classification
# accuracy; the variable name and printed label are kept unchanged.
accuracy = r2_score(y_test, y_pred)
print(f"Accuracy:{accuracy}")

Accuracy:0.6592466510353829
