In this notebook, I build a multiple linear regression model from first principles. It works similarly to `LinearRegression` in [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html). I have also added a few simple helpers, such as a table comparing predictions with the ground truth and a visualization, so that machine learning beginners can better understand how a multiple regression model is constructed and how it works in principle.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Model Definition

In [None]:
class multiple_linear_regression():
    """Ordinary least-squares multiple linear regression with an intercept.

    The model is fitted by appending a column of ones to the feature matrix
    and solving the resulting least-squares problem, so the last fitted
    weight is the intercept.

    Attributes set by ``train``:
        W: all fitted parameters; feature weights followed by the intercept.
        coef_: the feature weights (``W[:-1]``).
        intercept_: the intercept (``W[-1]``).

    Attribute set by ``predict``:
        preds: the predictions returned by the most recent ``predict`` call.
    """

    @staticmethod
    def _with_bias(X):
        """Return ``X`` as a float matrix with a trailing column of ones."""
        X = np.asarray(X, dtype=float)
        return np.append(X, np.ones((len(X), 1)), axis=1)

    def train(self, X, y):
        """Fit the model to features ``X`` (n_samples, n_features) and targets ``y``.

        Stores the result in ``W``, ``coef_`` and ``intercept_``.
        """
        features = self._with_bias(X)
        # lstsq minimises the same squared error as the normal equation
        # (X^T X)^-1 X^T y, but never inverts X^T X explicitly, so collinear
        # (rank-deficient) features no longer raise or blow up numerically.
        self.W = np.linalg.lstsq(features, np.asarray(y, dtype=float), rcond=None)[0]
        self.coef_ = self.W[:-1]
        self.intercept_ = self.W[-1]

    def predict(self, new_X):
        """Return predictions for ``new_X`` and cache them on ``self.preds``.

        ``train`` must have been called first.
        """
        preds = np.dot(self._with_bias(new_X), self.W)
        self.preds = preds
        return preds

    def contrast(self, X, y):
        """Return a DataFrame pairing the targets ``y`` with predictions for ``X``.

        The columns are ``'Ground Truth'`` and ``'Model Preds'``.
        """
        Target = pd.DataFrame()
        Target['Ground Truth'] = y
        # Predict from the X that was passed in rather than reusing whatever
        # the last predict() call cached: the cache may be missing or may
        # belong to a different dataset.
        Target['Model Preds'] = self.predict(X)
        return Target

    def evaluate(self, X, y):
        """Print the RMSE and R^2 score of the model's predictions for ``X`` against ``y``."""
        Target = self.contrast(X, y)
        truth = Target['Ground Truth']
        residual_ss = np.sum((truth - Target['Model Preds'])**2)
        total_ss = np.sum((truth - np.mean(truth))**2)
        rmse = np.sqrt(residual_ss / len(Target))
        # NOTE: total_ss is zero when every target is identical, in which
        # case r2 is not a finite number.
        r2 = 1 - residual_ss / total_ss
        print('The root mean square error is %.5f' %rmse)
        print('The r2 score is %.5f' %r2)

    def plot(self, X, y):
        """Scatter the predictions for ``X`` against ``y`` and draw the y = x line."""
        Target = self.contrast(X, y)
        # Points on this diagonal are predictions that equal the ground truth.
        diagonal = np.linspace(np.min(y), np.max(y), 100)
        plt.figure(figsize=(6,5))
        plt.title('Model prediction VS Ground Truth')
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Preds')
        plt.scatter(Target['Ground Truth'], Target['Model Preds'], c='green', marker='o')
        plt.plot(diagonal, diagonal, '--', lw=1.5, c='r', label='multiple linear model')
        plt.legend(loc='upper left')
        plt.grid()
        plt.show()

## Load Dataset

In [None]:
# Read the insurance CSV from the notebook's input directory and preview
# the first rows.
df = pd.read_csv(filepath_or_buffer='../input/introduce-data/insurance.csv')
df.head()

In [None]:
# Keep only the first 50 rows, then split into features and target:
# every column except the last is a feature, `charges` is the target.
df = df.head(50)
X = df.iloc[:, :-1].to_numpy()
y = df['charges'].to_numpy()
X.shape, y.shape

## Train and Evaluate

In [None]:
# Create the regression model and fit it to the features X and targets y.
model = multiple_linear_regression()
model.train(X,y)

In [None]:
# Predict on the same rows the model was trained on (in-sample predictions).
model.predict(X)

In [None]:
# Side-by-side table of the ground-truth targets and the model predictions.
model.contrast(X,y)

In [None]:
# Regression performance: prints the root mean square error and the R^2 score.
model.evaluate(X,y)

In [None]:
# Visualization: scatter of predictions against ground truth, with a dashed
# y = x reference line.
model.plot(X,y)

In [None]:
# Intercept: the last fitted parameter, i.e. the weight of the appended
# column of ones.
model.intercept_

In [None]:
# Feature weights: every fitted parameter except the intercept.
model.coef_

## Thank you for reading!