In [5]:
# Train/Predict a multiple linear regression model using sklearn
#
# This cell builds the train/test split and the sklearn reference fit; it
# prints the split shapes, the fitted parameters and three error scores.

import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Split settings, named so they can be tuned in one place.
TEST_SIZE = 0.2
RANDOM_STATE = 2

# load the diabetes dataset
diabetes = load_diabetes()

# one column per feature, plus the regression target as a final column
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
df['target'] = diabetes.target

# X = all columns except target, y = target column
X = df.drop(columns='target')
y = df['target']

# split the data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# print the shape of train and test
print(X_train.shape)
print(X_test.shape)

# fit the sklearn reference model on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# print the fitted parameters
print("intercept = ", model.intercept_)
print("coefficients = ", model.coef_)

# predict the target for the held-out split
y_pred = model.predict(X_test)

# mean squared error, mean absolute error and r2 score on the test split
print("Mean Squared Error = ", mean_squared_error(y_test, y_pred))
print("Mean Absolute Error = ", mean_absolute_error(y_test, y_pred))
print("R2 Score = ", r2_score(y_test, y_pred))

(353, 10)
(89, 10)
intercept =  151.88334520854633
coefficients =  [  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
Mean Squared Error =  3094.4295991207023
Mean Absolute Error =  45.212924812996754
R2 Score =  0.4399387660024645


In [40]:
# Train/Predict a multiple linear regression model from scratch
# Match the coefficients, intercept and error scores with sklearn

class MultipleLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    ``fit`` solves the least-squares problem on the design matrix made of a
    leading column of ones followed by the feature columns. Afterwards:

    * ``intercept``    -- weight of the column of ones (the bias term).
    * ``coefficients`` -- array with one weight per feature column.

    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coefficients = None
        self.intercept = None

    def fit(self, X_train, y_train, verbose=True):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like (e.g. DataFrame or ndarray)
            Feature matrix, one row per sample.
        y_train : array-like
            Target values, one per row of ``X_train``.
        verbose : bool, default True
            When True, print the feature-matrix shape and the fitted
            intercept and coefficients.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` have different numbers of rows.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        if features.shape[0] != targets.shape[0]:
            raise ValueError(
                "X_train and y_train must have the same number of rows, got "
                f"{features.shape[0]} and {targets.shape[0]}"
            )

        if verbose:
            print(features.shape)

        # add a column of ones so the first solved weight is the intercept
        design = np.column_stack((np.ones(features.shape[0]), features))

        # Solve min ||design @ w - targets|| directly instead of inverting
        # design.T @ design: the explicit inverse fails (or returns garbage)
        # when feature columns are collinear, whereas lstsq returns the
        # minimum-norm solution in that case.
        weights, *_ = np.linalg.lstsq(design, targets, rcond=None)

        # first weight belongs to the column of ones, the rest to the features
        self.intercept = weights[0]
        self.coefficients = weights[1:]

        if verbose:
            print("intercept = ", self.intercept)
            print("coefficients = ", self.coefficients)

    def predict(self, X_test):
        """Return predictions ``X_test . coefficients + intercept``.

        Parameters
        ----------
        X_test : array-like (e.g. DataFrame or ndarray)
            Feature matrix with the same column layout used in ``fit``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coefficients is None or self.intercept is None:
            raise RuntimeError("fit() must be called before predict()")

        # Objects that already provide .dot (ndarray, DataFrame) are used
        # as-is so the result type is unchanged; plain sequences are converted.
        if not hasattr(X_test, "dot"):
            X_test = np.asarray(X_test, dtype=float)

        return X_test.dot(self.coefficients) + self.intercept

In [41]:
# Fit the from-scratch regressor on the training split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

model = MultipleLinearRegression()
model.fit(X_train, y_train)

# Predict the held-out split
y_pred = model.predict(X_test)

# Report mean squared error, mean absolute error and r2 score, in that order
for label, metric in (
    ("Mean Squared Error = ", mean_squared_error),
    ("Mean Absolute Error = ", mean_absolute_error),
    ("R2 Score = ", r2_score),
):
    print(label, metric(y_test, y_pred))


(353, 10)
intercept =  151.88334520854627
coefficients =  [  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
Mean Squared Error =  3094.4295991207023
Mean Absolute Error =  45.21292481299675
R2 Score =  0.4399387660024645
