In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston

# Load the Boston housing data and collect the features plus the target
# column into a single DataFrame (the target ends up as the last column).
# NOTE(review): load_boston is deprecated/removed in recent scikit-learn
# releases -- confirm the scikit-learn version this notebook is pinned to.
dataset = load_boston()
boston = pd.DataFrame(
    data=dataset.data,
    columns=dataset.feature_names,
).assign(target=dataset.target)

boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [2]:
from sklearn.model_selection import train_test_split

# Shuffle with a fixed seed and hold out 156 rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    boston.iloc[:, :-1],  # every column except the last -> features
    boston.iloc[:, -1],   # last column -> target
    test_size=156,
    random_state=10,
    shuffle=True,
)

print('Training : ', X_train.shape, ' Testing : ', X_test.shape)

Training :  (350, 13)  Testing :  (156, 13)


## Handcrafted - Ordinary Least Squares Linear Regression

In [38]:
class my_LR():
    """
    Ordinary least-squares linear regression solved with ``np.linalg.lstsq``.

    ``fit`` standardises the features (subtract the mean, divide by the
    standard deviation), prepends a constant column and centres the target
    before solving, so ``coef_`` is expressed in that standardised space.
    ``predict`` re-applies the same transformation with the stored statistics.

    Attributes set by ``fit``
    -------------------------
    X_mean, X_std : per-feature mean / standard deviation of the training X
                    (standard deviations equal to 0 are replaced by 1)
    y_mean        : mean of the training target
    coef_         : least-squares solution; index 0 belongs to the constant
                    column, the remaining entries to the standardised features
    residuals_, rank_, singular_ : the other values returned by
                    ``np.linalg.lstsq``
    """

    def fit(self, X, y):
        """
        Fit the linear model

        Parameters:
        -----------
        X : array-like of form [num_samples, num_features]
        y : array-like of form [num_samples] or [num_samples, 1]

        Raises:
        -------
        ValueError : if X is not 2-dimensional
        """

        # np.array(..., dtype=float) always makes a fresh copy, so the caller's
        # data is never modified, and it accepts lists / DataFrames as well as
        # ndarrays (np.ndarray.copy only accepts real ndarrays).
        Xn = np.array(X, dtype=float)
        yn = np.array(y, dtype=float)

        if Xn.ndim != 2:
            raise ValueError(
                'X must be 2-dimensional [num_samples, num_features], '
                'got an array with %d dimension(s).' % Xn.ndim
            )

        X_mean = Xn.mean(axis=0)
        X_std = Xn.std(axis=0)
        # Constant features have zero deviation; dividing by 1 instead keeps
        # them at 0 after centring and avoids a division by zero.
        X_std[X_std == 0] = 1
        self.X_mean = X_mean
        self.X_std = X_std

        Xn = (Xn - X_mean) / X_std
        # Prepend a column of ones (the constant / intercept column).
        Xn = np.hstack((np.ones((Xn.shape[0], 1)), Xn))

        y_mean = yn.mean(axis=0)
        yn = yn - y_mean
        self.y_mean = y_mean

        # Use Matrix Solution method from numpy.
        # rcond=None selects the documented machine-precision cutoff and avoids
        # the FutureWarning raised when rcond is left unspecified.
        try:
            self.coef_, self.residuals_, self.rank_, self.singular_ = np.linalg.lstsq(
                Xn, yn, rcond=None
            )
        except Exception:
            print('Error occurred in Matrix operation.')
            raise

    def get_Params(self):
        """
        Return the fitted coefficient vector.

        Index 0 is the coefficient of the constant column added in ``fit``;
        the remaining entries are the coefficients of the standardised
        features, in the column order of the X passed to ``fit``.
        """
        return self.coef_

    def predict(self, X):
        """
        Predict regression values for X

        Parameters:
        -----------
        X : array-like of form [num_samples, num_features]

        Returns:
        --------
        numpy array with one prediction per row of X

        Raises:
        -------
        ValueError : if X is not 2-dimensional
        """

        Xn = np.array(X, dtype=float)
        if Xn.ndim != 2:
            raise ValueError(
                'X must be 2-dimensional [num_samples, num_features], '
                'got an array with %d dimension(s).' % Xn.ndim
            )

        # Apply the training-time standardisation, then add the constant column.
        Xn = (Xn - self.X_mean) / self.X_std
        Xn = np.hstack((np.ones((Xn.shape[0], 1)), Xn))

        # The target was centred in fit, so its mean is added back here.
        return np.dot(Xn, self.coef_) + self.y_mean

In [39]:
# Fit the handcrafted OLS model on the NumPy arrays behind the training split.
testlr = my_LR()
testlr.fit(X=X_train.values, y=y_train.values)

## Scikit Learn - Linear Regression

In [48]:
from sklearn.linear_model import LinearRegression, Lasso

# Fit scikit-learn's LinearRegression on the training split.
# Predictions for X_test are not computed in this cell.
sklr = LinearRegression()
sklr.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

## Scikit Learn - Lasso Regression

In [49]:
from sklearn.linear_model import Lasso
# Fit scikit-learn's Lasso with its default settings on the training split.
# Predictions for X_test are not computed in this cell.
sklasso = Lasso()
sklasso.fit(X=X_train, y=y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

## C : Plot the prices predicted by my_LR and sklearn_LR against the actual price