In [2]:
from sklearn import datasets 
import matplotlib.pyplot as plt 
import seaborn as sns
import math
import numpy as np
from scipy import stats
from tqdm import tqdm

data = datasets.load_boston()
data.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [3]:
X,Y = data['data'],data['target']

# Sklearn LinearRegression 

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score,mean_squared_error,r2_score
model = LinearRegression(fit_intercept=False,n_jobs = -1)
model.fit(X,Y)
print('score',model.score(X,Y),'score',r2_score(Y,model.predict(X)))
print(mean_squared_error(Y,model.predict(X)))

score 0.7137381380515404 score 0.7137381380515404
24.166099330126492


In [5]:
model.coef_,model.intercept_

(array([-9.28965170e-02,  4.87149552e-02, -4.05997958e-03,  2.85399882e+00,
        -2.86843637e+00,  5.92814778e+00, -7.26933458e-03, -9.68514157e-01,
         1.71151128e-01, -9.39621540e-03, -3.92190926e-01,  1.49056102e-02,
        -4.16304471e-01]), 0.0)

# Sklearn Rigid 

In [7]:
from sklearn.linear_model import Ridge # L2
from sklearn.metrics import r2_score
model = Ridge(alpha=0.9,max_iter=1000)
model.fit(X,Y)
print(model.score(X,Y),r2_score(Y,model.predict(X)))


0.7390875556177521 0.7390875556177521


# Sklearn Lasso 

In [8]:
from sklearn.linear_model import Lasso # L1
from sklearn.metrics import r2_score
model = Lasso()
model.fit(X,Y)
print(model.score(X,Y),r2_score(Y,model.predict(X)))


0.6825842212709925 0.6825842212709925


# Sklearn Elastic 

In [9]:
from sklearn.linear_model import ElasticNet # L1
from sklearn.metrics import r2_score
model = ElasticNet(alpha = 0.1,l1_ratio=0.1)
model.fit(X,Y)
print(model.score(X,Y),r2_score(Y,model.predict(X)))


0.7247754388307905 0.7247754388307905


# Sklearn Polynomial Regression

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

X_ = PolynomialFeatures(2).fit_transform(X)


model = LinearRegression()
model.fit(X_,Y)
print(model.score(X_,Y),r2_score(Y,model.predict(X_)))

0.9239966562994922 0.9239966562994923


# Implementation 

In [42]:
def mse(y_true,y_pred):
    
    return  np.mean(0.5  * (y_true - y_pred)**2)


class LinearRegression:
    
    def __init__(self,max_iter = None,method = 'OLS',lr = 0.1):
        
        self.W = None 
        self.B = 1 
        self.max_iter = max_iter
        self.method = method
        self.lr = lr
    
    
    def fit(self,X,Y):
        
        #Using Gradient Descent : GD
        #calculate y pred 
        # caluclate loss 
        # calculate gradient 
        # update weights 
        
        #using Odrinary Least Square Methods  : OLS
        
        
        #shapes of W and B 
        # let X - > (batch_size ,  n_features) 
        # W -> (n_features, 1)
        # B -> (1,1)
        
        if self.method == 'GD':
        
            m,n_features = X.shape
            
            

            limit = 1 / math.sqrt(n_features)
            
            self.W = np.random.uniform(-limit, limit, (n_features, ))

            #start gradient descent 
            if self.max_iter == None:
                self.max_iter = 100


            for epochs in range(self.max_iter):
                
                Y_Pred = X.dot(self.W) + self.B
                
             
                loss =   mse(Y,Y_Pred)
                
                if epochs % (self.max_iter / 10) == 0:
                    print('epochs',loss)
                            
            
                dW  =  - (Y - Y_Pred).dot(X) / m
                dB = - np.mean(Y - Y_Pred)
                
                
                self.W -= self.lr * dW    
                self.B -= self.lr * dB
              
                
                
        else:
                # ordinary least square method 
                # y = w x + b 
                
                #  W = (x - xmean) * (y - ymean) / ((x-xmean)^2 + (y-ymean)^2)
                
                # B = Y - W * X
                
                xmean = X.mean(0)
                ymean = Y.mean()

                self.W = (X - xmean).T.dot(Y-ymean)/np.sum((X - xmean)**2)

                self.B = ymean - xmean.dot(self.W.T)
                
                print(self.W,self.B)
                
                
    def predict(self,X):
        
        Ypred = X.dot(self.W) + self.B
        
        return Ypred
                   

In [43]:
from sklearn.preprocessing import Normalizer,StandardScaler

#if the data is not normalized or standardized the model cannot be computed 

X_  = StandardScaler().fit_transform(X)

model = LinearRegression(max_iter=10000,method = 'GD')
model.fit(X_,Y)
r2_score(Y,model.predict(X_))

epochs 272.0773819759306
epochs 10.947416695382442
epochs 10.947415590867832
epochs 10.947415590864601
epochs 10.947415590864601
epochs 10.947415590864601
epochs 10.947415590864603
epochs 10.947415590864603
epochs 10.947415590864603
epochs 10.947415590864603


0.7406426641094094

### observations 
1. if we forget to add the bias term then the r2_score goes into -ve
2. if the data is not standard scled then the model cant be trained properly as the data weights goes to nan 
3. replacing mean() to sum() also causes the weights to go to nan

In [39]:
from sklearn.preprocessing import StandardScaler

X_ = StandardScaler().fit_transform(X)

model = LinearRegression()
model.fit(X_,Y)
r2_score(Y,model.predict(X_))

[-0.27444209  0.254752   -0.34188249  0.12386866 -0.30201755  0.49145963
 -0.26642022  0.17664216 -0.26972202 -0.3311472  -0.35888846  0.23568014
 -0.52135797] 22.532806324110677


0.3284953699385462

## will be adding regularization items 