**Importing Libraries**

In [1]:
import numpy as np
import pandas as pd

**Model Implementation**

In [2]:
class Linear_Regression():
    
    def __init__(self, learning_rate, no_of_iteration):
        self.learning_rate = learning_rate
        self.no_of_iteration = no_of_iteration
        
    def fit(self, X, Y):
        # no of training data points and no of features
        self.m, self.n = X.shape   # X.shape gives no of rows and columns in X, m = rows(data points) and n = columns(features)

        # initializing weights and bias
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # initializing Gradient Descent
        for i in range(self.no_of_iteration):
            self.update_weights()
        
    def update_weights(self):
        y_predict = self.predict(self.X)

        # Update Gradient
        dw = -(2*(self.X.T).dot(self.Y - y_predict))/self.m
        db = -(2*np.sum(self.Y - y_predict))/self.m

        #Update weights and bias
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        return X.dot(self.w)+self.b

    # Function To calculate Mean Square Error
    def Mean_Sq_Error(self, y_predict, Y):
        return np.mean(np.square(y_predict - Y))

    # Function To Calculate Mean Absolute Error
    def mean_ab_error(self, y_predict, Y):
        return np.mean(np.abs(y_predict - Y))

    # Function To Calculate R2 Error
    def r2_score(self, y_pred, Y):
        ss_res = np.sum(np.square(Y - y_pred))
        ss_tot = np.sum(np.square(Y - np.mean(Y)))
        return 1 - (ss_res / ss_tot)

    
    

## Evaluating Model Performance ##

In [3]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

**Loading Data**

In [4]:
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns = data.feature_names)
df['target'] = data.target

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
 8   target      20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [6]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


### Model Evaluation using custom build model ###

**Splitting Data into test and train**

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X = df.drop(['target'], axis = 1)
Y = df['target']

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

In [10]:
X_train.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
14196,3.2596,33.0,5.017657,1.006421,2300.0,3.691814,32.71,-117.03
8267,3.8125,49.0,4.473545,1.041005,1314.0,1.738095,33.77,-118.16
17445,4.1563,4.0,5.645833,0.985119,915.0,2.723214,34.66,-120.48
14265,1.9425,36.0,4.002817,1.033803,1418.0,3.994366,32.69,-117.11
2271,3.5542,43.0,6.268421,1.134211,874.0,2.3,36.78,-119.8


**Scaling the features for better prediction**

In [11]:
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
scaler.fit(X_train)  # Learn scaling params only from train data

0,1,2
,with_centering,True
,with_scaling,True
,quantile_range,"(25.0, ...)"
,copy,True
,unit_variance,False


In [12]:
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

**Model Training**

In [13]:
model = Linear_Regression(learning_rate = 0.0053, no_of_iteration = 100000)
model.fit(X_train_scaled, Y_train)

**Result Evaluation**

In [14]:
Y_predict = model.predict(X_test_scaled)

In [15]:
print("Sklearn R2 Score:", model.r2_score(Y_test, Y_predict))
print("Sklearn MSE:", model.Mean_Sq_Error(Y_test, Y_predict))

Sklearn R2 Score: 0.3376701627081874
Sklearn MSE: 0.5558915988008454


### Model Evaluation using sklearn library ###

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [17]:
lr = LinearRegression()
lr.fit(X_train_scaled, Y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [18]:
y_pred = lr.predict(X_test_scaled)
print("Sklearn R2 Score:", r2_score(Y_test, y_pred))
print("Sklearn MSE:", mean_squared_error(Y_test, y_pred))

Sklearn R2 Score: 0.5757877060324512
Sklearn MSE: 0.5558915986952438


**Comparing Weights and bias**

In [21]:
model.b  # Bias value of custom model

np.float64(2.0300235973493175)

In [22]:
lr.intercept_  # Bias value of sklearn model

np.float64(2.0300236007230614)

In [20]:
print("Custom weights:", model.w)       # Weights of different features
print("Sklearn weights:", lr.coef_)

Custom weights: [ 0.98998998  0.1847609  -0.19842504  0.07349028 -0.00190175 -0.00300161
 -1.59101349 -1.64809061]
Sklearn weights: [ 0.98998997  0.18476089 -0.19842504  0.07349028 -0.00190175 -0.00300161
 -1.59101352 -1.64809065]
