In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
df_ = pd.read_csv('USA_Housing.csv')
df_.drop('Address',inplace = True, axis=1)
df_.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5


In [5]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
df = pd.DataFrame(ss.fit_transform(df_.drop('Price',axis=1)), columns = df_.columns[:5])
df['Price'] = df_['Price']
df.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,1.02866,-0.296927,0.021274,0.088062,-1.317599,1059034.0
1,1.000808,0.025902,-0.255506,-0.722301,0.403999,1505891.0
2,-0.684629,-0.112303,1.516243,0.93084,0.07241,1058988.0
3,-0.491499,1.221572,-1.393077,-0.58454,-0.186734,1260617.0
4,-0.807073,-0.944834,0.846742,0.201513,-0.988387,630943.5


In [6]:
X = df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms','Avg. Area Number of Bedrooms', 'Area Population']]
y = df['Price']

## sklearn Linear Regression

In [7]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train,y_train)

print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)

y_pred = model.predict(X_test)

from sklearn.metrics import mean_squared_error, r2_score

print('MSE :',mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [230464.52520198 164159.19982574 120514.71328446   2913.62424452
 151019.35865248] 
Intercept : 1231278.6368716825
MSE : 10068422551.400883
R2 Score : 0.9146818498754016


## Nesterov Accelerated Gradient Descent

In [31]:
class NAG:

    def __init__(self, learning_rate, epochs, momentum):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.momentum = momentum
        self.coef_ = None
        self.intercept_ = None
        self.velocity_coef_ = None
        self.velocity_intercept_ = None
        self.y_pred = None

    def fit(self, X_train, y_train):
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        self.coef_ = np.zeros((X_train.shape[1],))
        self.intercept_ = 0
        self.velocity_coef_ = np.zeros_like(self.coef_)
        self.velocity_intercept_ = 0

        for epoch in range(self.epochs):
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)

            for i in indices:
                index = np.random.randint(0, X_train.shape[0])

                # Nesterov Accelerated Gradient (NAG) update
                look_ahead_coef = self.coef_ - self.momentum * self.velocity_coef_
                look_ahead_intercept = self.intercept_ - self.momentum * self.velocity_intercept_

                y_hat = np.dot(look_ahead_coef, X_train[index].T) + look_ahead_intercept

                dj_dw = np.dot((y_hat - y_train[index]), X_train[index]) / X_train.shape[0]
                dj_db = -2 * np.mean(y_train[index] - y_hat)

                # Momentum updates
                self.velocity_coef_ = self.momentum * self.velocity_coef_ + self.learning_rate * dj_dw
                self.velocity_intercept_ = self.momentum * self.velocity_intercept_ + self.learning_rate * dj_db

                # Update weights and intercept
                self.coef_ = self.coef_ - self.velocity_coef_
                self.intercept_ = self.intercept_ - self.velocity_intercept_

        return self.coef_, self.intercept_

    def predict(self, X_test):
        self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
        return self.y_pred

    @staticmethod
    def mean_squared_error(y_test, y_pred):
        return ((y_test - y_pred) ** 2).mean()

    @staticmethod
    def r2_score(y_test, y_pred):
        y_mean = np.mean(y_test)
        ss_total = np.sum((y_test - y_mean) ** 2)
        ss_residual = np.sum((y_test - y_pred) ** 2)
        r2 = 1 - (ss_residual / ss_total)
        return r2


model = NAG(learning_rate=0.1, epochs=70, momentum=0.5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Coefficients:', model.coef_, '\nIntercept:', model.intercept_)
print('MSE:', model.mean_squared_error(y_test, y_pred))
print('R2 Score:', model.r2_score(y_test, y_pred))


Coefficients: [229881.71649119 164743.4617047  120699.31900036   2845.6473084
 150450.18342634] 
Intercept: 1196329.2593111948
MSE: 11473117303.555122
R2 Score: 0.9027786984997319
