In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [8]:
df_ = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/USA_Housing.csv')
df_.drop('Address',inplace = True, axis=1)
df_.head()


Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5


In [9]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
df = pd.DataFrame(ss.fit_transform(df_.drop('Price',axis=1)), columns = df_.columns[:5])
df['Price'] = df_['Price']
df.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
0,1.02866,-0.296927,0.021274,0.088062,-1.317599,1059034.0
1,1.000808,0.025902,-0.255506,-0.722301,0.403999,1505891.0
2,-0.684629,-0.112303,1.516243,0.93084,0.07241,1058988.0
3,-0.491499,1.221572,-1.393077,-0.58454,-0.186734,1260617.0
4,-0.807073,-0.944834,0.846742,0.201513,-0.988387,630943.5


In [10]:
X = df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms','Avg. Area Number of Bedrooms', 'Area Population']]
y = df['Price']

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)

model = Lasso(alpha = 0.1)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)
print('MSE :',mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [230464.43562901 164159.10666287 120514.75363711   2913.50583579
 151019.25572424] 
Intercept : 1231278.6353127845
MSE : 10068421667.794294
R2 Score : 0.914681857362938


## Gradient Descent

In [12]:
class GD_Lasso_Regressor:

  def __init__(self, learning_rate, epochs, alpha):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.coef_ = None
    self.intercept_ = None
    self.alpha = alpha

  def fit(self, X_train, y_train):
      self.coef_ = np.zeros(X_train.shape[1])
      self.intercept_ = 0
      X_train = np.array(X_train)
      y_train = np.array(y_train)

      for epoch in range(self.epochs):
          dj_dw = np.zeros(X_train.shape[1])

          for i in range(self.coef_.shape[0]):
              y_hat = np.dot(X_train, self.coef_) + self.intercept_
              if self.coef_[i] > 0:
                  dj_dw[i] = (np.dot((y_hat - y_train), X_train[:,i]) + self.alpha) / X_train.shape[0]
              elif self.coef_[i] <= 0:
                  dj_dw[i] = (np.dot((y_hat - y_train), X_train[:,i]) - self.alpha) / X_train.shape[0]

          dj_db = -2 * np.mean(y_train - y_hat)

          self.coef_ = self.coef_ - self.learning_rate * dj_dw
          self.intercept_ = self.intercept_ - self.learning_rate * dj_db

      return self.coef_, self.intercept_

  def predict(self,X_test):
    self.y_pred = np.dot(self.coef_, X_test.T) + self.intercept_
    return self.y_pred

  @staticmethod
  def mean_squared_error(y_test,y_pred):
    return ((y_test - y_pred)**2).mean()

  @staticmethod
  def r2_score(y_test,y_pred):
    y_mean = np.mean(y_test)
    ss_total = np.sum((y_test - y_mean) ** 2)
    ss_residual = np.sum((y_test - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2


model = GD_Lasso_Regressor(learning_rate=0.1, epochs=500, alpha=0.1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


print('Coefficients :',model.coef_, '\n' 'Intercept :',model.intercept_)
print('MSE :',model.mean_squared_error(y_test,y_pred))
print('R2 Score :',r2_score(y_test,y_pred))

Coefficients : [230464.52517397 164159.19979713 120514.71326432   2913.62422577
 151019.35862316] 
Intercept : 1231278.6368711332
MSE : 10068422551.282862
R2 Score : 0.9146818498764017
