In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

In [27]:
class PolynomialMiniGD:
    def __init__(self, learning_rate, epochs, degree):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.degree = degree
        self.poly_features = PolynomialFeatures(degree = self.degree)

    def fit(self, X_train, y_train):
        self.intercept_ = 0
        X_poly = self.poly_features.fit_transform(X_train)
        self.coef_ = np.ones(X_poly.shape[1])

        for _ in range(self.epochs):
            indices = np.random.choice(X_train.shape[0], size=10, replace=True)
            X_temp = self.poly_features.transform(X_train.iloc[indices, :])
            y_temp = y_train.iloc[indices]

            y_pred = np.dot(X_temp, self.coef_) + self.intercept_
            delta_inter = (-2) * np.mean(y_temp - y_pred)
            self.intercept_ = self.intercept_ - (self.lr * delta_inter)

            delta_coef = (-2/X_temp.shape[1]) * np.dot(X_temp.T, (y_temp - y_pred))
            self.coef_ = self.coef_ - (self.lr * delta_coef)

        print(self.coef_, self.intercept_)

    def predict(self, X_test):
        return np.dot(self.poly_features.transform(X_test), self.coef_) + self.intercept_

In [5]:
df = pd.read_csv("/Users/raaggee/Documents/MachineLearningImplementations/house_price_regression_dataset.csv")
df.head()

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,262382.9
1,4272,3,3,2016,4.753014,1,6,985260.9
2,3592,1,2,2016,3.634823,0,9,777977.4
3,966,1,2,1977,2.730667,1,8,229698.9
4,4926,2,1,1993,4.699073,0,8,1041741.0


In [6]:
df = df.drop("Year_Built", axis=1)
df.head()

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,0.599637,0,5,262382.9
1,4272,3,3,4.753014,1,6,985260.9
2,3592,1,2,3.634823,0,9,777977.4
3,966,1,2,2.730667,1,8,229698.9
4,4926,2,1,4.699073,0,8,1041741.0


In [7]:
X = df.iloc[:, :6]
y = df["House_Price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
poly_min_gd = PolynomialMiniGD(0.00001, 1, 2)
poly_min_gd.fit(X_train, y_train)

[-9.69832103e+01 -4.04296599e+05 -3.01700010e+02 -1.72841530e+02
 -2.14447672e+02 -9.47132624e+01 -4.00909570e+02 -1.72927017e+09
 -1.25308440e+06 -7.22438927e+05 -8.39649092e+05 -4.19065433e+05
 -1.56307427e+06 -1.01563927e+03 -5.73270696e+02 -7.02668119e+02
 -2.66937151e+02 -1.26479530e+03 -3.65951968e+02 -4.05534160e+02
 -1.76945560e+02 -6.55620017e+02 -6.36172386e+02 -2.28387943e+02
 -1.00001956e+03 -1.69729625e+02 -2.66709255e+02 -2.74178134e+03] -274.352988778843


In [29]:
y_predict = poly_min_gd.predict(X_test)
r2_score(y_test, y_predict)

-6.607218864704335e+21