In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score

In [37]:
class MiniBatchGD:
    def __init__(self, learning_rate, epochs):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self, X_train, y_train):
        self.coef_ = np.ones(X_train.shape[1])
        self.intercept_ = 0

        for _ in range(self.epochs):
            indices = np.random.choice(X_train.shape[0], size=10, replace=False)
            X_temp = X_train.iloc[indices, :]
            y_temp = y_train.iloc[indices]

            y_pred = np.dot(X_temp, self.coef_) + self.intercept_
            delta_inter = (-2) * np.mean(y_temp - y_pred)
            self.intercept_ = self.intercept_ - (self.lr * delta_inter)

            delta_coef = (-2/X_temp.shape[1]) * np.dot(X_temp.T, (y_temp - y_pred))
            self.coef_ = self.coef_ - (self.lr * delta_coef)

        print(self.coef_, self.intercept_)

    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

In [38]:
df = pd.read_csv("/Users/raaggee/Documents/MachineLearningImplementations/house_price_regression_dataset.csv")
df.head()

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,262382.9
1,4272,3,3,2016,4.753014,1,6,985260.9
2,3592,1,2,2016,3.634823,0,9,777977.4
3,966,1,2,1977,2.730667,1,8,229698.9
4,4926,2,1,1993,4.699073,0,8,1041741.0


In [39]:
df = df.drop("Year_Built", axis=1)
df.head()

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,0.599637,0,5,262382.9
1,4272,3,3,4.753014,1,6,985260.9
2,3592,1,2,3.634823,0,9,777977.4
3,966,1,2,2.730667,1,8,229698.9
4,4926,2,1,4.699073,0,8,1041741.0


In [40]:
X = df.iloc[:, :6]
y = df["House_Price"]

In [41]:
n = X.shape[0]
indices = np.random.choice(n, size=10, replace=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [42]:
mgd = MiniBatchGD(0.001, 2)
mgd.fit(X_train, y_train)

[-1.33903330e+11 -1.28344076e+08 -1.04416888e+08 -1.23839913e+08
 -3.74239542e+07 -3.15836339e+08] -25681952.66806372


In [43]:
y_pred = mgd.predict(X_test)
r2_score(y_test, y_pred)

-2.660582552605969e+18

In [44]:
print(X_test.shape)
print(y_test.shape)

(200, 6)
(200,)
