In [32]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

In [33]:
def load_house_data(path="./data/houses.txt"):
    """Load the housing data set from a comma-separated text file.

    The first line of the file is skipped as a header. Each remaining row is
    expected to hold at least five numeric columns: the first four are used
    as features and the fifth as the target.

    Args:
        path: Location of the data file. Defaults to "./data/houses.txt".

    Returns:
        A tuple ``(x, y)`` where ``x`` is a 2-D array with the first four
        columns of every row and ``y`` is a 1-D array with the fifth column.
    """
    # ndmin=2 keeps the result two-dimensional even when the file contains a
    # single data row; without it loadtxt returns a 1-D array and the column
    # slicing below raises IndexError.
    data = np.loadtxt(path, delimiter=",", skiprows=1, ndmin=2)
    x = data[:, :4]
    y = data[:, 4]
    return x, y

In [34]:
# Human-readable names for the four feature columns (assumed to follow the
# column order of the data file -- confirm against the file header).
x_labels = ["size(sqft)", "bedrooms", "floors", "age"]

# Feature matrix and target vector as returned by load_house_data().
x_train, y_train = load_house_data()

In [35]:
# Learn the per-feature scaling statistics from the training features, then
# apply that scaling to the same data to obtain the normalized inputs.
scaler = StandardScaler().fit(x_train)
x_norm = scaler.transform(x_train)

In [36]:
# Per-feature value range (max - min along axis 0) before and after scaling.
ptp_raw = np.ptp(x_train, axis=0)
ptp_norm = np.ptp(x_norm, axis=0)

print(f"peak-to-peak raw: {ptp_raw}")
print(f"peak-to-peak normalized: {ptp_norm}")

peak-to-peak raw: [2.406e+03 4.000e+00 1.000e+00 9.500e+01]
peak-to-peak normalized: [5.8452591  6.13529646 2.05626214 3.68533012]


In [37]:
# Fit a linear model with stochastic gradient descent on the normalized
# features. SGD shuffles the training samples, so random_state is pinned to
# make the fitted parameters and the counts printed below reproducible on a
# fresh Restart & Run All.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(x_norm, y_train)

print(sgdr)
# n_iter_ is the number of epochs actually run before stopping;
# t_ is the number of weight updates performed during training.
print(f"iteration completed: {sgdr.n_iter_}")
print(f"number of weight updates: {sgdr.t_}")

SGDRegressor()
iteration completed: 129
number of weight updates: 12772.0


In [38]:
# Weights and intercept of the model fitted on the normalized features.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_

# Two decimals and no scientific notation, so the fitted values can be read
# side by side with the reference parameters quoted from the previous lab.
with np.printoptions(suppress=True, precision=2):
    print(f"model parameters:                   w: {w_norm}, b:{b_norm}")
    print("model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b:[363.16]")

model parameters:                   w: [110.27 -21.04 -32.45 -38.02], b:[363.17]
model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b:[363.16]


In [39]:
# Predict two ways: through the estimator's predict() and by evaluating the
# linear model x @ w + b directly with the fitted parameters.
y_pred_sgd = sgdr.predict(x_norm)
y_pred = x_norm @ w_norm + b_norm

with np.printoptions(precision=2, suppress=False):
    # Compare with a tolerance rather than exact equality: the two code paths
    # may round floating-point results differently, and bitwise `==` would
    # then report a mismatch for predictions that are numerically the same.
    print(f"prediction using `np.dot()` and `sgdr.predict` match: {np.allclose(y_pred, y_pred_sgd)}")
    print(f"prediction: {y_pred[:4]}")
    print(f"target    : {y_train[:4]}")

prediction using `np.dot()` and `sgdr.predict` match: True
prediction: [295.16 486.07 389.73 492.24]
target    : [300.  509.8 394.  540. ]
