In [28]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

In [29]:
def load_house_data(path='houses.txt'):
    """Load the housing dataset from a comma-separated text file.

    The first line of the file is skipped as a header. From every remaining
    row the first four columns are returned as X and the fifth column as y.

    Args:
        path: Location of the data file. Defaults to 'houses.txt', resolved
            against the current working directory.

    Returns:
        A tuple (X, y): X is a 2-D array with four columns and one row per
        data line, y is a 1-D array with one entry per data line.
    """
    # ndmin=2 keeps the array two-dimensional even when the file holds a
    # single data row; without it the column slicing below raises IndexError.
    data = np.loadtxt(path, delimiter=',', skiprows=1, ndmin=2)
    X = data[:, :4]
    y = data[:, 4]
    return X, y

In [30]:
# Labels for the four feature columns
# (presumably in the column order of X_train — confirm against houses.txt).
X_features = [
    'size(sqft)',
    'bedrooms',
    'floors',
    'age',
]

# Read the feature matrix and target vector from disk.
X_train, y_train = load_house_data()

# Normalize the training data

In [31]:
# Fit the scaler on the training features and rescale them in one step.
scaler = StandardScaler()
X_norm = scaler.fit_transform(X_train)

# Per-column spread (max - min) before and after scaling, to show the
# effect of normalization.
raw_ranges = np.ptp(X_train, axis=0)
norm_ranges = np.ptp(X_norm, axis=0)
print(f"Peak to Peak range by column in raw x: {raw_ranges}")
print(f"Peak to peak range by column Normalized x:{norm_ranges}")


Peak to Peak range by column in raw x: [2.406e+03 4.000e+00 1.000e+00 9.500e+01]
Peak to peak range by column Normalized x:[5.8452591  6.13529646 2.05626214 3.68533012]


# Create and fit the regression model

In [32]:
# Stochastic gradient descent shuffles the training data between epochs, so
# pin the seed: the fitted weights and iteration counts reported below then
# repeat exactly on every Restart & Run All.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(X_norm, y_train)
print(sgdr)
# n_iter_ is the number of epochs actually run; t_ is the number of
# individual weight updates performed.
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

SGDRegressor()
number of iterations completed: 118, number of weight updates: 11683.0


In [33]:
# Pull the learned parameters off the fitted SGD model: weights and bias.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_

# Show them next to the reference values quoted from the previous lab.
print(f"model parameters:                   w: {w_norm}, b:{b_norm}")
print("model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b: 363.16")

model parameters:                   w: [110.03630135 -20.99983558 -32.40460251 -38.07583252], b:[363.16157672]
model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b: 363.16


# Closed-form solution using the normal equation

In [34]:
# Fit ordinary linear regression on the raw (un-normalized) features.
linear_model = LinearRegression()
# Left as the last bare expression so the notebook still displays the fitted estimator.
linear_model.fit(X=X_train, y=y_train)


In [35]:
# Read the fitted weights and intercept back from the LinearRegression model.
w, b = linear_model.coef_, linear_model.intercept_
print(f"w = {w:}, b = {b:0.2f}")

w = [  0.26860107 -32.62006902 -67.25453872  -1.47297443], b = 220.42


In [36]:
# Compare the estimator's own predictions with ones computed by hand from
# w and b; only the first five rows are printed.
pred_from_model = linear_model.predict(X_train)
pred_from_params = X_train @ w + b
print(f"Prediction on training set: \n {pred_from_model[:5]}")
print(f"Prediction using w, b: \n {pred_from_params[:5]}")
print()

Prediction on training set: 
 [295.17615301 485.97796332 389.52416548 492.14712499 420.24701825]
Prediction using w, b: 
 [295.17615301 485.97796332 389.52416548 492.14712499 420.24701825]
