# Scikit-Learn


In [1]:
# Record the Python version for reproducibility.
# NOTE(review): `!` runs `python` through the system shell, which may resolve to a
# different interpreter than the one backing this kernel — confirm if it matters.
!python --version

Python 3.9.6


In [3]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from common import load_house_data

# Keep printed NumPy float arrays compact: 2 digits of precision.
np.set_printoptions(precision=2)

In [4]:
# Load the training examples and targets from the course helper module.
X_train, y_train = load_house_data()
# Feature labels — presumably in the same order as the columns of X_train; verify
# against load_house_data.
x_features = ['size', 'bedrooms', 'floors', 'age']

# Standardise every feature column (zero mean, unit variance by default). The scaler
# is fitted on the training data and then applied to that same data.
scaler = StandardScaler()
X_norm = scaler.fit(X_train).transform(X_train)

# Per-column peak-to-peak range (max - min) before and after scaling, to show
# how much the spread between features shrinks.
range_raw = np.ptp(X_train, axis=0)
range_norm = np.ptp(X_norm, axis=0)
print(f"peak to peak column in row X_train:{range_raw}")
print(f"peak to peak column in row X_norm:{range_norm}")

peak to peak column in row X_train:[2.406e+03 4.000e+00 1.000e+00 9.500e+01]
peak to peak column in row X_norm:[5.8452591  6.13529646 2.05626214 3.68533012]


In [5]:
# Fit a linear regression model with stochastic gradient descent on the
# standardised features.
# SGDRegressor shuffles the training data between epochs; pinning random_state
# makes the fitted weights, iteration counts and predictions reproducible under
# Restart & Run All. The seed value itself (42) is arbitrary.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(X_norm, y_train)

print(sgdr)
# n_iter_: epochs actually run before the stopping criterion was met;
# t_: number of weight updates performed during training.
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

SGDRegressor()
number of iterations completed: 119, number of weight updates: 11782.0


In [6]:
# Pull the learned weights and intercept off the fitted estimator. They apply to
# the standardised inputs (X_norm) the model was trained on.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_

print(f"model parameters:                   w: {w_norm}, b:{b_norm}")

model parameters:                   w: [110.06600213 -21.01672824 -32.40634408 -38.08056126], b:[363.15445623]


In [7]:
# Predict once through the estimator's own API ...
y_pred_sgd = sgdr.predict(X_norm)
# ... and once by hand from the learned weights and intercept: X·w + b.
y_pred = np.dot(X_norm, w_norm) + b_norm

# Compare within floating-point tolerance rather than with `==`: the two code
# paths may round differently (e.g. different BLAS routines), so bit-exact
# equality can report a spurious mismatch even when the predictions agree.
print(f"prediction using np.dot() and sgdr.predict match: {np.allclose(y_pred, y_pred_sgd)}")

# Show the first few predictions next to their targets for a quick sanity check.
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")

prediction using np.dot() and sgdr.predict match: True
Prediction on training set:
[295.14545385 485.92277737 389.62855519 492.08609139]
Target values 
[300.  509.8 394.  540. ]
