# Linear Regression with scikit-learn

In [34]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [37]:
def load_house_data(path="LRwith1varf/data/houses.txt"):
    """Load the housing data set from a comma-separated text file.

    The first line of the file is skipped as a header. Of the remaining
    numeric columns, the first four are returned as features and the fifth
    as the target.

    Args:
        path: Location of the data file. Defaults to the path this notebook
            originally hard-coded, so existing zero-argument calls behave
            as before.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(m, 4)`` and ``y`` has
        shape ``(m,)`` for a file with ``m`` data rows.
    """
    # ndmin=2 keeps the array two-dimensional even when the file holds a
    # single data row; otherwise the column slicing below would fail.
    data = np.loadtxt(path, delimiter=',', skiprows=1, ndmin=2)
    X = data[:, :4]
    y = data[:, 4]
    return X, y

In [38]:
# Read the feature matrix and target vector from the default data file.
X_train, y_train = load_house_data()

In [39]:
# Sanity check: number of examples and features, and the matching target length.
print(X_train.shape, y_train.shape)

(99, 4) (99,)


In [41]:
# Human-readable names for the four feature columns, in column order.
X_features = [
    "size(sqft)",
    "bedrooms",
    "floors",
    "age",
]

In [42]:
# Standardize the features: the scaler is fitted on the training data and the
# same data is transformed in one step. Keep `scaler` around so the identical
# transformation can be applied to new inputs later.
scaler = StandardScaler()
X_norm = scaler.fit_transform(X_train)

In [57]:
# Compare the per-column spread (max - min) before and after scaling.
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train, axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm, axis=0)}")

Peak to Peak range by column in Raw        X:[2.406e+03 4.000e+00 1.000e+00 9.500e+01]
Peak to Peak range by column in Normalized X:[5.8452591  6.13529646 2.05626214 3.68533012]


In [60]:
# Fit a linear model with stochastic gradient descent on the scaled features.
# random_state is fixed because SGDRegressor shuffles the training data each
# epoch by default; without a seed the learned parameters and the iteration
# count differ on every run, so the notebook would not be reproducible.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(X_norm, y_train)
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

number of iterations completed: 115, number of weight updates: 11386.0


In [61]:
# Pull the learned parameters off the fitted estimator. They apply to the
# normalized features, since the model was fitted on X_norm.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_

In [62]:
# Show the fitted weights and intercept.
print(
    f"model parameters:                   w: {w_norm}, b:{b_norm}"
)

model parameters:                   w: [109.95989358 -20.99339224 -32.3976722  -38.07112404], b:[363.1627216]


### Predictions
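Predict the targets for the training data, both with `sgdr.predict` and manually from the learned `w` and `b`, and check that the two results match.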

In [65]:
# Predict with the fitted estimator.
y_pred_sgd = sgdr.predict(X_norm)
# Reproduce the prediction by hand from the learned parameters: X·w + b.
y_pred = np.dot(X_norm, w_norm) + b_norm
# Compare with a tolerance rather than `==`: two mathematically equal float
# computations may differ in the last bits depending on platform and library.
print(f"prediction using np.dot() and sgdr.predict match: {np.allclose(y_pred, y_pred_sgd)}")

# Show the first few predictions next to their true targets.
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")

prediction using np.dot() and sgdr.predict match: True
Prediction on training set:
[295.21132715 485.80805253 389.58021117 491.96754092]
Target values 
[300.  509.8 394.  540. ]
