## Normalization using Scikit-Learn

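[scikit-learn](https://scikit-learn.org/stable/index.html) is an open-source, commercially usable machine learning toolkit. It contains implementations of many common machine learning algorithms, including the feature scaler (`StandardScaler`) and the gradient-descent regressor (`SGDRegressor`) used in this notebook.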

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from lab_utils_multi import load_house_data
from lab_utils_common import dlc
# Print NumPy floating-point values with 2 digits of precision so the
# arrays shown by the cells below stay compact.
np.set_printoptions(precision=2)

In [None]:
# Load the housing data as a (features, targets) pair.
# NOTE(review): presumably x_train has one column per entry of x_features,
# in the same order -- confirm against load_house_data.
x_train, y_train = load_house_data()

# Human-readable column labels; used as x-axis labels when plotting.
x_features = [
    'size(sqft)',
    'bedrooms',
    'floors',
    'age',
]

### Scaling the training data

In [None]:
# Fit the scaler on the training features and transform them in one step.
scaler = StandardScaler()
x_norm = scaler.fit_transform(x_train)

# Per-column peak-to-peak range (max - min) before and after scaling,
# to show how much the feature ranges have been equalised.
ptp_raw = np.ptp(x_train, axis=0)
ptp_norm = np.ptp(x_norm, axis=0)

print(f'Peak to Peak range by column in Raw form:        X: {ptp_raw}')
print(f'Peak to Peak range by column in Normalized form: X: {ptp_norm}')

### Create and fit the regression model

In [None]:
# Fit a linear regression model with stochastic gradient descent on the
# scaled features.
# random_state fixes the seed used when shuffling the training samples, so a
# fresh "Restart Kernel -> Run All" reproduces the same weights and counts.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(x_norm, y_train)

# n_iter_ is the number of passes actually run before the stopping criterion
# was met (at most max_iter); t_ is the total number of weight updates
# performed during training.
print(f'Number of iterations completed: {sgdr.n_iter_}')
print(f'Number of weight updates: {sgdr.t_}')

In [None]:
# Read the learned parameters off the fitted estimator. They apply to the
# scaled features (x_norm), since that is what the model was fitted on.
w_norm = sgdr.coef_
b_norm = sgdr.intercept_

print(f'Weights: {w_norm}, Bias: {b_norm}')

### Making predictions

In [None]:
# Predict on the scaled training features in two ways: via the estimator's
# predict() method, and by applying the learned weights and bias by hand.
y_pred_sgd = sgdr.predict(x_norm)
y_pred = np.dot(x_norm, w_norm) + b_norm

print(f'Prediction using np.dot():       {y_pred}')
print(f'Prediction using sgdr.predict(): {y_pred_sgd}')

# Compare with a floating-point tolerance rather than exact equality: two
# numerically equivalent computations can differ in the last bits depending
# on the order of operations.
print(f'Do both match? {np.allclose(y_pred, y_pred_sgd)}')

### Plotting the results

In [None]:
# One panel per feature column, all sharing the same y-axis, showing the
# targets and the model's predictions against that feature.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for i, panel in enumerate(ax):
    feature_column = x_train[:, i]
    panel.scatter(feature_column, y_train, label='Target')
    panel.scatter(feature_column, y_pred_sgd, color=dlc['dlorange'], label='Predicted')
    panel.set_xlabel(x_features[i])

# The y-axis is shared, so label it and show the legend on the first panel only.
ax[0].set_ylabel('Price')
ax[0].legend()

fig.suptitle('Target vs Predicted using z-score normalized model')
plt.show()