## Feature scaling and learning rate

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from lab_utils_multi import load_house_data, run_gradient_descent
from lab_utils_multi import norm_plot, plt_equal_scale, plot_cost_i_w
from lab_utils_common import dlc
np.set_printoptions(precision=2)

## Dataset
| Size (sqft) | Number of Bedrooms  | Number of Floors | Age of Home | Price (1000s of dollars)  |   
| ----------------| ------------------- |----------------- |--------------|----------------------- |  
| 952             | 2                   | 1                | 65           | 271.5                  |  
| 1244            | 3                   | 2                | 64           | 232                    |  
| 1947            | 3                   | 2                | 17           | 509.8                  |  
| ...             | ...                 | ...              | ...          | ...                    |

In [None]:
# Load the training features and targets using the helper imported from lab_utils_multi.
x_train, y_train=load_house_data()
# Display names used as axis labels in the plots below.
# NOTE(review): assumed to be in the same order as the columns of x_train -- confirm against load_house_data.
x_features=['size(sqft)', 'bedrooms', 'floors', 'age']

In [None]:
# One scatter panel per feature column, all sharing the price axis.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)

for col, panel in enumerate(ax):
  panel.scatter(x_train[:, col], y_train)
  panel.set_xlabel(x_features[col])

# The y axis is shared, so labelling the left-most panel is enough.
ax[0].set_ylabel('Price (1000s)')
plt.show()

In [None]:
# First attempt on the features as loaded (not yet normalized), with alpha=9.9e-7.
# Only the third return value is kept (as hist); the first two are discarded.
# NOTE(review): the third positional argument (10) is presumably the number of
# iterations -- confirm against run_gradient_descent in lab_utils_multi.
_, _, hist=run_gradient_descent(x_train, y_train, 10, alpha=9.9e-7)

It appears the learning rate is too high: the solution does not converge, and the cost is *increasing* rather than decreasing.

In [None]:
# Visualize the run recorded in hist (alpha=9.9e-7).
# NOTE(review): plot_cost_i_w presumably plots cost against iteration and against
# a weight -- confirm in lab_utils_multi.
plot_cost_i_w(x_train, y_train, hist)

In [None]:
# Second attempt with a slightly smaller learning rate (alpha=9e-7, down from 9.9e-7).
# hist is overwritten with the history of this run.
_, _, hist=run_gradient_descent(x_train, y_train, 10, alpha=9e-7)

In [None]:
# Visualize the run recorded in hist (alpha=9e-7).
plot_cost_i_w(x_train, y_train, hist)

In [None]:
# Third attempt with a learning rate roughly an order of magnitude smaller (alpha=1e-7).
# hist is overwritten with the history of this run.
_, _, hist=run_gradient_descent(x_train, y_train, 10, alpha=1e-7)

In [None]:
# Visualize the run recorded in hist (alpha=1e-7).
plot_cost_i_w(x_train, y_train, hist)

### Z-score normalization

In [None]:
def z_score_feature_normalization(x):
  """Z-score normalize each column of x.

  Every column is shifted by its mean and divided by its (population)
  standard deviation, both computed along axis 0.

  Args:
    x: array of shape (m, n) -- m examples, n features.

  Returns:
    A tuple (x_norm, mu, sigma):
      x_norm: array with the same shape as x, normalized column by column.
      mu:     per-column mean of x.
      sigma:  per-column scale that was used as the divisor. It equals the
              standard deviation, except that a zero standard deviation
              (constant column) is replaced by 1.0.

  Reuse the returned mu and sigma to normalize new inputs the same way:
  (x_new - mu) / sigma.
  """
  mu=np.mean(x, axis=0)
  sigma=np.std(x, axis=0)

  # A constant column has zero spread; dividing by it would yield NaN/inf.
  # Use a unit scale instead so the column simply becomes all zeros.
  sigma=np.where(sigma == 0, 1.0, sigma)

  x_norm=(x-mu)/sigma

  return (x_norm, mu, sigma)

In [None]:
# Walk through z-score normalization in two steps on one pair of feature
# columns (0 and 3): subtract the per-column mean, then divide by the
# per-column standard deviation.
mu = np.mean(x_train, axis=0)
sigma = np.std(x_train, axis=0)

x_mean = x_train - mu
x_norm = x_mean / sigma

fig, ax = plt.subplots(1, 3, figsize=(12, 3))

# (data, panel title) for each panel, left to right.
stages = [
  (x_train, 'Un-normalized'),
  (x_mean, 'Mean-normalized'),
  (x_norm, 'Z-normalized'),
]
for panel, (data, title) in zip(ax, stages):
  panel.scatter(data[:, 0], data[:, 3])
  panel.set_xlabel(x_features[0])
  panel.set_ylabel(x_features[3])
  panel.set_title(title)
  # Equal axis scaling makes the difference in spread between the two features visible.
  panel.axis('equal')

# Reserve room at the top of the figure for the overall title.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.suptitle('Distribution of features before, during, after normalization')
plt.show()

In [None]:
# Normalize the training features; x_mu and x_sigma are kept so that new
# inputs can be normalized the same way later on.
x_norm, x_mu, x_sigma = z_score_feature_normalization(x_train)

# Peak-to-peak (max - min) range of every column, before and after normalization.
range_raw = np.ptp(x_train, axis=0)
range_norm = np.ptp(x_norm, axis=0)

print(f'x_mu: {x_mu}, x_sigma={x_sigma}')
print(f'Peak to peak range by column in Raw form        x: {range_raw}')
print(f'Peak to peak range by column in Normalized form x: {range_norm}')

In [None]:
# Draw the same four-panel figure twice: once for the raw features and once
# for the normalized ones. norm_plot (from lab_utils_multi) draws one column
# on the axes it is given.
for data, figure_title in (
  (x_train, 'Distribution of features before normalization'),
  (x_norm, 'Distribution of features after normalization'),
):
  fig, ax = plt.subplots(1, 4, figsize=(12, 3))
  for i, panel in enumerate(ax):
    norm_plot(panel, data[:, i])
    panel.set_xlabel(x_features[i])
  ax[0].set_ylabel('Count')
  fig.suptitle(figure_title)
  plt.show()

In [None]:
# Re-run gradient descent on the z-score normalized features, keeping the first
# two return values (w_norm, b_norm) for the prediction cells below.
# NOTE(review): the positional arguments 1000 and 1.0e-1 are presumably the
# iteration count and the learning rate alpha -- confirm against
# run_gradient_descent in lab_utils_multi.
w_norm, b_norm, hist=run_gradient_descent(x_norm, y_train, 1000, 1.0e-1)

In [None]:
# Predict the target for every training example with the model fitted on the
# normalized features, then overlay predictions on the targets.
m = x_norm.shape[0]  # number of training examples (kept as a notebook-level name)

# One vectorized product over all rows instead of a per-example Python loop.
yp = np.dot(x_norm, w_norm) + b_norm

# The x axes use the original (un-normalized) feature values so the panels
# stay readable; only the model input is normalized.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for i, panel in enumerate(ax):
  panel.scatter(x_train[:, i], y_train, label='Target')
  panel.set_xlabel(x_features[i])
  panel.scatter(x_train[:, i], yp, color=dlc['dlorange'], label='Predicted')
# Same unit label as the earlier price plot: targets are in thousands of dollars.
ax[0].set_ylabel('Price (1000s)')
ax[0].legend()

fig.suptitle('Target vs Predicted using z-score normalized model')
plt.show()

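Now we can predict the price of other houses. For example, consider a house with 1200 sqft, 3 bedrooms, and 1 floor that is 40 years old. Because the model was trained on normalized features, the new input must first be normalized with the mean and standard deviation computed from the training data.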

In [None]:
# Features of the new house, in the same column order as the training data:
# size (sqft), bedrooms, floors, age.
x_house = np.array([1200, 3, 1, 40])

# Normalize with the statistics of the training set (x_mu, x_sigma), not with
# statistics of the new sample, so the input matches what the model was fitted on.
x_house_norm = (x_house - x_mu) / x_sigma
print(x_house_norm)

x_house_pred = np.dot(x_house_norm, w_norm) + b_norm
# The targets are in thousands of dollars, so scale by 1000 before printing
# the value after a dollar sign.
print(f'The price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old is ${x_house_pred*1000:0.0f}')

In [None]:
# NOTE(review): plt_equal_scale (from lab_utils_multi) presumably compares the raw
# and normalized features on equally scaled axes -- confirm in the helper.
plt_equal_scale(x_train, x_norm, y_train)