# Machine Learning Master Notes 10 - Feature Scaling (Reversing Scaled Weights)

### Prepare Environment

In [1]:
%matplotlib inline
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib import cm

# SciKit Learn Regression Model
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

# SciKit Learn Pre-processing and Feature Scaling
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

# The following module contains the finalized gradient descent and cost function implementations
import MyRegressionProgramV1 as my

## Multiple Linear Regression: Cost Function and Gradient Descent

Hypothesis: $$f_{\vec{w},b}(\vec{X}^{(i)})=b + \sum\limits_{j=0}^{n-1} \vec{w}_{j}\vec{X}_{j}^{(i)}$$


Cost Function:	$$J(\vec w, b) = \frac{1}{2m}   \sum\limits_{i=0}^{m-1} (f_{\vec w,b}(\vec{X}^{(i)})-\vec y^{(i)})^{2}$$ 
$$J(\vec w, b) = \frac{1}{2m} \sum\limits_{i=0}^{m-1} \left(\left(b + \sum\limits_{j=0}^{n-1} \vec w_{j} \vec X_{j}^{(i)} \right)-\vec y^{(i)}\right)^{2}$$
$$J(\vec w, b) = \frac{1}{2m} \sum\limits_{i=0}^{m-1} \left(\left(b + \vec X^{(i)} \cdot \vec w \right)-\vec y^{(i)}\right)^{2}$$
$$$$
Gradient Descent Algorithm: $$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\;  \vec w &= \vec w -  \alpha \frac{\partial J(\vec{w},b)}{\partial \vec{w}}  \; \newline 
 b &= b -  \alpha \frac{\partial J(\vec{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$


Partial Derivatives: $$
\begin{align}
\frac{\partial J(\vec{w},b)}{\partial \vec{w}}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\vec{w},b}(\vec{X}^{(i)}) - y^{(i)})\vec{X}^{(i)} \\
  \frac{\partial J(\vec{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\vec{w},b}(\vec{X}^{(i)}) - y^{(i)}) \\
\end{align}
$$

Full Implementation of Gradient Descent:
$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\;  \vec{w} &= \vec{w} -  \alpha \frac{1}{m} \sum\limits_{i = 0}^{m-1} ((\vec{X}^{(i)}\cdot \vec{w} + b) - y^{(i)})\vec{X}^{(i)}  \; \newline 
 b &= b -  \alpha \frac{1}{m} \sum\limits_{i = 0}^{m-1} ((\vec{X}^{(i)}\cdot \vec{w} + b) - y^{(i)})  \newline \rbrace
\end{align*}$$

### Prepare Data

In [2]:
df = pd.read_csv('./data/housing_one_var.csv')
df.head()

Unnamed: 0,sqft,price
0,2104,399900
1,1600,329900
2,2400,369000
3,1416,232000
4,3000,539900


In [3]:
df2 = pd.read_csv('./data/housing_two_var.txt')
df2.head()

Unnamed: 0,sqft,rm,price
0,2104,3,399900
1,1600,3,329900
2,2400,3,369000
3,1416,2,232000
4,3000,4,539900


### Own Function

In [4]:
def mean_norm(X):
    """
    Mean-normalize every column of X.

    Formula (per column, axis=0):
        x_scaled = (x - mean(x)) / (max(x) - min(x))
    There is no directly equivalent scaler in sklearn.

    RETURN:
    scaled = the mean-normalized data
    avg = mean of each column
    norm_range = max minus min of each column
    Keep avg and norm_range to scale the trained weights back afterwards.
    """
    avg = X.mean(axis=0)
    norm_range = X.max(axis=0) - X.min(axis=0)
    return (X - avg) / norm_range, avg, norm_range

In [5]:
def minmax_scaling(X):
    """
    Min-max scale every column of X, replicating sklearn's MinMaxScaler.

    Formula (per column, axis=0):
        x_scaled = (x - min(x)) / (max(x) - min(x))

    RETURN:
    scaled = the scaled data
    col_min = minimum of each column
    col_span = max minus min of each column
    """
    col_min = X.min(axis=0)
    col_span = X.max(axis=0) - col_min
    return (X - col_min) / col_span, col_min, col_span

In [6]:
def std_norm_v2(X):
    """
    Z-score normalizer: x_scaled = (x - mean(x)) / std(x), per column (axis=0).
    The comparable scaler in sklearn is StandardScaler.

    A pandas Series is first converted to a one-column DataFrame.
    For DataFrames the standard deviation is computed with ddof=0; any other
    input uses its own ``std(axis=0)`` with default arguments.

    RETURN:
    X_norm = the scaled data
    avg = mean of each column
    std = standard deviation of each column
    """
    data = X.to_frame() if isinstance(X, pd.Series) else X

    # Only the DataFrame branch passes ddof explicitly.
    std_options = {"ddof": 0} if isinstance(data, pd.DataFrame) else {}

    avg = data.mean(axis=0)
    std = data.std(axis=0, **std_options)

    return (data - avg) / std, avg, std

## Scaling Back Coefficient and Intercept After Feature Scaling (One Feature)

### Scaling Back Z-Score Scaling

For the following examples, we train the model using normalized data and then convert the resulting (scaled) coefficient and intercept back to their un-normalized form.

For Z-Score Normalization, the formula is

$$x_{scaled} = \frac{x - \mu}{\sigma}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{scaled} * w_{scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x - \mu}{\sigma} * w_{scaled}$$

$$\hat{y} = b_{scaled} + (\frac{x - \mu}{\sigma}) w_{scaled}$$



$$\hat{y} = b_{scaled} + (\frac{x - \mu}{\sigma}) w_{scaled}$$
$$\hat{y} = b_{scaled} + \frac{xw_{scaled}}{\sigma} - \frac{\mu w_{scaled}}{\sigma}$$
$$\hat{y} = b_{scaled} + x(\frac{w_{scaled}}{\sigma}) - \frac{\mu w_{scaled}}{\sigma}$$
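Comparing this with the un-scaled model
$$\hat{y} = b + xw$$
and grouping the terms accordingly,
$$\hat{y} = \left(b_{scaled}  - \frac{\mu w_{scaled}}{\sigma}\right) + x\left(\frac{w_{scaled}}{\sigma}\right)$$
we can read off the un-scaled parameters: $b = b_{scaled} - \frac{\mu w_{scaled}}{\sigma}$ and $w = \frac{w_{scaled}}{\sigma}$.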

In [7]:
x_train4 = df['sqft']
y_train4 = df['price']
x_train4 = x_train4.to_frame()
y_train4 = y_train4.to_frame()

In [8]:
# The following is to train the model using normal data
# This is for comparison purpose
reg4 = LinearRegression().fit(x_train4, y_train4)
print('w',reg4.coef_)
print('b',reg4.intercept_)

w [[134.52528772]]
b [71270.49244873]


In [9]:
# Sklearn with standard scaler
scaler4 = StandardScaler().fit(x_train4)
normalized_arr4 = scaler4.transform(x_train4)
reg4 = LinearRegression().fit(normalized_arr4, y_train4)
print('w',reg4.coef_)
print('b',reg4.intercept_)

w [[105764.13349282]]
b [340412.65957447]


In [10]:
print('std deviation',scaler4.scale_)
print('mean',scaler4.mean_)

std deviation [786.20261874]
mean [2000.68085106]


In [11]:
# My gradient Descent
x_norm4, avg4, stddev4 = std_norm_v2(x_train4)
coef4, intercept4, _, _, _ = my.compute_gradient_descent(x_norm4, y_train4)

iteration 9999: Last cost = 2.0581e+09: intercept = 3.4041e+05: weights = [[105764.13349281]]
best w [[105764.1335]]
best b 340412.6596


In [12]:
print('std deviation',stddev4)
print('mean',avg4)

std deviation sqft    786.202619
dtype: float64
mean sqft    2000.680851
dtype: float64


In [13]:
# SciKit Learn Regression without Scaling
lr4 = LinearRegression().fit(x_train4, y_train4)
print('w',lr4.coef_)
print('b',lr4.intercept_)

w [[134.52528772]]
b [71270.49244873]


#### To Scale Back to the Original Coefficient

In [14]:
# Scale back Sklearn
or_coef_sk = reg4.coef_ / scaler4.scale_
or_coef_sk

array([[134.52528772]])

In [15]:
or_intercept_sk = reg4.intercept_ - reg4.coef_ * scaler4.mean_ / scaler4.scale_
or_intercept_sk

array([[71270.49244873]])

In [16]:
coef4.shape

(1, 1)

In [17]:
stddev4.shape

(1,)

In [18]:
# Scale back own GD
or_coef_gd = coef4[0] / stddev4
or_coef_gd

sqft    134.525288
dtype: float64

In [19]:
or_intercept_gd = intercept4 - coef4[0] * avg4 / stddev4
or_intercept_gd

sqft    71270.492449
dtype: float64

In [20]:
def reverse_bw_std_norm_v1(coef, intercept, avg, stddev):
    '''
    Reverse the coefficient and intercept of a single-feature model that was
    trained on data scaled with std_norm.
    Pass the same mean and standard deviation that std_norm returned.

    Un-scaled coefficient: w = w_scaled / stddev
    Un-scaled intercept:   b = b_scaled - w_scaled * avg / stddev
    '''
    # The weight is flattened to shape (1,), so exactly one feature is supported.
    weight = coef.reshape(1,)
    shift = np.negative(weight * avg / stddev).sum()
    return weight / stddev, intercept + shift

In [21]:
reverse_bw_std_norm_v1(coef4, intercept4, avg4, stddev4)

(sqft    134.525288
 dtype: float64,
 71270.49244872801)

### Scaling Back Simple Scaling

Previously, to avoid the Python overflow error, we simply divided the size of the house by 1000. For this simple scaling we can also recover the non-scaled intercept and coefficient.

For simple scaling, the formula is

$$x_{scaled} = \frac{x}{1000}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{scaled} * w_{scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x}{1000} * w_{scaled}$$

$$\hat{y} = b_{scaled} + (\frac{x}{1000}) w_{scaled}$$

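$$\hat{y} = b_{scaled} + x\frac{w_{scaled}}{1000}$$

Comparing with $\hat{y} = b + xw$, the intercept is unchanged ($b = b_{scaled}$) and the un-scaled coefficient is $w = \frac{w_{scaled}}{1000}$.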

In [22]:
x_train4.head()

Unnamed: 0,sqft
0,2104
1,1600
2,2400
3,1416
4,3000


In [23]:
coef5, intercept5, _, _, _ = my.compute_gradient_descent(x_train4/1000, y_train4)

iteration 9999: Last cost = 2.0581e+09: intercept = 7.1270e+04: weights = [[134525.34483267]]
best w [[134525.3448]]
best b 71270.3637


In [24]:
or_coef = coef5 / 1000
or_coef

array([[134.52534483]])

In [25]:
or_intercept = intercept5
or_intercept

71270.36374313367

### Scaling Back Mean Normalization Scaling

For mean normalization, the formula is

$$x_{scaled} = \frac{x - \mu}{max(x)-min(x)}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{scaled} * w_{scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x - \mu}{max(x)-min(x)} * w_{scaled}$$

$$\hat{y} = b_{scaled} + \left(\frac{x - \mu}{max(x)-min(x)}\right) w_{scaled}$$

$$\hat{y} = b_{scaled} + \frac{xw_{scaled} - w_{scaled}\mu}{max(x)-min(x)}$$
$$\hat{y} = b_{scaled} + \frac{xw_{scaled}}{max(x)-min(x)} - \frac{w_{scaled}\mu}{max(x)-min(x)}$$

$$\hat{y} = b_{scaled} + x\left(\frac{w_{scaled}}{max(x)-min(x)}\right) - \frac{w_{scaled}\mu}{max(x)-min(x)}$$
$$\hat{y} = b + xw$$
$$\hat{y} = b_{scaled} - \frac{w_{scaled}\mu}{max(x)-min(x)} + x\left(\frac{w_{scaled}}{max(x)-min(x)}\right)$$

In [26]:
# Build the (m, 1) numpy arrays directly from the housing DataFrame, so this
# cell does not depend on x_train4 / y_train4 from the earlier z-score section.
x_train6 = df[['sqft']].to_numpy()
y_train6 = df[['price']].to_numpy()

In [27]:
x_norm6, avg6, norm_range6 = mean_norm(x_train6)
coef6, intercept6, _, _, _ = my.compute_gradient_descent(x_norm6,y_train6, iterations = 30000)

iteration 29999: Last cost = 2.0581e+09: intercept = 3.4041e+05: weights = [[487788.32884679]]
best w [[487788.3288]]
best b 340412.6596


In [28]:
or_coef = coef6[0] / norm_range6
or_coef

array([134.52518722])

In [29]:
or_intercept = intercept6 - coef6[0] * avg6 / norm_range6
or_intercept

array([71270.69352477])

**Mean normalization takes longer to converge: it needed 30,000 iterations here versus 10,000 for z-score scaling, i.e. 3 times as many.**

### Scaling Back MinMax Scaling

For MinMax Scaler, the formula is

$$x_{scaled} = \frac{x - min(x)}{max(x)-min(x)}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{scaled} * w_{scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x - min(x)}{max(x)-min(x)} * w_{scaled}$$

$$\hat{y} = b_{scaled} + \left(\frac{x - min(x)}{max(x)-min(x)}\right) w_{scaled}$$

$$\hat{y} = b_{scaled} + \frac{xw_{scaled} - min(x)w_{scaled}}{max(x)-min(x)}$$
$$\hat{y} = b_{scaled} + \frac{xw_{scaled}}{max(x)-min(x)} - \frac{min(x)w_{scaled}}{max(x)-min(x)}$$

$$\hat{y} = b_{scaled} + x\left(\frac{w_{scaled}}{max(x)-min(x)}\right) - \frac{min(x)w_{scaled}}{max(x)-min(x)}$$
$$\hat{y} = b + xw$$
$$\hat{y} = b_{scaled} - \frac{min(x)w_{scaled}}{max(x)-min(x)} + x\left(\frac{w_{scaled}}{max(x)-min(x)}\right)$$

In [30]:
x_train7 = x_train6.copy()
y_train7 = y_train6.copy()

In [31]:
x_norm7, min7, range7 = minmax_scaling(x_train7)
coef7, intercept7, _, _, _ = my.compute_gradient_descent(x_norm7,y_train7, iterations = 30000)

iteration 29999: Last cost = 2.0581e+09: intercept = 1.8589e+05: weights = [[487787.59848666]]
best w [[487787.5985]]
best b 185886.3998


In [32]:
or_coef = coef7[0] / range7
or_coef

array([134.52498579])

In [33]:
or_intercept = intercept7 - (coef7[0] * min7 / range7)
or_intercept

array([71271.11192188])

## Scaling Back Coefficient and Intercept After Feature Scaling (Two Features)

### Scaling Back Coefficient and Intercept for Z-Score

For Z-Score Normalization, the formula is

$$x_{scaled} = \frac{x - \mu}{\sigma}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{1 scaled} * w_{1 scaled}+ x_{2 scaled} * w_{2 scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x_1 - \mu_1}{\sigma_1} * w_{1scaled} + \frac{x_2 - \mu_2}{\sigma_2} * w_{2scaled}$$

$$\hat{y} = b_{scaled} + \left(\frac{x_1 - \mu_1}{\sigma_1}\right)w_{1scaled} + \left(\frac{x_2 - \mu_2}{\sigma_2}\right)w_{2scaled}$$



$$\hat{y} = b_{scaled} + \left(\frac{x_1 - \mu_1}{\sigma_1}\right)w_{1scaled} + \left(\frac{x_2 - \mu_2}{\sigma_2}\right)w_{2scaled}$$
$$\hat{y} = b_{scaled} + \frac{x_1 w_{1scaled}}{\sigma_1} - \frac{\mu_1 w_{1scaled}}{\sigma_1} + \frac{x_2 w_{2scaled}}{\sigma_2} - \frac{\mu_2 w_{2scaled}}{\sigma_2}$$
$$\hat{y} = b_{scaled} + x_1\left(\frac{w_{1scaled}}{\sigma_1}\right) - \frac{\mu_1 w_{1scaled}}{\sigma_1} + x_2\left(\frac{w_{2scaled}}{\sigma_2}\right) - \frac{\mu_2 w_{2scaled}}{\sigma_2}$$
$$\hat{y} = b_{scaled}  - \frac{\mu_1 w_{1scaled}}{\sigma_1} - \frac{\mu_2 w_{2scaled}}{\sigma_2} + x_1\left(\frac{w_{1scaled}}{\sigma_1}\right) + x_2\left(\frac{w_{2scaled}}{\sigma_2}\right)$$

In [34]:
X_train8 = df2[['sqft','rm']]
y_train8 = df2['price']

In [35]:
# SciKit Learn Normal data
lr8 = LinearRegression().fit(X_train8, y_train8)
print('w',lr8.coef_)
print('b',lr8.intercept_)

w [  139.21067402 -8738.01911233]
b 89597.90954279748


In [36]:
X_norm8, avg8, stddev8 = std_norm_v2(X_train8)
coef8, intercept8, _, _, _ = my.compute_gradient_descent(X_norm8, y_train8, iterations = 20000)

iteration 19999: Last cost = 2.0433e+09: intercept = 3.4041e+05: weights = [[109447.79646964  -6578.35485416]]
best w [[109447.7965]
 [ -6578.3549]]
best b 340412.6596


In [37]:
print('mean\n', avg8)

mean
 sqft    2000.680851
rm         3.170213
dtype: float64


In [38]:
print('std deviation\n', stddev8)

std deviation
 sqft    786.202619
rm        0.752843
dtype: float64


In [39]:
avg8.shape

(2,)

In [40]:
stddev8.shape

(2,)

In [41]:
coef8.shape

(2, 1)

In [42]:
avg8 = np.array(avg8).reshape((2,1))
stddev8 = np.array(stddev8).reshape((2,1))

In [43]:
# Scale back own GD
or_coef_gd8 = coef8 / stddev8
or_coef_gd8

array([[  139.21067402],
       [-8738.01911233]])

In [44]:
or_intercept_gd8 = intercept8 - (coef8[0] * avg8[0] / stddev8[0]) - (coef8[1] * avg8[1] / stddev8[1])
or_intercept_gd8

array([89597.90954279])

In [45]:
or_intercept_gd8 = intercept8 + np.negative(coef8 * avg8 / stddev8).sum()
or_intercept_gd8

89597.9095427946

In [46]:
std_scaler8 = StandardScaler()
normalized_arr8 = std_scaler8.fit_transform(X_train8)
reg8 = LinearRegression().fit(normalized_arr8, y_train8)
print('w',reg8.coef_)
print('b',reg8.intercept_)

w [109447.79646964  -6578.35485416]
b 340412.6595744681


In [47]:
print('std dev:',std_scaler8.scale_)
print('avg:',std_scaler8.mean_)

std dev: [7.86202619e+02 7.52842809e-01]
avg: [2000.68085106    3.17021277]


In [48]:
or_coef_sk2_8 = reg8.coef_ / std_scaler8.scale_
or_coef_sk2_8

array([  139.21067402, -8738.01911233])

In [49]:
or_intercept_sk2_8 = reg8.intercept_ + np.negative(reg8.coef_ * std_scaler8.mean_ / std_scaler8.scale_).sum() 
or_intercept_sk2_8

89597.90954279763

In [50]:
def reverse_bw_std_norm(coef, intercept, avg, stddev):
    '''
    Reverse the coefficient and intercept after training on std_norm-scaled data.
    Pass the same mean and standard deviation that std_norm returned.
    Works for one-feature and multiple-feature regression, as long as coef,
    avg and stddev have shapes that line up element-wise.

    Un-scaled coefficients: w_j = w_j_scaled / stddev_j
    Un-scaled intercept:    b = b_scaled - sum_j(w_j_scaled * avg_j / stddev_j)
    '''
    per_feature_shift = coef * avg / stddev
    return coef / stddev, intercept + np.negative(per_feature_shift).sum()

In [51]:
reverse_bw_std_norm(coef8, intercept8, avg8, stddev8)

(array([[  139.21067402],
        [-8738.01911233]]),
 89597.9095427946)

### Scaling Back Coefficient and Intercept for MinMax Scaler

For MinMax Scaler, the formula is

$$x_{scaled} = \frac{x - min(x)}{max(x)-min(x)}$$

To get prediction, we have 

$$\hat{y} = b_{scaled} + x_{1 scaled} * w_{1 scaled}+ x_{2 scaled} * w_{2 scaled}$$

Substitute $x_{scaled}$

$$\hat{y} = b_{scaled} + \frac{x_1 - min(x_1)}{max(x_1)-min(x_1)} * w_{1scaled} + \frac{x_2 - min(x_2)}{max(x_2)-min(x_2)} * w_{2scaled}$$

$$\hat{y} = b_{scaled} + \left(\frac{x_1 - min(x_1)}{max(x_1)-min(x_1)}\right) w_{1scaled} + \left(\frac{x_2 - min(x_2)}{max(x_2)-min(x_2)}\right) w_{2scaled}$$

$$\hat{y} = b_{scaled} + \frac{x_1w_{1scaled} - min(x_1)w_{1scaled}}{max(x_1)-min(x_1)} + \frac{x_2w_{2scaled} - min(x_2)w_{2scaled}}{max(x_2)-min(x_2)}$$


$$\hat{y} = b_{scaled} + \frac{x_1w_{1scaled} - min(x_1)w_{1scaled}}{max(x_1)-min(x_1)} + \frac{x_2w_{2scaled} - min(x_2)w_{2scaled}}{max(x_2)-min(x_2)}$$
$$\hat{y} = b_{scaled} + \frac{x_1w_{1scaled}}{max(x_1)-min(x_1)} - \frac{min(x_1)w_{1scaled}}{max(x_1)-min(x_1)} +  \frac{x_2w_{2scaled}}{max(x_2)-min(x_2)} - \frac{min(x_2)w_{2scaled}}{max(x_2)-min(x_2)}$$
$$\hat{y} = b_{scaled} + x_1\left(\frac{w_{1scaled}}{max(x_1)-min(x_1)}\right) - \frac{min(x_1)w_{1scaled}}{max(x_1)-min(x_1)} + x_2\left(\frac{w_{2scaled}}{max(x_2)-min(x_2)}\right) - \frac{min(x_2)w_{2scaled}}{max(x_2)-min(x_2)}$$
$$\hat{y} = b + xw$$
$$\hat{y} = b_{scaled} - \frac{min(x_1)w_{1scaled}}{max(x_1)-min(x_1)} - \frac{min(x_2)w_{2scaled}}{max(x_2)-min(x_2)} + x_1\left(\frac{w_{1scaled}}{max(x_1)-min(x_1)}\right) + x_2\left(\frac{w_{2scaled}}{max(x_2)-min(x_2)}\right)$$

In [52]:
X_train9 = df2[['sqft','rm']]
y_train9 = df2['price']

In [53]:
# SciKit Learn Normal data
lr9 = LinearRegression().fit(X_train9, y_train9)
print('w',lr9.coef_)
print('b',lr9.intercept_)

w [  139.21067402 -8738.01911233]
b 89597.90954279748


In [54]:
X_norm9, min9, range9 = minmax_scaling(X_train9)
coef9, intercept9, _, _, _ = my.compute_gradient_descent(X_norm9, y_train9, iterations = 200000)

iteration 199999: Last cost = 2.0433e+09: intercept = 1.9947e+05: weights = [[504777.90398781 -34952.07644922]]
best w [[504777.904 ]
 [-34952.0764]]
best b 199467.3847


In [55]:
min9.shape

(2,)

In [56]:
min9 = np.array(min9).reshape(-1,1)
min9

array([[852],
       [  1]])

In [57]:
range9 = np.array(range9).reshape(-1,1)
range9

array([[3626],
       [   4]])

In [58]:
coef9.shape

(2, 1)

In [59]:
# Scale back own GD
or_coef_gd9 = coef9 / range9
or_coef_gd9

array([[  139.21067402],
       [-8738.0191123 ]])

In [60]:
or_intercept_gd9 = intercept9 - (coef9[0] * min9[0] / range9[0]) - (coef9[1] * min9[1] / range9[1])
or_intercept_gd9

array([89597.90954277])

In [61]:
or_intercept_gd9 = intercept9 + np.negative(coef9 *  min9 / range9).sum()
or_intercept_gd9

89597.9095427743

In [62]:
minmax_scaler9 = MinMaxScaler()
normalized_arr9 = minmax_scaler9.fit_transform(X_train9)
reg9 = LinearRegression().fit(normalized_arr9, y_train9)
print('w',reg9.coef_)
print('b',reg9.intercept_)

w [504777.90398791 -34952.07644931]
b 199467.38469348656


In [63]:
minmax_scaler9.data_max_

array([4478.,    5.])

In [64]:
minmax_scaler9.data_min_

array([852.,   1.])

In [65]:
minmax_scaler9.data_range_

array([3626.,    4.])

In [66]:
minmax_scaler9.data_range_.shape

(2,)

In [67]:
or_coef_sk2_9 = reg9.coef_ / minmax_scaler9.data_range_
or_coef_sk2_9

array([  139.21067402, -8738.01911233])

In [68]:
or_intercept_sk2_9 = reg9.intercept_ + np.negative(reg9.coef_ *  minmax_scaler9.data_min_ / minmax_scaler9.data_range_).sum() 
or_intercept_sk2_9

89597.9095427974

In [69]:
def reverse_bw_minmax_scaler(coef, intercept, min, range):
    '''
    Reverse the coefficient and intercept after training on MinMax-scaled data.
    Pass the same minimum and range that minmax_scaling returned.
    Works for one-feature and multiple-feature regression, as long as coef,
    min and range have shapes that line up element-wise.

    Un-scaled coefficients: w_j = w_j_scaled / range_j
    Un-scaled intercept:    b = b_scaled - sum_j(w_j_scaled * min_j / range_j)
    '''
    per_feature_shift = coef * min / range
    return coef / range, intercept + np.negative(per_feature_shift).sum()

In [70]:
# Exercise the MinMax reversal helper defined in the previous cell
reverse_bw_minmax_scaler(coef9, intercept9, min9, range9)

(array([[  139.21067402],
        [-8738.0191123 ]]),
 89597.9095427743)

## Finalize Function of Z Score and the Function to Scale Back the Coefficient

In [71]:
def std_norm(X):
    """
    Z-score normalizer, computed per column (axis=0).
    Formula is x(scaled) = (x - mean) / (std deviation of x)
    The comparable scaler in sklearn is StandardScaler.
    The standard deviation is numpy's default (ddof=0).

    INPUT:
    X = The features dataset (pandas Series / DataFrame, numpy array or array-like)
    Dataset should be in column features with shape (m,n) where m is number of observations
    and n is total number of features.
    A 1-D input is treated as a single feature and reshaped to (m,1).

    RETURN:
    X_norm = Scaled dataset, numpy array with shape (m,n)
    avg = Mean of each column features, shape (1,n)
    std = Standard Deviation of each column features, shape (1,n)
    """
    ### pandas inputs are converted to a plain numpy array first
    if isinstance(X, pd.Series):
        X = X.to_frame()

    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()

    X = np.asarray(X)

    if X.ndim == 1:
        # reshape returns a new array, so the result must be assigned back;
        # otherwise X.shape[1] below raises IndexError for 1-D input
        X = X.reshape(-1, 1)

    n = X.shape[1]

    avg = X.mean(axis=0).reshape((1, n))
    std = X.std(axis=0).reshape((1, n))

    X_norm = (X - avg) / std

    return X_norm, avg, std

In [72]:
def reverse_bw_std_norm(coef, intercept, avg, stddev):
    '''
    Reverse the coefficient and intercept after training on std_norm-scaled data.
    Please use the same mean and std deviation returned by std_norm.
    This function can be used for one-feature or multiple-feature regression.

    Un-scaled coefficients: w_j = w_j_scaled / stddev_j
    Un-scaled intercept:    b = b_scaled - sum_j(w_j_scaled * avg_j / stddev_j)

    INPUT:
    coef = The weights / coefficient of the trained dataset, shape (n,1) or (n,)
    intercept = The intercept of the trained dataset
    avg = The average returned from the function std_norm (any shape holding n values;
          numpy array or pandas Series)
    stddev = The standard deviation from the function std_norm (same rules as avg)

    RETURN:
    or_coef_gd = The weights / coefficient in un-scaled form, same shape as coef
    or_intercept_gd = The intercept in un-scaled form

    '''
    coef = np.asarray(coef)
    n = coef.shape[0]

    ### Lay avg and stddev out like coef so the arithmetic below is strictly
    ### element-wise; a 1-D coef against (n,1) statistics would otherwise
    ### broadcast to an (n,n) matrix and give a wrong intercept.
    feature_shape = (n,) if coef.ndim == 1 else (n, 1)
    avg = np.asarray(avg).reshape(feature_shape)
    stddev = np.asarray(stddev).reshape(feature_shape)

    or_coef_gd = coef / stddev
    or_intercept_gd = intercept + np.negative(coef * avg / stddev).sum()
    return or_coef_gd, or_intercept_gd

### Application of Finalized Function (One Feature Housing Data)

In [73]:
x_train10 = df['sqft']
y_train10 = df['price']
x_train10 = x_train10.to_numpy().reshape(-1,1)
y_train10 = y_train10.to_numpy().reshape(-1,1)

In [74]:
# The following is to train the model using normal data
# This is for comparison purpose
lr10 = LinearRegression().fit(x_train10, y_train10)
print('w',lr10.coef_)
print('b',lr10.intercept_)

w [[134.52528772]]
b [71270.49244873]


In [75]:
# SciKit Learn Scaling with Regression
std_scaler10 = StandardScaler()
normalized_arr10 = std_scaler10.fit_transform(x_train10)
reg10 = LinearRegression().fit(normalized_arr10, y_train10)
print('w',reg10.coef_)
print('b',reg10.intercept_)

w [[105764.13349282]]
b [340412.65957447]


In [76]:
x_norm10, avg10, stddev10 = std_norm(x_train10)
coef10, intercept10, _, _, _ = my.compute_gradient_descent(x_norm10, y_train10)

iteration 9999: Last cost = 2.0581e+09: intercept = 3.4041e+05: weights = [[105764.13349281]]
best w [[105764.1335]]
best b 340412.6596


In [77]:
coef_or10, intercept_or10 = reverse_bw_std_norm(coef10, intercept10, avg10, stddev10)
print(coef_or10,intercept_or10)

[[134.52528772]] 71270.49244872801


In [78]:
myAsk = 2104
myAsk_array10 = np.array(myAsk).reshape(-1,1)

In [79]:
lr10.predict(myAsk_array10)

array([[354311.69781212]])

In [80]:
my.prediction(myAsk_array10, intercept_or10, coef_or10)

array([[354311.69781211]])

In [81]:
reg10.predict(std_scaler10.transform(myAsk_array10))

array([[354311.69781212]])

In [82]:
# Predicting new query

In [83]:
myAskTwo = 3800
myAsk_arrayTwo = np.array(myAskTwo).reshape(-1,1)

In [84]:
lr10.predict(myAsk_arrayTwo)

array([[582466.58578565]])

In [85]:
my.prediction(myAsk_arrayTwo, intercept_or10, coef_or10)

array([[582466.58578564]])

In [86]:
reg10.predict(std_scaler10.transform(myAsk_arrayTwo))

array([[582466.58578565]])

### Application of Finalized Function (Two Features Housing Data)

In [87]:
X2_train11 = df2[['sqft','rm']]
y2_train11 = df2['price']
X2_train11 = X2_train11.to_numpy()
y2_train11 = y2_train11.to_numpy()

In [88]:
# Normal data
lr11 = LinearRegression().fit(X2_train11, y2_train11)
print('w',lr11.coef_)
print('b',lr11.intercept_)

w [  139.21067402 -8738.01911233]
b 89597.90954279748


In [89]:
# SciKit Learn Scaling with Regression
std_scaler11 = StandardScaler()
normalized_arr11 = std_scaler11.fit_transform(X2_train11)
reg11 = LinearRegression().fit(normalized_arr11, y2_train11)
print('w',reg11.coef_)
print('b',reg11.intercept_)

w [109447.79646964  -6578.35485416]
b 340412.6595744681


In [90]:
# Our Function
X2_norm11, avg11, stddev11 = std_norm(X2_train11)
coef11, intercept11, _, _, _ = my.compute_gradient_descent(X2_norm11, y2_train11)

iteration 9999: Last cost = 2.0433e+09: intercept = 3.4041e+05: weights = [[109447.79646964  -6578.35485416]]
best w [[109447.7965]
 [ -6578.3549]]
best b 340412.6596


In [91]:
coef_or11, intercept_or11 = reverse_bw_std_norm(coef11, intercept11, avg11, stddev11)
print('w',coef_or11)
print('b',intercept_or11)

w [[  139.21067402]
 [-8738.01911233]]
b 89597.90954279466


#### SciKit Learn prediction

In [92]:
myAskOne = [2104, 3]
myAsk_array11 = np.array(myAskOne).reshape(1,-1)

In [93]:
lr11.predict(myAsk_array11)

array([356283.1103389])

In [94]:
my.prediction(myAsk_array11, intercept_or11, coef_or11)

array([[356283.11033889]])

In [95]:
reg11.predict(std_scaler11.transform(myAsk_array11))

array([356283.1103389])

#### Making new query

In [96]:
myAskTwo = [3800,4]
myAskTwo_array11 = np.array(myAskTwo).reshape(1,-1)

In [97]:
lr11.predict(myAskTwo_array11)

array([583646.39436046])

In [98]:
my.prediction(myAskTwo_array11, intercept_or11, coef_or11)

array([[583646.39436046]])

In [99]:
reg11.predict(std_scaler11.transform(myAskTwo_array11))

array([583646.39436046])

## Finalize Function of MinMax Scaling and the Function to Scale Back the Coefficient

In [100]:
def minmax_scaling(X):
    """
    This function replicates the method of SciKit Learn MinMaxScaler,
    computed per column (axis=0).
    Formula is x(scaled) = (x - min(x)) / (max(x) - min(x))

    INPUT:
    X = The features dataset (pandas Series / DataFrame, numpy array or array-like)
    Dataset should be in column features with shape (m,n) where m is number of observations
    and n is total number of features.
    A 1-D input is treated as a single feature and reshaped to (m,1).

    RETURN:
    scaled = Scaled dataset, numpy array with shape (m,n)
    minimum = Minimum of each column features, shape (n,1)
    range = Maximum of each column features MINUS Minimum of each column features, shape (n,1)
    """
    ### pandas inputs are converted to a plain numpy array first
    if isinstance(X, pd.Series):
        X = X.to_frame()

    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()

    X = np.asarray(X)

    if X.ndim == 1:
        # reshape returns a new array, so the result must be assigned back
        X = X.reshape(-1, 1)

    minimum = X.min(axis=0)
    # named data_range to avoid shadowing the builtin range()
    data_range = X.max(axis=0) - minimum
    scaled = (X - minimum) / data_range
    return scaled, minimum.reshape(-1, 1), data_range.reshape(-1, 1)

In [101]:
def reverse_bw_minmax_scale(coef, intercept, min, range):
    '''
    Reverse the coefficient and intercept after training on minmax_scaling data.
    Please use the same minimum and range returned by minmax_scaling.
    This function can be used for one-feature or multiple-feature regression.

    Un-scaled coefficients: w_j = w_j_scaled / range_j
    Un-scaled intercept:    b = b_scaled - sum_j(w_j_scaled * min_j / range_j)

    INPUT:
    coef = The weights / coefficient of the trained dataset, shape (n,1) or (n,)
    intercept = The intercept of the trained dataset
    min = The minimum returned from the function minmax_scaling (any shape holding n values)
    range = The range (max - min) for each column features from the function minmax_scaling
            (same rules as min)

    RETURN:
    or_coef_gd = The weights / coefficient in un-scaled form, same shape as coef
    or_intercept_gd = The intercept in un-scaled form

    '''
    coef = np.asarray(coef)
    n = coef.shape[0]

    ### Lay min and range out like coef so the arithmetic below is strictly
    ### element-wise; mixing an (n,1) coef with 1-D statistics would otherwise
    ### broadcast to an (n,n) matrix and give a wrong intercept.
    feature_shape = (n,) if coef.ndim == 1 else (n, 1)
    minimum = np.asarray(min).reshape(feature_shape)
    span = np.asarray(range).reshape(feature_shape)

    or_coef_gd = coef / span
    or_intercept_gd = intercept + np.negative(coef * minimum / span).sum()
    return or_coef_gd, or_intercept_gd


### Application of Finalized Function (One Feature Housing Data)

In [102]:
x_train12 = df['sqft']
y_train12 = df['price']
x_train12 = x_train12.to_numpy().reshape(-1,1)
y_train12 = y_train12.to_numpy().reshape(-1,1)

In [103]:
# Normal data
lr12 = LinearRegression().fit(x_train12, y_train12)
print('w',lr12.coef_)
print('b',lr12.intercept_)

w [[134.52528772]]
b [71270.49244873]


In [104]:
# SciKit Learn Scaling with Regression
minmax_scaler12 = MinMaxScaler()
normalized_arr12 = minmax_scaler12.fit_transform(x_train12)
reg12 = LinearRegression().fit(normalized_arr12, y_train12)
print('w',reg12.coef_)
print('b',reg12.intercept_)

w [[487788.6932736]]
b [185886.03758637]


In [105]:
# Our Function
x_norm12, min12, range12 = minmax_scaling(x_train12)
coef12, intercept12, _, _, _ = my.compute_gradient_descent(x_norm12, y_train12, iterations=200000)

iteration 199999: Last cost = 2.0581e+09: intercept = 1.8589e+05: weights = [[487788.69327352]]
best w [[487788.6933]]
best b 185886.0376


In [106]:
coef_or12, intercept_or12 = reverse_bw_minmax_scale(coef12, intercept12, min12, range12)
print('w',coef_or12)
print('b',intercept_or12)

w [[134.52528772]]
b 71270.49244877003


In [107]:
myAsk = 2104
myAsk_array12 = np.array(myAsk).reshape(-1,1)

In [108]:
lr12.predict(myAsk_array12)

array([[354311.69781212]])

In [109]:
my.prediction(myAsk_array12, intercept_or12, coef_or12)

array([[354311.69781212]])

In [110]:
reg12.predict(minmax_scaler12.transform(myAsk_array12))

array([[354311.69781212]])

In [111]:
# Predicting new query

In [112]:
myAskTwo = 3800
myAsk_arrayTwo = np.array(myAskTwo).reshape(-1,1)

In [113]:
lr12.predict(myAsk_arrayTwo)

array([[582466.58578565]])

In [114]:
my.prediction(myAsk_arrayTwo, intercept_or12, coef_or12)

array([[582466.58578561]])

In [115]:
reg12.predict(minmax_scaler12.transform(myAsk_arrayTwo))

array([[582466.58578565]])

### Application of Finalized Function (Two Features Housing Data)

In [116]:
X2_train14 = df2[['sqft','rm']]
y2_train14 = df2['price']
X2_train14 = X2_train14.to_numpy()
y2_train14 = y2_train14.to_numpy()

In [117]:
# Normal data
lr14 = LinearRegression().fit(X2_train14, y2_train14)
print('w',lr14.coef_)
print('b',lr14.intercept_)

w [  139.21067402 -8738.01911233]
b 89597.90954279748


In [118]:
# SciKit Learn Scaling with Regression
minmax_scaler14 = MinMaxScaler()
normalized_arr14 = minmax_scaler14.fit_transform(X2_train14)
reg14 = LinearRegression().fit(normalized_arr14, y2_train14)
print('w',reg14.coef_)
print('b',reg14.intercept_)

w [504777.90398791 -34952.07644931]
b 199467.38469348656


In [119]:
# Our Function
X2_norm14, min14, range14 = minmax_scaling(X2_train14)
coef14, intercept14, _, _, _ = my.compute_gradient_descent(X2_norm14, y2_train14, iterations=200000)

iteration 199999: Last cost = 2.0433e+09: intercept = 1.9947e+05: weights = [[504777.90398781 -34952.07644922]]
best w [[504777.904 ]
 [-34952.0764]]
best b 199467.3847


In [120]:
coef_or14, intercept_or14 = reverse_bw_minmax_scale(coef14, intercept14, min14, range14)
print('w',coef_or14)
print('b',intercept_or14)

w [[  139.21067402]
 [-8738.0191123 ]]
b 89597.9095427743


In [121]:
myAskOne = [2104, 3]
myAsk_array14 = np.array(myAskOne).reshape(1,-1)

In [122]:
lr14.predict(myAsk_array14)

array([356283.1103389])

In [123]:
my.prediction(myAsk_array14, intercept_or14, coef_or14)

array([[356283.11033889]])

In [124]:
reg14.predict(minmax_scaler14.transform(myAsk_array14))

array([356283.1103389])

In [125]:
# Predicting new query

In [126]:
myAskTwo = [3800,4]
myAsk_arrayTwo = np.array(myAskTwo).reshape(1,-1)

In [127]:
lr14.predict(myAsk_arrayTwo)

array([583646.39436046])

In [128]:
my.prediction(myAsk_arrayTwo, intercept_or14, coef_or14)

array([[583646.39436043]])

In [129]:
reg14.predict(minmax_scaler14.transform(myAsk_arrayTwo))

array([583646.39436046])

## End Note 10