In [376]:
import pandas as pd
import numpy as np
import random

In [377]:
!gdown --id 1xmaX9QHTWnKe58oIMT1ohnhPLU15WQ2O

Downloading...
From: https://drive.google.com/uc?id=1xmaX9QHTWnKe58oIMT1ohnhPLU15WQ2O
To: /content/House_Price_2.csv
100% 2.46k/2.46k [00:00<00:00, 3.68MB/s]


In [378]:
df = pd.read_csv("House_Price_2.csv")
df.tail(5)

Unnamed: 0,width,length,price
95,23,94,305595500.0
96,10,69,189080800.0
97,11,35,130538000.0
98,8,66,170194800.0
99,18,28,145525400.0


In [379]:
random.seed(1)

In [380]:
#define predictors and label variable
train_variable=['width', 'length']
target_variable='price'
X=df[train_variable].values
y=df[target_variable].values.reshape(-1, 1)

## Gradient Descent Function

In [381]:
def initialize_parameters(X):
  number_of_param=X.shape[1]
  W=np.random.rand(number_of_param,1)
  b=np.array(random.randint(1, 200))
  return W, b

def forward_propergate(W, b, X, y):
  y_hat=np.dot(X, W) + b # W.shape=(param, 1) X.shape=(m, params)
  return y_hat 

def cost(y_hat, y):
  return np.mean(np.square(y_hat-y)) 
  
def backward_propergate(W, b, y_hat, y):
  m = X.shape[0]
  dW=(-2/m) * np.dot(X.T, y-y_hat) #X.shape=(m, param) , error.shape=(m, 1) 
  db=(-2/m) * np.sum(y-y_hat)
  return dW, db 

def update_params(W, b, dW, db, learning_rate):
  W=W-learning_rate*dW
  b=b-learning_rate*db
  return W, b 

In [382]:
def gradient_descent_linear_regression(
    X, 
    y, 
    iterations=10000, 
    learning_rate=0.0000001, 
    print_cost=True):
  W, b=initialize_parameters(X) 
  for i in range(0, iterations):
    y_hat=forward_propergate(W, b, X, y) 
    loss=cost(y_hat, y)
    if i % 10000==0 and print_cost==True:
      print("Loss at iteration {} : {} ".format(i, loss))
    dW, db = backward_propergate(W, b, y_hat, y) 
    W,b = update_params(W, b, dW, db, learning_rate)
  params={
      "W":W, 
      "b":b
  }
  final_loss=loss
  return params, final_loss

## Chạy mô hình

##### **a. Gradient Descent**

- Đối với learning rate, ta thử các giá trị lớn nhất, sau đó giảm dần, bắt đầu từ learning_rate=0.0001 

**Chọn Learning rate**

- learning_rate=0.0001

In [385]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=10000, 
    learning_rate=0.0001, 
    print_cost=True)

Loss at iteration 0 : 4.5991796816728824e+16 


- learning_rate=0.00001

In [386]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=10000, 
    learning_rate=0.00001, 
    print_cost=False)
print(params, loss) 

{'W': array([[5008428.18190986],
       [2003719.05568626]]), 'b': 144686.97027526825} 8673328575084.305


- learning_rate=0.000001

In [387]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=10000, 
    learning_rate=0.000001, 
    print_cost=False)
print(params, loss) 

{'W': array([[3861267.8132835 ],
       [2303232.33015312]]), 'b': 109749.5733665721} 99975308469450.55


*Ta chọn learning rate bằng learning_rate=0.00001*

**Chọn số vòng lặp**

- Ta tăng số vòng lặp lên dần từ 10000

In [388]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=100000, 
    learning_rate=0.00001, 
    print_cost=False)
print(params, loss)

{'W': array([[5006052.36824459],
       [2002801.14792364]]), 'b': 249103.21205088263} 8661182313304.617


In [389]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=1000000, 
    learning_rate=0.00001, 
    print_cost=False)
print(params, loss)

{'W': array([[4995147.66368495],
       [1998616.58188949]]), 'b': 726423.9641676822} 8630577260027.697


Ta thấy sau vòng lặp thứ 10.000 thì số Loss giảm rất chậm, Ta chọn iterations=100000 (vì chạy không quá lâu)  

##### b. Ta standardize input trước khi cho vào mô hình: 

In [390]:
def standardization(X):
  X_new = np.zeros((X.shape[0], X.shape[1]))
  for i in range(0, X.shape[1]):
    X_new[:, i]=(X[:, i]-np.mean(X[:, i]))/np.std(X[:, i])
  return X_new

In [391]:
X_standardized=standardization(X) 
params_input_normed, loss_input_normed=gradient_descent_linear_regression(
    X_standardized, 
    y, 
    iterations=100000, 
    learning_rate=0.00001, 
    print_cost=False)
print(params_input_normed, loss_input_normed)

{'W': array([[9.47972791e+07],
       [1.07607201e+08]]), 'b': 176964866.11795855} 9789494367358078.0


Cùng số vòng lặp và learning rate nhưng loss không tốt hơn sau khi standardise nên ta vẫn giữ phương án ban đầu với: 
- iterations=100000, 
- learning_rate=0.00001,  

#### c. So sánh các phương pháp khác: 

- Dùng gradient Descent: 

In [392]:
params, loss=gradient_descent_linear_regression(
    X, 
    y, 
    iterations=1000000, 
    learning_rate=0.00001, 
    print_cost=False)
print(params, loss)

{'W': array([[4995147.70537699],
       [1998616.59788837]]), 'b': 726422.1392237971} 8630577298866.36


In [393]:
W_grads=params.get("W")
b_grads=params.get("b")
print("W = {}, b = {}".format(W_grads, b_grads))  

W = [[4995147.70537699]
 [1998616.59788837]], b = 726422.1392237971


- Dùng công thức

In [394]:
def closed_form_params(X, y):
  '''
  input: X, y
  output: W (parameters of linear function) with shape (1, number of parameters)
  ''' 
  sample_size=X.shape[0] 
  Z = np.concatenate([X, np.ones((sample_size,1))], axis=-1)
  W_b=np.matmul(np.matmul(
                      np.linalg.inv(np.matmul(Z.T,Z)), Z.T
                      ),
               y).reshape(1, -1)
  assert (W_b.shape == (1, X.shape[1]+1)) 
  W=W_b[:, 0:X.shape[1]].reshape(-1,1)
  b=W_b[:, -1][0]
  return W, b
   

In [395]:
W_closed_form, b_closed_form =closed_form_params(X, y) 
print("W = {}, b = {}".format(W_closed_form, b_closed_form)) 

W = [[4992438.83485118]
 [1997577.09722778]], b = 844994.8258748334


- SKlearn

In [396]:
## SkLearn
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(X, y)                ## train LinearRegression 
W_sklearn, b_sklearn = lr.coef_.reshape(-1,1), lr.intercept_[0]  ## lấy các tham số học được
print("W = {}, b = {}".format(W_sklearn, b_sklearn))

W = [[4992438.83485116]
 [1997577.09722778]], b = 844994.8258749545


**So sánh MSE các phương pháp:**

In [397]:
def mean_square_error(X, y, W, b):
  y_hat=np.dot(X, W) + b
  error=y-y_hat
  return np.mean(np.square(error))

In [398]:
print("MSE Gradient Descent:", mean_square_error(X, y, W_grads, b_grads)) 
print("MSE Closed Form     :", mean_square_error(X, y, W_closed_form, b_closed_form)) 
print("MSE Sklearn.        :", mean_square_error(X, y, W_sklearn, b_sklearn)) 

MSE Gradient Descent: 8630577294339.747
MSE Closed Form     : 8629315550589.733
MSE Sklearn.        : 8629315550589.731
