In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
# Load the comma-separated advertising dataset into a float64 array.
# Per the notebook text, the four columns are TV, Radio, Newspaper, Sales.
data = np.genfromtxt('advertising.csv',dtype="float64",delimiter=',')


In [2]:
# Display the loaded array to inspect the raw values.
data

array([[230.1,  37.8,  69.2,  22.1],
       [ 44.5,  39.3,  45.1,  10.4],
       [ 17.2,  45.9,  69.3,  12. ],
       [151.5,  41.3,  58.5,  16.5],
       [180.8,  10.8,  58.4,  17.9],
       [  8.7,  48.9,  75. ,   7.2],
       [ 57.5,  32.8,  23.5,  11.8],
       [120.2,  19.6,  11.6,  13.2],
       [  8.6,   2.1,   1. ,   4.8],
       [199.8,   2.6,  21.2,  15.6],
       [ 66.1,   5.8,  24.2,  12.6],
       [214.7,  24. ,   4. ,  17.4],
       [ 23.8,  35.1,  65.9,   9.2],
       [ 97.5,   7.6,   7.2,  13.7],
       [204.1,  32.9,  46. ,  19. ],
       [195.4,  47.7,  52.9,  22.4],
       [ 67.8,  36.6, 114. ,  12.5],
       [281.4,  39.6,  55.8,  24.4]])

### **Giới thiệu data**

Data bao gồm 4 columns: TV, Radio, Newspaper, Sales

![Alt text](image.png)

Trong đây, những columns: TV, Radio, Newspaper được xem là feature X, Sales là kết quả cần dự đoán - xem là y 

Vậy thì ta sẽ có công thức: 

#### y = ax<sub>1</sub> + bx<sub>2</sub> + cx<sub>3</sub> +d

Với: 
 - x<sub>1</sub> là TV
 - x<sub>2</sub> là Radio
 - x<sub>3</sub> là Newspaper
 - a,b,c,d là các trọng số cần tìm

In [3]:
class LinearRegression_GradientDescent:
    """Linear regression on exactly three features, trained by batch gradient descent.

    The model is ``y_hat = a*x1 + b*x2 + c*x3 + d`` where ``a``, ``b``, ``c`` are
    the per-feature weights and ``d`` is the bias. All four parameters are
    initialised uniformly at random in [-0.5, 0.5).

    Parameters
    ----------
    lr : float
        Learning rate (step size) applied to every gradient update.
    n_epoch : int
        Number of full-batch updates performed by ``fit``.
    """

    def __init__(self,lr=0.01,n_epoch=1000):
        self.lr=lr
        self.n_epoch=n_epoch
        self.a = random.random() - 0.5
        self.b = random.random() - 0.5
        self.c = random.random() - 0.5
        self.d = random.random() - 0.5

    def predict(self, X):
        """Return the 1-D array of predictions for ``X`` of shape (N, 3)."""
        X = np.asarray(X, dtype=float)
        # The bias d is part of the model and must be included here; leaving it
        # out makes the gradient step for d inconsistent with the prediction.
        return self.a*X[:,0] + self.b*X[:,1] + self.c*X[:,2] + self.d

    def fit(self, X, Y, verbose=True):
        """Fit a, b, c, d by minimising the mean squared error.

        Parameters
        ----------
        X : array-like, shape (N, 3)
            Feature matrix; columns map to the weights a, b, c in order.
        Y : array-like, shape (N,) or (N, 1)
            Target values. Flattened to 1-D so that a column vector does not
            silently broadcast the residual to an (N, N) matrix.
        verbose : bool
            When True (default) print the loss after every epoch.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``Y`` have different sample counts.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1)
        N = len(X) # number of samples
        if N == 0:
            raise ValueError("X must contain at least one sample")
        if len(Y) != N:
            raise ValueError("X and Y must have the same number of samples")

        for i in range(self.n_epoch):
            y_hat = self.predict(X)
            f = y_hat - Y                # residuals, shape (N,)
            loss = np.sum(f**2)/N        # mean squared error before this update

            # Gradient of the MSE: d/dw = 2/N * sum(x * f), d/dd = 2/N * sum(f).
            # All four updates use the residuals computed above.
            self.a -= self.lr * (2 * X[:,0].dot(f) / N)
            self.b -= self.lr * (2 * X[:,1].dot(f) / N)
            self.c -= self.lr * (2 * X[:,2].dot(f) / N)
            self.d -= self.lr * (2 * f.sum() / N)

            if verbose:
                print("Epoch",i+1,",loss",loss)

In [4]:
from sklearn.model_selection import train_test_split

# Columns 0-2 (TV, Radio, Newspaper) are the features; column 3 (Sales) is the target.
features = data[:, :3]
target = data[:, 3]

# Hold out a third of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=42,
)
X_train.shape

(12, 3)

In [38]:
# Train the scalar-parameter model on the training split.
# NOTE(review): the learning rate is tiny, presumably because the features are
# unscaled (values in the hundreds) - confirm before changing it.
model = LinearRegression_GradientDescent(lr=1.5e-5)
model.fit(X=X_train, Y=y_train)


Epoch 1 ,loss 3650.1943554349205
Epoch 2 ,loss 155.49226959278005
Epoch 3 ,loss 37.14117637814038
Epoch 4 ,loss 30.42966915656142
Epoch 5 ,loss 27.71222697832506
Epoch 6 ,loss 25.40548246919717
Epoch 7 ,loss 23.359957699722827
Epoch 8 ,loss 21.542884560625314
Epoch 9 ,loss 19.92847290732806
Epoch 10 ,loss 18.493941606129805
Epoch 11 ,loss 17.21907516004978
Epoch 12 ,loss 16.085931692085722
Epoch 13 ,loss 15.078587116950928
Epoch 14 ,loss 14.182908196241229
Epoch 15 ,loss 13.386351121869017
Epoch 16 ,loss 12.677782753067815
Epoch 17 ,loss 12.047321958988617
Epoch 18 ,loss 11.486198805594098
Epoch 19 ,loss 10.986629579889998
Epoch 20 ,loss 10.541705870257113
Epoch 21 ,loss 10.14529612198458
Epoch 22 ,loss 9.791958264909812
Epoch 23 ,loss 9.476862167877456
Epoch 24 ,loss 9.195720814788297
Epoch 25 ,loss 8.944729221314796
Epoch 26 ,loss 8.720510221685092
Epoch 27 ,loss 8.520066352854142
Epoch 28 ,loss 8.340737150284694
Epoch 29 ,loss 8.180161246690512
Epoch 30 ,loss 8.036242733549562
Epoch

### **Vectorizing**

#### y = ax<sub>1</sub> + bx<sub>2</sub> + cx<sub>3</sub> ... +d
Giả sử ta có nhiều trọng số thì ta phải làm như thế nào để tối giản code lại. 

1. Ta gom các trọng số a,b,c lại thành một ma trận trọng số W:
$$
W=
\left(\begin{array}{c} 
a\\ 
b\\
c
\end{array}\right)
,\quad \text{shape} = (3,1)
$$ 

2. Ta gom các biến x1,x2,x3 lại thành một ma trận X: 
$$
X=
\left(\begin{array}{ccc} 
x_{1_1} & x_{2_1} & x_{3_1}\\ 
x_{1_2} & x_{2_2} & x_{3_2}\\ 
x_{1_3} & x_{2_3} & x_{3_3}\\ 
\vdots & \vdots & \vdots\\
x_{1_{12}} & x_{2_{12}} & x_{3_{12}}
\end{array}\right)
,\quad \text{shape} = (12,3)
$$ 

3. Để tính y dự đoán ta có (phép nhân ma trận, không phải nhân từng phần tử; $\hat{y}$ có shape (12,1)): $$\hat{y} = XW + d$$
4. Tương tự như vậy, ta sẽ có ma trận của giá trị đạo hàm trọng số (đạo hàm của loss $\frac{1}{N}\sum(\hat{y}-y)^2$ với N là số mẫu): 
$$
dW=
\left(\begin{array}{c} 
dA\\ 
dB\\ 
dC
\end{array}\right)
=\frac{2}{N}X^{T}(\hat{y}-y)
$$
$$
dD =\frac{2}{N}\sum(\hat{y}-y)
$$

5. Và cập nhật trọng số bằng cách: 
$$
W -= lr* dW
$$
$$
d -= lr* dD
$$

In [5]:
class VectorizedLinearRegression_GradientDescent:
    """Linear regression trained by batch gradient descent, in matrix form.

    The model is ``y_hat = X @ W + d`` with ``W`` of shape (n_features, 1) and a
    scalar bias ``d``. ``W`` and ``d`` are initialised uniformly at random in
    [-0.5, 0.5).

    Parameters
    ----------
    lr : float
        Learning rate (step size) applied to every gradient update.
    n_epoch : int
        Number of full-batch updates performed by ``fit``.
    n_features : int
        Number of input columns; defaults to 3 to match the original behaviour.
    """

    def __init__(self,lr=0.01,n_epoch=1000,n_features=3):
        self.lr=lr
        self.n_epoch=n_epoch
        self.W = np.random.rand(n_features,1) - 0.5
        self.d = random.random() - 0.5

    def predict(self, X):
        """Return predictions of shape (N, 1) for ``X`` of shape (N, n_features)."""
        return np.dot(np.asarray(X, dtype=float), self.W) + self.d

    def fit(self, X, Y, verbose=True):
        """Fit ``W`` and ``d`` by minimising the mean squared error.

        Parameters
        ----------
        X : array-like, shape (N, n_features)
            Feature matrix.
        Y : array-like, shape (N,) or (N, 1)
            Target values. Always reshaped to a column: a 1-D ``Y`` would
            otherwise broadcast against the (N, 1) predictions into an (N, N)
            residual matrix and break the weight update.
        verbose : bool
            When True (default) print the loss after every epoch.

        Raises
        ------
        ValueError
            If ``X`` is empty, is not 2-D with ``n_features`` columns, or does
            not have the same number of samples as ``Y``.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1,1)
        N = len(X) # number of samples
        if N == 0:
            raise ValueError("X must contain at least one sample")
        if X.ndim != 2 or X.shape[1] != self.W.shape[0]:
            raise ValueError(
                "X must have shape (N, %d), got %s" % (self.W.shape[0], X.shape)
            )
        if len(Y) != N:
            raise ValueError("X and Y must have the same number of samples")

        for i in range(self.n_epoch):
            y_hat = self.predict(X)
            f = y_hat - Y                # residuals, shape (N, 1)
            loss = np.sum(f**2)/N        # mean squared error before this update

            # Gradient of the MSE: dW = 2/N * X^T f, dd = 2/N * sum(f).
            self.W -= self.lr * (2 * X.T.dot(f) / N)
            self.d -= self.lr * (2 * f.sum()/N)

            if verbose:
                print("Epoch",i+1,",loss",loss)

In [8]:
# Train the vectorized model; the targets are passed as an (N, 1) column so the
# residual keeps the same shape as the predictions.
y_train_column = np.array(y_train).reshape(-1, 1)
model = VectorizedLinearRegression_GradientDescent(lr=1e-5, n_epoch=100_000)
model.fit(X_train, y_train_column)


Epoch 1 ,loss 1925.1893204863602
Epoch 2 ,loss 562.2147376406483
Epoch 3 ,loss 269.7583387322615
Epoch 4 ,loss 199.12752600237505
Epoch 5 ,loss 175.0595512806996
Epoch 6 ,loss 161.31579053992107
Epoch 7 ,loss 150.36144978677186
Epoch 8 ,loss 140.591284273868
Epoch 9 ,loss 131.62788569122014
Epoch 10 ,loss 123.35079948946105
Epoch 11 ,loss 115.69583695668717
Epoch 12 ,loss 108.6134022756816
Epoch 13 ,loss 102.05966428101492
Epoch 14 ,loss 95.99452558176797
Epoch 15 ,loss 90.38101068071762
Epoch 16 ,loss 85.18496161218857
Epoch 17 ,loss 80.37481054610133
Epoch 18 ,loss 75.92138074304926
Epoch 19 ,loss 71.79770488075214
Epoch 20 ,loss 67.97885761298964
Epoch 21 ,loss 64.4418009067802
Epoch 22 ,loss 61.16524111240407
Epoch 23 ,loss 58.12949686170008
Epoch 24 ,loss 55.31637697129693
Epoch 25 ,loss 52.709067592420745
Epoch 26 ,loss 50.2920279068926
Epoch 27 ,loss 48.05089372208297
Epoch 28 ,loss 45.972388366627065
Epoch 29 ,loss 44.04424033401282
Epoch 30 ,loss 42.25510716302302
Epoch 31 ,lo

In [12]:
# Sanity check: evaluate the fitted vectorized model on the held-out split.
y_test_column = y_test.reshape(-1, 1)
y_hat = X_test @ model.W + model.d
test_loss = np.sum((y_hat - y_test_column) ** 2) / len(y_test)  # mean squared error

print("Loss of testing data:", test_loss)
print("X is: ", X_test)
print("Predicted: ", y_hat)
print("Truth: ", y_test)

Loss of testing data: 8.65041602547768
X is:  [[230.1  37.8  69.2]
 [ 44.5  39.3  45.1]
 [  8.6   2.1   1. ]
 [  8.7  48.9  75. ]
 [151.5  41.3  58.5]
 [ 97.5   7.6   7.2]]
Predicted:  [[23.41972558]
 [10.70469974]
 [ 1.92444679]
 [10.73310527]
 [18.38890807]
 [ 8.62571524]]
Truth:  [22.1 10.4  4.8  7.2 16.5 13.7]
