In [20]:
import pandas as pd
import numpy as np

In [21]:
# Load the advertising dataset: three ad-spend feature columns and the Sales target
df = pd.read_csv("data/advertising.csv")
X = df[["TV", "Radio", "Newspaper"]].to_numpy()
# Keep the target as a column vector of shape (n_samples, 1)
y = df["Sales"].to_numpy().reshape(-1, 1)


In [22]:
# Standardize the data: per column, subtract the mean and divide by the standard deviation
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

In [None]:
# Inspect the dataset dimensions before training.
n_samples, n_features = X.shape

print(n_samples)
print(n_features)

# Use a column vector so that np.dot(X, w) has shape (n_samples, 1), matching y.
# A 1-D w of shape (n_features,) would make np.dot(X, w) - y broadcast to
# (n_samples, n_samples), because y was reshaped to (n_samples, 1).
w = np.zeros((n_features, 1))

print(X.shape)
print(w.shape)
print(y.shape)

In [36]:
# Linear Regression training function
def train_linear_regression(X, y, lr=0.01, epochs=100):
    """Fit y ~= X @ w + b with full-batch gradient descent on the MSE.

    NOTE(review): this notebook defines ``train_linear_regression`` a second
    time in a later cell (using a 1/n gradient scale instead of 2/n);
    whichever cell was executed last is the definition in effect.

    Args:
        X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        y (numpy.ndarray): Targets of shape (n_samples, 1) or (n_samples,).
            A 1-D target is reshaped to a column so the residual stays
            (n_samples, 1) instead of broadcasting to (n_samples, n_samples).
        lr (float): Learning rate (step size of each update).
        epochs (int): Number of full passes over the data.

    Returns:
        tuple: ``(w, b, losses)`` where ``w`` has shape (n_features, 1),
        ``b`` is the scalar bias, and ``losses`` holds the MSE measured at
        the start of each epoch (before that epoch's update).
    """
    X = np.asarray(X)
    # Force a column target; (n_samples, 1) inputs pass through unchanged.
    y = np.asarray(y).reshape(-1, 1)

    n_samples, n_features = X.shape
    w = np.zeros((n_features, 1))
    b = 0.0
    losses = []

    for _ in range(epochs):
        # Residual is computed once per epoch and reused for the loss and
        # both gradients.
        residual = (np.dot(X, w) + b) - y

        loss = (1/n_samples) * np.sum(residual**2)
        losses.append(loss)

        # Gradients of the MSE with respect to w and b.
        dw = (2/n_samples)*np.dot(X.T, residual)
        db = (2/n_samples)*np.sum(residual)

        w -= lr*dw
        b -= lr*db

    return w, b, losses

In [37]:
# Report the shapes involved (w is whatever global w currently holds),
# then train and show the fitted weights, bias and loss history.
print("Shape of X:", np.shape(X))
print("Shape of X transpose:", np.shape(X.T))
print("Shape of y:", np.shape(y))
print("Shape of w", np.shape(w))
w1, b1, loss_history1 = train_linear_regression(X, y)
print(w1, b1, loss_history1, sep="\n")

Shape of X: (200, 3)
Shape of X transpose: (3, 200)
Shape of y: (200, 1)
Shape of w (3, 1)
[[4.05461939]
 [1.36654227]
 [0.22232607]]
13.123899809534437
[np.float64(256.71195), np.float64(246.59185198715406), np.float64(236.87634436218963), np.float64(227.54918243974993), np.float64(218.59477737225185), np.float64(209.9981694837427), np.float64(201.74500269788348), np.float64(193.82150001464882), np.float64(186.21443999224496), np.float64(178.91113419257573), np.float64(171.89940555033962), np.float64(165.16756762751672), np.float64(158.70440471660967), np.float64(152.49915275753887), np.float64(146.5414810345641), np.float64(140.82147462101204), np.float64(135.32961754093844), np.float64(130.05677661814434), np.float64(124.99418598420203), np.float64(120.13343221833038), np.float64(115.46644009309263), np.float64(110.98545890097499), np.float64(106.68304933794452), np.float64(102.5520709210795), np.float64(98.58566991831972), np.float64(94.77726776929696), np.float64(91.12054997708044

In [23]:
def train_linear_regression(X, y, lr=0.01, epochs=100, verbose=True):
    """
    Train a Linear Regression model with vectorized (full-batch) gradient descent.

    The gradients use a 1/n factor (i.e. the gradient of half the MSE), while
    the recorded loss is the full Mean Squared Error.

    NOTE(review): an earlier cell of this notebook defines a function with the
    same name that uses a 2/n gradient factor; whichever cell was executed last
    is the definition in effect.

    Args:
        X (numpy.ndarray): Input feature matrix (n_samples x n_features).
        y (numpy.ndarray): Target vector, shape (n_samples, 1) or (n_samples,).
            A 1-D target is reshaped to a column; otherwise the residual would
            broadcast to (n_samples, n_samples) and the returned weights would
            silently have shape (n_features, n_samples).
        lr (float): Learning rate.
        epochs (int): Number of training iterations.
        verbose (bool): When True (default), print the MSE every 10 epochs.

    Returns:
        tuple: Weights of shape (n_features, 1), the bias, and the list of
        per-epoch losses (each measured with the parameters from before that
        epoch's update).
    """
    X = np.asarray(X)
    # Force a column target; (n_samples, 1) inputs pass through unchanged.
    y = np.asarray(y).reshape(-1, 1)

    n_samples, n_features = X.shape
    # Initialize weights and bias
    weights = np.zeros((n_features, 1))
    bias = 0.0
    losses = []

    # Iterate over the epochs
    for epoch in range(epochs):
        # Prediction error, computed once and reused for gradients and loss
        residual = (np.dot(X, weights) + bias) - y

        # Compute gradients
        dw = (1 / n_samples) * np.dot(X.T, residual)
        db = (1 / n_samples) * np.sum(residual)

        # Update weights and bias
        weights = weights - lr * dw
        bias = bias - lr * db

        # Compute loss (Mean Squared Error) from the pre-update predictions
        mse = np.mean(residual ** 2)
        losses.append(mse)

        # Print the loss every 10 epochs to monitor training progress
        if verbose and (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, MSE: {mse:.4f}")

    return weights, bias, losses

In [24]:
# Show the input shapes, then train and print the learned weights, bias and loss history
print("Shape of X:", np.shape(X))
print("Shape of y:", np.shape(y))
w, b, loss_history = train_linear_regression(X, y)
print(w, b, loss_history, sep="\n")

Shape of X: (200, 3)
Shape of y: (200, 1)
Epoch 10/100, MSE: 214.4499
Epoch 20/100, MSE: 175.7091
Epoch 30/100, MSE: 144.0750
Epoch 40/100, MSE: 118.2398
Epoch 50/100, MSE: 97.1374
Epoch 60/100, MSE: 79.8982
Epoch 70/100, MSE: 65.8131
Epoch 80/100, MSE: 54.3035
Epoch 90/100, MSE: 44.8974
Epoch 100/100, MSE: 37.2095
[[2.97821952]
 [1.04685752]
 [0.2983854 ]]
9.592247660365402
[np.float64(256.71195), np.float64(251.6260932624114), np.float64(246.64242311573057), np.float64(241.75887742721244), np.float64(236.9734359115534), np.float64(232.28411927551625), np.float64(227.6889883802024), np.float64(223.18614342060218), np.float64(218.77372312206276), np.float64(214.44990395332184), np.float64(210.2128993557601), np.float64(206.06095898853513), np.float64(201.9923679892662), np.float64(198.005446249945), np.float64(194.0985477077558), np.float64(190.27005965049437), np.float64(186.5184020362818), np.float64(182.84202682727576), np.float64(179.2394173370883), np.float64(175.70908759162413), 