In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Read the salary dataset and preview it: the first five rows plus (rows, columns).
df = pd.read_csv(filepath_or_buffer="./salary.csv")
(df.head(n=5), df.shape)

In [None]:
# Feature column (years of experience) and target column (salary).
X = df["YearsExperience"]
y = df["Salary"]
X.shape

In [None]:
# Raw data: salary against years of experience.
plt.scatter(df["YearsExperience"], df["Salary"])
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Salary vs. Years of Experience")
# Render explicitly so the cell does not echo the repr of its last expression.
plt.show()

In [None]:
def r2score(y_true, y_pred):
    """
    Calculate the R^2 (coefficient of determination) score.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    and SS_tot is the sum of squared deviations of ``y_true`` from its mean.
    The score is NOT symmetric: the observed values must be passed first.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, same length as ``y_true``.

    Returns
    -------
    float
        1.0 for a perfect fit; can be negative for fits worse than
        predicting the mean. If ``y_true`` is constant (SS_tot == 0) the
        result is 1.0 when the predictions are exact and 0.0 otherwise.
        NaN is returned for empty input.
    """
    # Work on plain float arrays: this accepts lists/tuples and prevents two
    # pandas Series from being aligned on their index labels before subtracting.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    if y_true.size == 0:
        return float("nan")

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    # Constant targets make the ratio undefined; return a finite score instead
    # of dividing by zero.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)

In [None]:
class LinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X @ theta``. No intercept is added automatically:
    include a column of ones in ``X`` if a bias term is wanted.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training design matrix.
    y : array-like of shape (n_samples,)
        Training targets.
    learning_rate : float, default=0.01
        Step size of each gradient-descent update.
    epochs : int, default=1000
        Number of full passes (one update per pass) performed by ``fit``.
    random_state : int or None, default=None
        Seed for the initial weights. ``None`` keeps the previous behaviour of
        drawing them from NumPy's global random state (``np.random.rand``).
    verbose : bool, default=True
        Print the loss after every epoch.

    Attributes
    ----------
    theta : ndarray of shape (n_features,)
        Current weights, initialised uniformly in [0, 1).
    loss : list of float
        Mean squared error recorded at the start of every epoch.
    """

    def __init__(
        self,
        X,
        y,
        learning_rate=0.01,
        epochs=1000,
        random_state=None,
        verbose=True,
    ):
        # Plain float arrays: accepts lists / DataFrames / Series and avoids
        # pandas index alignment inside the update step.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y disagree on sample count: {X.shape[0]} != {y.shape[0]}"
            )

        self.X = X
        self.y = y
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.verbose = verbose
        self.num_samples = X.shape[0]

        if random_state is None:
            # Legacy path: honours a prior np.random.seed(...) call.
            self.theta = np.random.rand(X.shape[1])
        else:
            self.theta = np.random.default_rng(random_state).random(X.shape[1])
        self.loss = []

    def compute_loss(self, y_pred, y):
        """Return the mean squared error between ``y_pred`` and ``y``."""
        return np.mean((np.asarray(y_pred) - np.asarray(y)) ** 2)

    def predict(self, X):
        """Return ``X @ theta`` for a design matrix laid out like the training one."""
        return np.dot(X, self.theta)

    def fit(self):
        """Run ``epochs`` gradient-descent updates on the stored training data.

        Returns
        -------
        dict
            ``{"weight": theta, "loss": last_recorded_loss}``. The recorded
            loss is the one measured just before the final update. With
            ``epochs == 0`` the loss of the current weights is returned
            instead, and nothing is appended to ``self.loss``.
        """
        for epoch in range(self.epochs):
            y_pred = self.predict(self.X)
            loss = self.compute_loss(y_pred, self.y)
            self.loss.append(loss)
            # X^T (y_hat - y) / n is half the exact MSE gradient; the factor
            # of 2 is effectively folded into the learning rate.
            gradient = self.X.T.dot(y_pred - self.y) / self.num_samples
            self.theta -= self.learning_rate * gradient
            if self.verbose:
                # 1-based counter so the final line reads "N/N".
                print(f"Epoch {epoch + 1}/{self.epochs}, Loss: {loss:.4f}")

        if self.loss:
            final_loss = self.loss[-1]
        else:
            # No epoch ran: report the loss of the current weights rather
            # than indexing into an empty history.
            final_loss = self.compute_loss(self.predict(self.X), self.y)
        return {"weight": self.theta, "loss": final_loss}

In [None]:
# Turn the experience column into an (n_samples, 1) column matrix.
X_linear = X.values[:, np.newaxis]
X_linear[:5]

In [None]:
# Add the bias column. The design matrix is rebuilt from X rather than from
# X_linear itself, so re-running this cell cannot stack extra columns of ones.
X_linear = np.c_[np.ones(len(X)), X.values]
X_linear[:5]

In [None]:
# LinearRegression draws its initial weights from NumPy's global RNG; seed it
# so the reported loss and R^2 are the same on every run of this cell.
np.random.seed(42)
linear_model = LinearRegression(
    X_linear,
    y,
    learning_rate=0.001,
    epochs=50,
)
linear_model.fit()

In [None]:
Y_pred = linear_model.predict(X_linear)
# r2score(y_true, y_pred): observed salaries first, predictions second.
# R^2 is not symmetric, so swapping the arguments reports a different number.
linear_score = r2score(y, Y_pred)
print(f"R^2 score: {linear_score:.4f}")

In [None]:
def create_polynomial_features(X, degree=2):
    """
    Create polynomial features for the input data.

    Each input column is raised element-wise to the powers 1..degree and the
    results are stacked side by side as ``[X, X**2, ..., X**degree]``. No
    cross terms between columns and no bias column are generated.

    Parameters
    ----------
    X : array-like of shape (n_samples,) or (n_samples, n_features)
        Input data; 1-D input is treated as a single feature column.
    degree : int, default=2
        Highest power to include; must be at least 1.

    Returns
    -------
    ndarray of shape (n_samples, n_features * degree)
        Always a new 2-D array, including for ``degree=1``.

    Raises
    ------
    ValueError
        If ``degree`` is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")

    base = np.asarray(X)
    if base.ndim < 2:
        # Treat scalars / 1-D vectors as one feature column.
        base = base.reshape(-1, 1)

    # One block of columns per power, in increasing order.
    return np.hstack([base**power for power in range(1, degree + 1)])

In [None]:
# Expand years of experience into degree-2 polynomial feature columns.
X_poly = create_polynomial_features(X=X, degree=2)
X_poly[:5, :]

In [None]:
# Add the bias column. The features are regenerated from X rather than taken
# from X_poly itself, so re-running this cell cannot stack extra columns of ones.
X_poly = np.c_[np.ones(len(X)), create_polynomial_features(X, degree=2)]
X_poly[:5]

In [None]:
# LinearRegression draws its initial weights from NumPy's global RNG; seed it
# so the reported loss and R^2 are the same on every run of this cell.
np.random.seed(42)
poly_model = LinearRegression(
    X_poly,
    y,
    learning_rate=0.00001,
    epochs=50,
)
poly_model.fit()

In [None]:
y_pred_poly = poly_model.predict(X_poly)
# r2score(y_true, y_pred): observed salaries first, predictions second.
# R^2 is not symmetric, so swapping the arguments reports a different number.
poly_score = r2score(y, y_pred_poly)
print(f"linear R^2 score: {linear_score:.4f}")
print(f"polynomial R^2 score: {poly_score:.4f}")
print(f"linear last loss: {linear_model.loss[-1]:.4f}")
print(f"polynomial last loss: {poly_model.loss[-1]:.4f}")

In [None]:
# Line plots connect points in the order given, so draw the fitted curves
# along increasing experience; otherwise unsorted rows make the lines zig-zag.
x_values = np.asarray(X)
order = np.argsort(x_values)
x_sorted = x_values[order]

plt.plot(X, y, "yo", label="Observed")
plt.plot(x_sorted, linear_model.predict(X_linear)[order], "r-", label="Linear fit")
plt.plot(x_sorted, poly_model.predict(X_poly)[order], "b-", label="Polynomial fit")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Linear Regression")
plt.legend()
plt.show()