In [12]:
import numpy as np # type: ignore
import pandas as pd # type: ignore
import matplotlib.pyplot as plt # type: ignore

In [20]:
class CustomLinearRegression:
    """Linear regression trained with batch gradient descent.

    The model minimises the halved mean squared error
    ``(1 / (2 * n)) * sum((X @ theta - y) ** 2)``. A column of ones is
    prepended to the feature matrix, so ``theta[0]`` is the intercept and
    ``theta[1:]`` are the feature weights.

    Attributes:
        num_samples: number of rows in the training data.
        X_data: training features with the leading bias column.
        y_target: training target stored as a 2-D column array.
        learning_rate: step size used for every weight update.
        num_epochs: number of gradient steps performed by one ``fit()`` call.
        theta: weight column vector of shape ``(n_features + 1, 1)``.
        losses: loss recorded at every epoch, accumulated across ``fit()`` calls.
    """

    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000, random_state=None):
        """Store the training data and draw the initial weights.

        Args:
            X_data: array-like of shape ``(n_samples, n_features)``.
            y_target: array-like of shape ``(n_samples,)`` or ``(n_samples, 1)``.
            learning_rate: gradient descent step size.
            num_epochs: number of iterations run by ``fit()``.
            random_state: optional seed for the weight initialisation. When
                ``None`` the weights come from NumPy's global random state,
                exactly as before.

        Raises:
            ValueError: if ``X_data`` and ``y_target`` have different lengths.
        """
        X_data = np.asarray(X_data)
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]

        # Keep the target as a plain column vector. A flat (n,) target would
        # broadcast against the (n, 1) predictions into an (n, n) matrix, and a
        # DataFrame target would leak pandas objects into the loss arithmetic.
        y_target = np.asarray(y_target)
        if y_target.ndim < 2:
            y_target = y_target.reshape(-1, 1)
        if y_target.shape[0] != self.num_samples:
            raise ValueError(
                f"X_data has {self.num_samples} rows but y_target has {y_target.shape[0]}"
            )
        self.y_target = y_target

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initialize weights randomly
        if random_state is None:
            self.theta = np.random.randn(self.X_data.shape[1], 1)
        else:
            rng = np.random.default_rng(random_state)
            self.theta = rng.standard_normal((self.X_data.shape[1], 1))
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the halved mean squared error between predictions and targets.

        The sum of squared differences is divided by ``2 * self.num_samples``
        (the training-set size), regardless of the length of the inputs.
        """
        residuals = np.asarray(y_pred) - np.asarray(y_target)
        loss = (1 / (2 * self.num_samples)) * np.sum(residuals ** 2)
        return loss

    def predict(self, X_data):
        """Return ``X_data @ theta``.

        ``X_data`` may already contain the leading bias column (this is how
        ``fit()`` calls it). A 2-D input with exactly one column fewer than
        ``theta`` has rows is treated as raw features and gets the bias column
        prepended automatically.
        """
        if np.ndim(X_data) == 2 and np.shape(X_data)[1] + 1 == self.theta.shape[0]:
            X_data = np.c_[np.ones((np.shape(X_data)[0], 1)), X_data]
        y_pred = X_data.dot(self.theta)
        return y_pred

    def fit(self):
        """Run ``num_epochs`` steps of batch gradient descent.

        Training continues from the current ``theta``; calling ``fit()`` again
        resumes optimisation and keeps appending to ``self.losses``.

        Returns:
            dict with ``'loss'`` (mean of all recorded losses, or ``nan`` when
            nothing has been recorded) and ``'weight'`` (the weight vector).
        """
        for epoch in range(self.num_epochs):
            # Predict
            y_pred = self.predict(self.X_data)
            # Compute loss
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            # Compute gradient
            gradients = (1 / self.num_samples) * self.X_data.T.dot(y_pred - self.y_target)

            # Update weights
            self.theta -= self.learning_rate * gradients

            # Print loss every 50 epochs
            if (epoch % 50) == 0:
                print(f"Epoch {epoch} - Loss: {loss}")

        # With num_epochs == 0 there is nothing to average.
        mean_loss = sum(self.losses) / len(self.losses) if self.losses else float('nan')
        return {
            'loss': mean_loss,
            'weight': self.theta
        }

    def r2score(self, y_pred, y):
        """Return the coefficient of determination ``1 - RSS / TSS``.

        Both inputs are flattened first, so a column vector of predictions can
        be compared with a flat target without accidental broadcasting. When
        the target is constant (``TSS == 0``) the ratio is undefined; the score
        is then 1.0 for perfect predictions and 0.0 otherwise.
        """
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()
        rss = np.sum((y_pred - y) ** 2)
        tss = np.sum((y - y.mean()) ** 2)
        if tss == 0:
            return 1.0 if rss == 0 else 0.0
        r2 = 1 - (rss / tss)
        return r2

In [22]:
# Load the raw advertising data.
df = pd.read_csv('SalesPrediction.csv')

# Only the numeric predictors go into the model here. 'Influencer' is still a
# text category at this stage, and mixing it with floats would turn the design
# matrix into an object array that gradient descent cannot use.
X_data = df[['TV', 'Radio', 'Social Media']]
y = df[['Sales']]

model = CustomLinearRegression(X_data, y)

# The toy arrays below use their own names so the real target `y` is not
# overwritten by a sanity check.

# Case 1: predictions equal to the targets give an R^2 of exactly 1.
y_pred_case1 = np.array([1, 2, 3, 4, 5])
y_true_case1 = np.array([1, 2, 3, 4, 5])
print('case1 :', model.r2score(y_pred_case1, y_true_case1))

# Case 2: predictions worse than always guessing the mean give a negative R^2.
y_pred_case2 = np.array([1, 2, 3, 4, 5])
y_true_case2 = np.array([3, 5, 5, 2, 4])
print('case2 :', model.r2score(y_pred_case2, y_true_case2))

case1 : 1.0
case2 : -2.235294117647059


In [25]:
# Question 7:
def create_polynomial_features7(X, degree):
    """Append element-wise powers 2..degree of X as extra columns.

    The result is laid out degree by degree: first X itself, then every
    entry squared, then cubed, and so on. For ``degree`` below 2 there is
    nothing to append and a copy of ``X`` is returned.
    """
    higher_powers = [np.power(X, exponent) for exponent in range(2, degree + 1)]
    if not higher_powers:
        return X.copy()
    # A single column-wise concatenation of X followed by each power block.
    return np.c_[tuple([X] + higher_powers)]

# Worked example: a single feature expanded up to its square.
degree = 2
X = np.array([1, 2, 3]).reshape(-1, 1)

X_poly = create_polynomial_features7(X, degree)
print(X_poly)

[[1 1]
 [2 4]
 [3 9]]


In [30]:
# Question 8

def create_polynomial_features8(X, degree=2):
    """Expand every feature column into its powers 1..degree.

    The output is grouped feature by feature: for input columns ``x1, x2``
    and ``degree=2`` the result columns are ``x1, x1**2, x2, x2**2``.

    Args:
        X: array-like of shape ``(n_samples, n_features)``. A 1-D input is
            treated as a single feature column.
        degree: highest power to generate; must be at least 1.

    Returns:
        A new array of shape ``(n_samples, n_features * degree)``.

    Raises:
        ValueError: if ``degree`` is smaller than 1.
    """
    if degree < 1:
        raise ValueError("degree must be at least 1")

    X = np.asarray(X)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    if X.shape[1] == 0:
        # No feature columns, so there is nothing to expand.
        return X.copy()

    # Collect whole columns. Keeping each column as one list item is what makes
    # degree=1 work: extending a list with a bare 1-D column would splat it
    # into individual scalars.
    columns = []
    for feature in X.T:
        columns.append(feature)
        columns.extend(np.power(feature, d) for d in range(2, degree + 1))
    return np.c_[tuple(columns)]

# Worked example: two features, each followed by its own square.
degree = 2
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
])

X_poly = create_polynomial_features8(X, degree)
print(X_poly)

[[ 1  1  2  4]
 [ 2  4  3  9]
 [ 3  9  4 16]]


In [31]:
# One-hot encode the frame; in the output below the 'Influencer' column is
# replaced by the Influencer_Macro / _Mega / _Micro / _Nano indicator columns.
# `df` is rebound, so later cells see the encoded frame.
df = pd.get_dummies(df)
# Bare last expression so the notebook renders the encoded frame.
df

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.913410,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.246340,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False
...,...,...,...,...,...,...,...,...
4567,26.0,4.472360,0.717090,94.685866,False,False,True,False
4568,71.0,20.610685,6.545573,249.101915,False,False,False,True
4569,44.0,19.800072,5.096192,163.631457,False,False,True,False
4570,71.0,17.534640,1.940873,253.610411,True,False,False,False


In [39]:
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler # type: ignore

# Replace every missing value with the mean of its own column.
# NOTE(review): the means are taken over the whole frame, before the split
# below, so held-out rows contribute to the imputed values (and a missing
# 'Sales' target is imputed as well) - confirm this is acceptable.
column_means = df.mean()
df = df.fillna(column_means)

# Predictors: the three spend columns plus the one-hot influencer indicators.
feature_columns = [
    'TV',
    'Radio',
    'Social Media',
    'Influencer_Macro',
    'Influencer_Mega',
    'Influencer_Micro',
    'Influencer_Nano',
]
X = df[feature_columns]
y = df[['Sales']]

# Hold out 33% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)

# Standardise with statistics learned from the training rows only.
sc = StandardScaler()
X_train_processed = sc.fit_transform(X_train)
X_test_processed = sc.transform(X_test)

# Training-set mean of the first feature column ('TV').
sc.mean_[0]

54.173577723283785

In [40]:
from sklearn.linear_model import LinearRegression # type: ignore
from sklearn.metrics import r2_score # type: ignore
from sklearn.preprocessing import PolynomialFeatures # type: ignore

# Degree-2 expansion (squares and pairwise products) of the scaled features.
# The expander is fitted on the training matrix and reused for the test matrix.
poly_features = PolynomialFeatures(degree=2, interaction_only=False)
X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)

# Ordinary least squares on the expanded training features.
poly_model = LinearRegression().fit(X_train_poly, y_train)

# Score the held-out rows.
preds = poly_model.predict(X_test_poly)
r2 = r2_score(y_test, preds)
print("R² Score:", r2)

R² Score: 0.9951796158547627
