In [1]:
import pandas as pd
import numpy as np
import random

# 1. Linear Regression

In [7]:
class CustomLinearRegression:
    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]
        self.y_target = y_target
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initial weights
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []
    
    def compute_loss(self, y_pred, y_target):
        self.loss = y_target - y_pred
        return self.loss
    
    def predict(self, X_data):
        y_pred = X_data.dot(self.theta)
        return y_pred
    
    def fit(self):
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.X_data)

            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            loss_grd = 2*(y_pred-self.y_target)/self.num_samples
            gradients = self.X_data.T.dot(loss_grd)

            self.theta = self.theta - self.learning_rate*gradients

            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')
            
        return {
            'loss': sum(self.losses)/len(self.losses),
            'weight': self.theta
        } 

In [4]:
def r2score(y_pred, y_target):
    rss = np.sum((y_target - y_pred)**2)
    tss = np.sum((y_target - y_target.mean())**2)
    r2 = 1-(rss/tss)
    return r2

In [5]:
y_pred = np.array([1,2,3,4,5])
y_target = np.array([1,2,3,4,5])
r2score(y_pred, y_target)

1.0

In [6]:
y_pred = np.array([1,2,3,4,5])
y_target = np.array([3,5,5,2,4])
r2score(y_pred, y_target)

-2.235294117647059

# 2. Polynomial Regression

In [8]:
def create_polynomial_features(X, degree=2):
    """Create the polynomial features
    Args:
        X: An array tensor for the data.
        degree: An integer for the degree of the generated polynomial function.
    """
    X_new = X
    for d in range(2, degree + 1):
        X_new = np.c_[X_new, np.power(X, d)]
    return X_new

In [9]:
X = np.array([[1], [2], [3]])
create_polynomial_features(X)

array([[1, 1],
       [2, 4],
       [3, 9]])

In [31]:
def create_polynomial_features(X, degree=2):
    """Create the polynomial features
    Args:
        X: An array tensor for the data.
        degree: An integer for the degree of the generated polynomial function.
    """
    X_mem = []
    for X_sub in X.T:
        X_sub = X_sub.T
        X_new = X_sub
        for d in range(2, degree +1):
            X_new = np.c_[X_new, np.power(X_sub, d)]
            X_mem.extend(X_new.T)
    return np.c_[X_mem].T

In [32]:
X = np.array([[1, 2],
              [2, 3],
              [3, 4]])
create_polynomial_features(X)

array([[ 1,  1,  2,  4],
       [ 2,  4,  3,  9],
       [ 3,  9,  4, 16]])

# 3. Sales Prediction

In [25]:
df = pd.read_csv(r'C:\Users\Administrator\Desktop\Study\Git\AIO-Exercise\project_module4\sales_prediction\SalesPrediction.csv')
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [27]:
df = pd.get_dummies(df)

In [28]:
df.columns

Index(['TV', 'Radio', 'Social Media', 'Sales', 'Influencer_Macro',
       'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano'],
      dtype='object')

In [30]:
# Handle NULL values
df = df.fillna(df.mean())

# Get features
X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro',
       'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano']]
y = df[['Sales']]

# Train Test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
scaler.mean_[0]


54.173577723283785

In [34]:
X_test_processed = scaler.fit_transform(X_test)
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=2)

X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)

-2088560540581.627