In [55]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [56]:
def r2score(y_pred, y):
    rss = np.sum((y_pred - y) ** 2)
    tss = np.sum((y - y.mean()) ** 2)
    r2 = 1 - (rss / tss)
    return r2

In [57]:
y_pred = np.array([1, 2, 3, 4, 5])
y = np.array([1, 2, 3, 4, 5])
r2score(y_pred, y)

1.0

In [58]:
y_pred = np.array([1, 2, 3, 4, 5])
y = np.array([3, 5, 5, 2, 4])
r2score(y_pred, y)

-2.235294117647059

In [59]:
def create_polynomial_features(X, degree=2):
    X_new = X
    for d in range(2, degree + 1):
        X_new = np.c_[X_new, np.power(X, d)]
    return X_new

X = np.array([[1], [2], [3]])
create_polynomial_features(X, degree=2)

array([[1, 1],
       [2, 4],
       [3, 9]])

In [60]:
def create_polynomial_features(X, degree=2):
    X_mem = []
    for X_sub in X.T:
        X_new = X_sub.reshape(-1, 1)
        for d in range(2, degree + 1):
            X_new = np.c_[X_new, np.power(X_sub, d).reshape(-1, 1)]
        X_mem.append(X_new)
    return np.hstack(X_mem)

X = np.array([[1, 2], [2, 3], [3, 4]])
X_poly = create_polynomial_features(X, degree=2)
X_poly


array([[ 1,  1,  2,  4],
       [ 2,  4,  3,  9],
       [ 3,  9,  4, 16]])

In [61]:
data = pd.read_csv('SalesPrediction.csv')
df = pd.get_dummies(data)
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [62]:
df = df.fillna(df.mean())

X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro', 'Influencer_Mega', 
        'Influencer_Micro', 'Influencer_Nano']]
y = df[['Sales']]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0
)

In [63]:
scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
X_test_processed = scaler.fit_transform(X_test)
scaler.mean_[0]

53.850231941683234

In [64]:
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)

# Initialize and train the linear regression model
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Make predictions on the test set
preds = poly_model.predict(X_test_poly)

# Evaluate the model with R^2 score
r2 = r2_score(y_test, preds)
print("R^2 score for the polynomial regression model:", r2)


R^2 score for the polynomial regression model: -2.8146157667846324e+20
