In [1]:
import pandas as pd

# Read the marketing-campaign CSV into a DataFrame
data_df = pd.read_csv(filepath_or_buffer="c3_marketing-campaign.csv")
# Show (rows, columns) as a quick check that the load worked
print("data_df shape:", data_df.shape)

data_df shape: (50, 4)


In [2]:
# Preview the first five rows of the dataset
data_df.head(n=5)

Unnamed: 0,tv,web,radio,sales
0,0.916,1.689,0.208,1.204
1,9.359,1.706,1.071,4.8
2,5.261,2.538,2.438,3.97
3,8.682,2.092,1.283,5.212
4,11.736,1.66,1.8,5.993


In [3]:
# Build the input matrix X: every column except the target "sales"
X = data_df.drop(columns="sales").values
print("X:", X.shape)

X: (50, 3)


In [4]:
# Pull the target vector y out of the "sales" column
y = data_df["sales"].values
print("y:", y.shape)

y: (50,)


In [5]:
from scipy.linalg import lstsq

# Least-squares fit of y on the feature columns (no intercept column here);
# lstsq also returns the rank and singular values, which are discarded
w, rss, _, _ = lstsq(a=X, b=y)
print("w:", w)
print("RSS:", rss)

w: [0.3958359  0.47521518 0.31040001]
RSS: 1.6884039033000027


The RSS of the simple linear regression model was 15.7, so the fit improved dramatically (RSS ≈ 1.69) after adding the two other marketing budgets as features.

In [6]:
# add column of ones to matrix for the intercept term w0

import numpy as np

# Prepend a column of ones so the first coefficient acts as the intercept
X1 = np.column_stack((np.ones(len(X)), X))  # (n,) ones + (n, p) X -> (n, p + 1)

# Preview the first five rows
X1[:5]

array([[ 1.   ,  0.916,  1.689,  0.208],
       [ 1.   ,  9.359,  1.706,  1.071],
       [ 1.   ,  5.261,  2.538,  2.438],
       [ 1.   ,  8.682,  2.092,  1.283],
       [ 1.   , 11.736,  1.66 ,  1.8  ]])

In [7]:
# Refit on X1, whose leading column of ones adds an intercept term
w, rss, _, _ = lstsq(a=X1, b=y)

print("w:", w)
print("RSS:", rss)

w: [0.02487092 0.39465146 0.47037002 0.30669954]
RSS: 1.6854508680824705


In [8]:
# w now has four entries (intercept plus one weight per feature column),
# so predictions are the matrix-vector product of X1 and w
y_pred = X1 @ w
print("y_pred:", y_pred.shape)

y_pred: (50,)


In [9]:
# Verify RSS score
def RSS(y, y_pred):
    """Return the residual sum of squares between targets and predictions.

    Args:
        y: Observed target values (array-like).
        y_pred: Predicted values; combined element-wise with ``y``
            following NumPy broadcasting rules.

    Returns:
        The sum of the squared element-wise differences ``y - y_pred``.
    """
    residuals = np.subtract(y, y_pred)
    squared_residuals = np.square(residuals)
    return np.sum(squared_residuals)


# Recompute the RSS from the predictions and check it against the value
# lstsq reported. `rss` is deliberately not overwritten: reassigning it here
# would discard the very number being verified, and would make the check
# meaningless when the cell is re-run.
rss_check = RSS(y, y_pred)
np.testing.assert_allclose(rss_check, rss)
print("RSS:", rss_check)

RSS: 1.6854508680824705
