## Multiple Linear Regression

Import the required libraries and load the dataset.

In [None]:
# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from statsmodels.formula.api import ols

%matplotlib inline
plt.style.use("seaborn-white")

**Load Datasets**

The datasets are available at https://www.statlearning.com/resources-first-edition; the cells below load copies hosted in the pykale/transparentML GitHub repository.

In [None]:
# Advertising data as a pandas data frame: the first CSV row supplies the
# column names (header=0) and the first CSV column becomes the row index
# (index_col=0).
data_url = "https://github.com/pykale/transparentML/raw/main/data/Advertising.csv"

advertising_df = pd.read_csv(data_url, header=0, index_col=0)

In [None]:
# Credit data, extended with Student2: a numeric copy of the Student column
# in which "No" becomes 0 and "Yes" becomes 1.
credit_url = "https://github.com/pykale/transparentML/raw/main/data/Credit.csv"

credit_df = pd.read_csv(credit_url).assign(
    Student2=lambda frame: frame["Student"].map({"No": 0, "Yes": 1})
)
credit_df.head(3)

In [None]:
# Auto data: "?" cells are parsed as missing values, and every row that
# contains a missing value is then dropped.
auto_url = "https://github.com/pykale/transparentML/raw/main/data/Auto.csv"

auto_df = (
    pd.read_csv(auto_url, na_values=["?"])
    .dropna(axis=0, how="any")
)
auto_df.info()

### Table 3.3 - Statsmodels

In [None]:
# Regress Sales on Radio alone and display the second table of the fitted
# model's summary (tables[1]).
est = ols(formula="Sales ~ Radio", data=advertising_df).fit()
est.summary().tables[1]

In [None]:
# Regress Sales on Newspaper alone and display the second table of the fitted
# model's summary (tables[1]).
est = ols(formula="Sales ~ Newspaper", data=advertising_df).fit()
est.summary().tables[1]

### Table 3.4 & 3.6 - Statsmodels

In [None]:
# Regress Sales on all three advertising budgets at once and display the
# complete model summary.
est = ols(
    formula="Sales ~ TV + Radio + Newspaper",
    data=advertising_df,
).fit()
est.summary()

### Table 3.5 - Correlation Matrix

In [None]:
# Pairwise Pearson correlations between the columns of the advertising data.
advertising_df.corr(method="pearson")

### Figure 3.5 - Multiple Linear Regression

In [None]:
# Fit Sales on two predictors. The column order (Radio, TV) determines the
# order of the entries in regr.coef_.
X = advertising_df[["Radio", "TV"]].values
y = advertising_df["Sales"]

regr = LinearRegression().fit(X, y)

# Coefficients first, intercept on the following line.
print(regr.coef_, regr.intercept_, sep="\n")

In [None]:
# Summary statistics for Radio and TV; their min/max values are what the
# plotting grid should be built from.
advertising_df.loc[:, ["Radio", "TV"]].describe()

In [None]:
# Create a coordinate grid in unit steps: Radio over 0..49, TV over 0..299.
Radio = np.arange(0, 50)
TV = np.arange(0, 300)

# With indexing="xy" both outputs have shape (TV.size, Radio.size):
# B1 holds the Radio value of each grid point, B2 the TV value.
B1, B2 = np.meshgrid(Radio, TV, indexing="xy")

# Evaluate the fitted plane on the whole grid in one broadcast expression
# instead of a Python loop over every cell. regr.coef_[0] belongs to Radio and
# regr.coef_[1] to TV, matching the column order used when fitting.
Z = regr.intercept_ + B1 * regr.coef_[0] + B2 * regr.coef_[1]

In [None]:
# 3D figure: fitted regression plane together with the observed data points.
fig = plt.figure(figsize=(10, 6))
fig.suptitle("Regression: Sales ~ Radio + TV Advertising", fontsize=20)

# Build the 3D axes without auto-attaching them, then add them to the figure
# explicitly.
ax = fig.add_axes(axes3d.Axes3D(fig, auto_add_to_figure=False))

# Semi-transparent fitted plane, with the observations drawn in red.
ax.plot_surface(B1, B2, Z, alpha=0.4, rstride=10, cstride=5)
ax.scatter3D(
    advertising_df["Radio"],
    advertising_df["TV"],
    advertising_df["Sales"],
    c="r",
)

# Axis labels first, then the axis limits.
ax.set_xlabel("Radio")
ax.set_ylabel("TV")
ax.set_zlabel("Sales")
ax.set_xlim(0, 50)
ax.set_ylim(ymin=0)
plt.show()