### Explore How GLMs Generalize Multiple Linear Regression Models ###

#### Data Setup and Exploration ####

In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Import the rdatasets package
from rdatasets import data as rdata

# Load R's 'mtcars' dataset (note: 'mtcars', not the separate 'cars' dataset)
# through statsmodels' get_rdataset helper and keep only the DataFrame.
cars_data = sm.datasets.get_rdataset("mtcars", "datasets").data

# Inspect the data: first rows, column dtypes / null counts, summary statistics
print(cars_data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
cars_data.info()
print(cars_data.describe())

                    mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  \
rownames                                                                     
Mazda RX4          21.0    6  160.0  110  3.90  2.620  16.46   0   1     4   
Mazda RX4 Wag      21.0    6  160.0  110  3.90  2.875  17.02   0   1     4   
Datsun 710         22.8    4  108.0   93  3.85  2.320  18.61   1   1     4   
Hornet 4 Drive     21.4    6  258.0  110  3.08  3.215  19.44   1   0     3   
Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3   

                   carb  
rownames                 
Mazda RX4             4  
Mazda RX4 Wag         4  
Datsun 710            1  
Hornet 4 Drive        1  
Hornet Sportabout     2  
<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, Mazda RX4 to Volvo 142E
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   mpg     32 non-null     float64
 1   cyl     32 non-null     int64  
 2   disp

#### Fit a Multivariate Regression Model ####

In [2]:
from sklearn.linear_model import LinearRegression

# Predictor matrix: car weight only, kept as a one-column DataFrame
X = cars_data.loc[:, ['wt']]
# Response matrix: mpg and hp are modelled together in a single fit
Y = cars_data.loc[:, ['mpg', 'hp']]

# Build the estimator first, then fit it against both response columns
multi_reg = LinearRegression()
multi_reg.fit(X, Y)

# Report the fitted parameters (one intercept and one coefficient row per response)
print("Intercepts:", multi_reg.intercept_)
print("Coefficients:", multi_reg.coef_)

Intercepts: [37.28512617 -1.82092177]
Coefficients: [[-5.34447157]
 [46.16005028]]


#### Model Evaluation ####

In [3]:
from sklearn.metrics import r2_score

# In-sample predictions; columns follow the order of Y (mpg first, then hp)
Y_pred = multi_reg.predict(X)
mpg_hat, hp_hat = Y_pred.T  # split the prediction matrix into one vector per response

# Goodness of fit, scored separately for each response
r2_mpg = r2_score(Y['mpg'], mpg_hat)
r2_hp = r2_score(Y['hp'], hp_hat)

print(f"R-squared for mpg: {r2_mpg:.3f}")
print(f"R-squared for hp: {r2_hp:.3f}")

R-squared for mpg: 0.753
R-squared for hp: 0.434


#### Add Quadratic Terms ####

In [5]:
# Derive the squared-weight feature and store it as a new column on cars_data
cars_data['wt_squared'] = cars_data['wt'].pow(2)

# Predictor matrix now holds the linear and the quadratic weight terms
X_quad = cars_data.loc[:, ['wt', 'wt_squared']]

# Refit against the same two responses (Y) using the extended predictors
multi_reg_quad = LinearRegression()
multi_reg_quad.fit(X_quad, Y)

# Report the fitted parameters of the extended model
print("Updated Intercepts:", multi_reg_quad.intercept_)
print("Updated Coefficients:", multi_reg_quad.coef_)


Updated Intercepts: [ 49.93081095 -76.73441384]
Updated Coefficients: [[-13.38033708   1.17108689]
 [ 93.76480697  -6.93756093]]


#### Model Comparison Using R-squared #### 

In [6]:
# In-sample predictions from the model with the squared-weight term;
# columns follow the order of Y (mpg first, then hp)
Y_pred_quad = multi_reg_quad.predict(X_quad)
mpg_hat_quad, hp_hat_quad = Y_pred_quad.T  # one prediction vector per response

# Goodness of fit of the extended model, scored separately for each response
r2_mpg_quad = r2_score(Y['mpg'], mpg_hat_quad)
r2_hp_quad = r2_score(Y['hp'], hp_hat_quad)

print(f"Updated R-squared for mpg: {r2_mpg_quad:.3f}")
print(f"Updated R-squared for hp: {r2_hp_quad:.3f}")

Updated R-squared for mpg: 0.819
Updated R-squared for hp: 0.452


#### Final Model and Recommendations ####

In [8]:
# Collect the R-squared values of both fits side by side: the weight-only model
# and the model that also includes the squared weight term.
data = {
   'Metric': ['R-squared for mpg', 'R-squared for hp'],
   'GLM Model': [r2_mpg, r2_hp],
   # Column label spelling corrected (was 'Quadractic Model')
   'Quadratic Model': [r2_mpg_quad, r2_hp_quad]
}

# Create a pandas DataFrame with one row per metric
df = pd.DataFrame(data)

# Display the DataFrame as a markdown table, without the integer index
# (DataFrame.to_markdown depends on the optional 'tabulate' package)
print(df.to_markdown(index=False))

| Metric            |   GLM Model |   Quadractic Model |
|:------------------|------------:|-------------------:|
| R-squared for mpg |    0.752833 |           0.819061 |
| R-squared for hp  |    0.433949 |           0.451908 |
