In [34]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [35]:
from ISLP.models import (ModelSpec,
                         summarize,
                         Column,
                         Feature,
                         build_columns,
                         contrast)

import statsmodels.api as sm

In [36]:
# Apply seaborn's default theme so later plots share one consistent look.
sns.set_theme()

In [37]:
# Load seaborn's "mpg" example dataset and drop every row that contains a
# missing value, in one chained expression.
mpg = sns.load_dataset("mpg").dropna()

In [38]:
# Show the column names that are available as model terms.
mpg.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin', 'name'],
      dtype='object')

In [39]:
# Design matrix from four predictors; with default settings the transformed
# frame also carries an `intercept` column (see the output below).
MS = ModelSpec(
    ["cylinders", "displacement", "horsepower", "weight"],
)
X = MS.fit_transform(mpg)
X.head(n=10)

Unnamed: 0,intercept,cylinders,displacement,horsepower,weight
0,1.0,8,307.0,130.0,3504
1,1.0,8,350.0,165.0,3693
2,1.0,8,318.0,150.0,3436
3,1.0,8,304.0,150.0,3433
4,1.0,8,302.0,140.0,3449
5,1.0,8,429.0,198.0,4341
6,1.0,8,454.0,220.0,4354
7,1.0,8,440.0,215.0,4312
8,1.0,8,455.0,225.0,4425
9,1.0,8,390.0,190.0,3850


In [40]:
# Same four predictors with the intercept column switched off.
# Note that this rebinds `X` to the intercept-free design matrix.
MS_no1 = ModelSpec(
    ["cylinders", "displacement", "horsepower", "weight"],
    intercept=False,
)
X = MS_no1.fit_transform(mpg)
X.head(n=10)

Unnamed: 0,cylinders,displacement,horsepower,weight
0,8,307.0,130.0,3504
1,8,350.0,165.0,3693
2,8,318.0,150.0,3436
3,8,304.0,150.0,3433
4,8,302.0,140.0,3449
5,8,429.0,198.0,4341
6,8,454.0,220.0,4354
7,8,440.0,215.0,4312
8,8,455.0,225.0,4425
9,8,390.0,190.0,3850


In [41]:
# Treat `cylinders` as a categorical term. With the contrast method set to
# None, the transformed frame shows one indicator column per cylinder level
# (cylinders[3] ... cylinders[8]); the spec is paired with intercept=False.
# `contrast` is imported with the other ISLP.models names at the top of the
# notebook rather than mid-notebook.
cylinders = contrast("cylinders", None)
MS_contr = ModelSpec(
    [cylinders, "displacement", "horsepower", "weight"],
    intercept=False,
)
MS_contr.fit_transform(mpg).head(10)

Unnamed: 0,cylinders[3],cylinders[4],cylinders[5],cylinders[6],cylinders[8],displacement,horsepower,weight
0,0.0,0.0,0.0,0.0,1.0,307.0,130.0,3504
1,0.0,0.0,0.0,0.0,1.0,350.0,165.0,3693
2,0.0,0.0,0.0,0.0,1.0,318.0,150.0,3436
3,0.0,0.0,0.0,0.0,1.0,304.0,150.0,3433
4,0.0,0.0,0.0,0.0,1.0,302.0,140.0,3449
5,0.0,0.0,0.0,0.0,1.0,429.0,198.0,4341
6,0.0,0.0,0.0,0.0,1.0,454.0,220.0,4354
7,0.0,0.0,0.0,0.0,1.0,440.0,215.0,4312
8,0.0,0.0,0.0,0.0,1.0,455.0,225.0,4425
9,0.0,0.0,0.0,0.0,1.0,390.0,190.0,3850


In [42]:
# Display the repr of the categorical column spec returned by `contrast`.
cylinders

Column(idx='cylinders', name='cylinders', is_categorical=True, is_ordinal=False, columns=(), encoder=Contrast(method=None))

In [43]:
# Expand the spec against the data. The displayed result is a tuple holding
# the indicator matrix and the matching list of generated column names.
cylinders.get_columns(mpg)

(array([[0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        ...,
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.]]),
 ['cylinders[3]',
  'cylinders[4]',
  'cylinders[5]',
  'cylinders[6]',
  'cylinders[8]'])

In [46]:
# Response first, then the design matrix from the already-fitted `MS_contr`
# spec. That spec was built with intercept=False, so the summary reports one
# coefficient per cylinder level instead of a single intercept term.
y = mpg["mpg"]
X = MS_contr.transform(mpg)
M_ols = sm.OLS(endog=y, exog=X).fit()
summarize(M_ols)

Unnamed: 0,coef,std err,t,P>|t|
cylinders[3],37.6944,2.53,14.9,0.0
cylinders[4],44.7273,1.336,33.484,0.0
cylinders[5],46.7574,2.873,16.275,0.0
cylinders[6],40.9954,1.861,22.033,0.0
cylinders[8],43.6765,2.533,17.24,0.0
displacement,-0.0001,0.009,-0.015,0.988
horsepower,-0.0607,0.013,-4.601,0.0
weight,-0.0046,0.001,-6.686,0.0


In [50]:
# Interactions: a tuple inside the term list requests the interaction of its
# members, here the categorical `cylinders` spec with `weight`. The output has
# one `cylinders[k]:weight` column per level, next to `intercept` and `weight`.
# NOTE(review): `cylinders` keeps an indicator for every level, so the five
# interaction columns appear to add up to the `weight` column itself; this
# matrix would then be rank-deficient if passed to OLS — confirm before fitting.
ModelSpec([(cylinders, 'weight'), 'weight']).fit_transform(mpg).head(10)

Unnamed: 0,intercept,cylinders[3]:weight,cylinders[4]:weight,cylinders[5]:weight,cylinders[6]:weight,cylinders[8]:weight,weight
0,1.0,0.0,0.0,0.0,0.0,3504.0,3504
1,1.0,0.0,0.0,0.0,0.0,3693.0,3693
2,1.0,0.0,0.0,0.0,0.0,3436.0,3436
3,1.0,0.0,0.0,0.0,0.0,3433.0,3433
4,1.0,0.0,0.0,0.0,0.0,3449.0,3449
5,1.0,0.0,0.0,0.0,0.0,4341.0,4341
6,1.0,0.0,0.0,0.0,0.0,4354.0,4354
7,1.0,0.0,0.0,0.0,0.0,4312.0,4312
8,1.0,0.0,0.0,0.0,0.0,4425.0,4425
9,1.0,0.0,0.0,0.0,0.0,3850.0,3850
