In [6]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm
from ISLP import load_data
from ISLP.models import summarize, poly, ModelSpec as MS

In [9]:
Boston = load_data('Boston')
Boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'lstat', 'medv'],
      dtype='object')

In [24]:
Boston.head(3)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7


In [14]:
X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]), 'lstat': Boston['lstat']})
X[:4]

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94


In [20]:
y = Boston['medv']
model = sm.OLS(y, X)
results = model.fit()
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,34.5538,0.563,61.415,0.0
lstat,-0.95,0.039,-24.528,0.0


In [22]:
results.params

intercept    34.553841
lstat        -0.950049
dtype: float64

### Linear regression with Predictors

In [27]:
X = MS(['lstat', 'age']).fit_transform(Boston)
X.head(3)

Unnamed: 0,intercept,lstat,age
0,1.0,4.98,65.2
1,1.0,9.14,78.9
2,1.0,4.03,61.1


In [32]:
model = sm.OLS(y, X)
results = model.fit()
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,33.2228,0.731,45.458,0.0
lstat,-1.0321,0.048,-21.416,0.0
age,0.0345,0.012,2.826,0.005


In [33]:
results.params

intercept    33.222761
lstat        -1.032069
age           0.034544
dtype: float64

### Multiple Regression

In [37]:
new_col = Boston.columns.drop('medv')
new_col

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'lstat'],
      dtype='object')

In [42]:
X = MS(new_col).fit_transform(Boston)
X.head()

Unnamed: 0,intercept,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat
0,1.0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,4.98
1,1.0,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14
2,1.0,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03
3,1.0,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94
4,1.0,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33


In [46]:
model = sm.OLS(y, X)
result = model.fit()
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,33.2228,0.731,45.458,0.0
lstat,-1.0321,0.048,-21.416,0.0
age,0.0345,0.012,2.826,0.005


In [47]:
result.params

intercept    41.617270
crim         -0.121389
zn            0.046963
indus         0.013468
chas          2.839993
nox         -18.758022
rm            3.658119
age           0.003611
dis          -1.490754
rad           0.289405
tax          -0.012682
ptratio      -0.937533
lstat        -0.552019
dtype: float64