In [1]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm

from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize, poly)

In [2]:
boston = pd.read_csv("Boston.csv")

In [3]:
boston.shape

(506, 14)

__*2(a)*__

In [66]:
X = MS(['zn']).fit_transform(boston)
slr1 = sm.OLS(boston.crim, X).fit()

In [67]:
X = MS(['indus']).fit_transform(boston)
slr2 = sm.OLS(boston.crim, X).fit()

In [68]:
X = MS(['chas']).fit_transform(boston)
slr3 = sm.OLS(boston.crim, X).fit()

In [69]:
X = MS(['nox']).fit_transform(boston)
slr4 = sm.OLS(boston.crim, X).fit()

In [70]:
X = MS(['rm']).fit_transform(boston)
slr5 = sm.OLS(boston.crim, X).fit()

In [71]:
X = MS(['age']).fit_transform(boston)
slr6 = sm.OLS(boston.crim, X).fit()

In [72]:
X = MS(['dis']).fit_transform(boston)
slr7 = sm.OLS(boston.crim, X).fit()

In [73]:
X = MS(['rad']).fit_transform(boston)
slr8 = sm.OLS(boston.crim, X).fit()

In [74]:
X = MS(['tax']).fit_transform(boston)
slr9 = sm.OLS(boston.crim, X).fit()

In [75]:
X = MS(['ptratio']).fit_transform(boston)
slr10 = sm.OLS(boston.crim, X).fit()

In [76]:
X = MS(['black']).fit_transform(boston)
slr11 = sm.OLS(boston.crim, X).fit()

In [77]:
X = MS(['lstat']).fit_transform(boston)
slr12 = sm.OLS(boston.crim, X).fit()

In [78]:
X = MS(['medv']).fit_transform(boston)
slr13 = sm.OLS(boston.crim, X).fit()

__*2(b)*__

All the predictors except for 'chas' have statistically significant relationship with the response. The predictor 'nox' representing nitric oxide concentration has a large positive association (coef = $31.2485$) with the per capita crime rate. So, if the level of air pollution (by nitrogen oxide) goes up, the crime rate also goes up. 'Chas' represents tracts binding the river (a binary variable). The model predicts that tracts binding the river have ($-1.8928$) fewer crimes per capita compared to the tracts not binding the river. However, this relationship is not statistically significant. The predictor 'rm' representing average number of rooms per dwelling has a negative small association with the crime rate per capita. As the average number of rooms per dwelling increases, the crime rate decreases. The predictor 'dis' representing weighted distance to Boston employment centers has a negative small association with the crime rate per capita. And 'medv' denoting median home value has a negative small association with the crime rate per capita.

Now we discuss how these relationships differ. First, nox, rm, dis and medv have statistically significant relationships with the crime rate per capita. But the association between chas and crime rate is not statistically significant. Second, only nox has a positive association. All the other predictors have a negative association with the crime rate. Though the coefficients are unstandardized, only nox seems to have a large association with the crime rate. All other predictors seem to have a smaller association with the crime rate. 

__*2(c)*__

In [79]:
boston.columns = ['crim', 'zn',	'indus', 'chas', 'nox', 
                  'rm', 'age', 'dis',	'rad', 'tax', 
                  'ptratio', 'black', 'lstat', 'medv']
X = MS(boston.columns.drop(['crim'])).fit_transform(boston)
mlr = sm.OLS(boston.crim, X).fit()
summarize(mlr)

Unnamed: 0,coef,std err,t,P>|t|
intercept,17.0332,7.235,2.354,0.019
zn,0.0449,0.019,2.394,0.017
indus,-0.0639,0.083,-0.766,0.444
chas,-0.7491,1.18,-0.635,0.526
nox,-10.3135,5.276,-1.955,0.051
rm,0.4301,0.613,0.702,0.483
age,0.0015,0.018,0.081,0.935
dis,-0.9872,0.282,-3.503,0.001
rad,0.5882,0.088,6.68,0.0
tax,-0.0038,0.005,-0.733,0.464


Proportion of residential land zoned for lots ('zn'), average number of rooms per dwelling ('rm'), proportion of owner-occupied units built prior to 1940 ('age'), index of accessibility to radial highways ('rad'), and % lower status of the population ('lstat') have a positive association with the crime rate. All the other predictors have a negative association with the crime rate. Only Nitric Oxides concentration seems to have a large association with the crime rate. Proportion of owner-occupied units built prior to 1940 ('age') and full-value property-tax rate per $10,000 ('tax') have very low association with the crime rate compared to other predictors. 

Proportion of residential land zoned for lots ('zn'), weighted distance to Boston employment centers ('dis'), index of accessibility to radial highways ('rad'), proportion of blacks by town ('black'), and median home value ('medv') have statistically significant relationships with the crime rate. These predictors have p-values less than $0.05$. So, for these predictors we can reject the null hypothesis $H_0: \beta_j = 0$. 

__*2(d)*__

In [86]:
y = boston["crim"].values
predictors = [c for c in boston.columns if c != "crim"]

slr_rows = []
for v in predictors:
    X_v = MS([v]).fit_transform(boston)      
    slr = sm.OLS(y, X_v).fit()
    slr_rows.append({
    "predictor":v, "SLR_beta":slr.params.iloc[0], 
        "SLR_SE":slr.bse.iloc[0], "SLR_p":slr.pvalues.iloc[0],
    })
slr_tbl = pd.DataFrame(slr_rows).set_index("predictor")

mlr_tbl = pd.DataFrame({"MLR_beta":mlr.params, "MLR_SE":mlr.bse, "MLR_p":mlr.pvalues})

compare_tbl = slr_tbl.join(mlr_tbl).round(4)
print(compare_tbl)

           SLR_beta  SLR_SE   SLR_p  MLR_beta  MLR_SE   MLR_p
predictor                                                    
zn           4.4537  0.4172  0.0000    0.0449  0.0187  0.0170
indus       -2.0637  0.6672  0.0021   -0.0639  0.0834  0.4443
chas         3.7444  0.3961  0.0000   -0.7491  1.1801  0.5259
nox        -13.7199  1.6995  0.0000  -10.3135  5.2755  0.0512
rm          20.4818  3.3645  0.0000    0.4301  0.6128  0.4831
age         -3.7779  0.9440  0.0001    0.0015  0.0179  0.9355
dis          9.4993  0.7304  0.0000   -0.9872  0.2818  0.0005
rad         -2.2872  0.4435  0.0000    0.5882  0.0880  0.0000
tax         -8.5284  0.8158  0.0000   -0.0038  0.0052  0.4638
ptratio    -17.6469  3.1473  0.0000   -0.2711  0.1865  0.1466
black       16.5535  1.4259  0.0000   -0.0075  0.0037  0.0407
lstat       -3.3305  0.6938  0.0000    0.1262  0.0757  0.0962
medv        11.7965  0.9342  0.0000   -0.1989  0.0605  0.0011


__*2(e)*__

In [80]:
X = MS([poly('age', degree=3)]).fit_transform(boston)
multimodel2 = sm.OLS(boston.crim, X).fit()
summarize(multimodel2)

Unnamed: 0,coef,std err,t,P>|t|
intercept,3.6135,0.349,10.368,0.0
"poly(age, degree=3)[0]",68.182,7.84,8.697,0.0
"poly(age, degree=3)[1]",37.4845,7.84,4.781,0.0
"poly(age, degree=3)[2]",21.3532,7.84,2.724,0.007


For age, the polynomial terms have statistically significant relationship with the crime rate (p-value = $0.000 < 0.05$ for quadratic term and $0.007 < 0.05$ for cubic term).

In [81]:
X = MS([poly('tax', degree=3)]).fit_transform(boston)
multimodel2 = sm.OLS(boston.crim, X).fit()
summarize(multimodel2)

Unnamed: 0,coef,std err,t,P>|t|
intercept,3.6135,0.305,11.86,0.0
"poly(tax, degree=3)[0]",112.6458,6.854,16.436,0.0
"poly(tax, degree=3)[1]",32.0873,6.854,4.682,0.0
"poly(tax, degree=3)[2]",-7.9968,6.854,-1.167,0.244


For tax, only the quadratic term has statistically significant relationship with the crime rate (p-value = $0.000$) but cubic term does not have statistically significant relationship with the crime rate (p-value = $0.244 >$ threshold $0.05$).