In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm
import ISLP
from ISLP.models import (ModelSpec as MS, summarize, poly)

In [3]:
# Load the Boston data set that ships with ISLP. The cells below regress
# `medv` (response) on `lstat` (predictor).
boston = ISLP.load_data('Boston')
# Bare last expression: display the frame to check its columns and size.
boston

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,6.48,22.0


In [4]:
# Simple linear regression of medv on lstat, with the design matrix
# assembled by hand: a column of ones for the intercept plus the predictor.
y = boston['medv']
X = pd.DataFrame({
    'intercept': np.ones(len(boston)),
    'lstat': boston['lstat'],
})

# Specify the OLS model first, then fit it; `results` holds the fitted
# model and is reused by later cells.
model = sm.OLS(y, X)
results = model.fit()

# Last expression: render the full regression summary table.
results.summary()

0,1,2,3
Dep. Variable:,medv,R-squared:,0.544
Model:,OLS,Adj. R-squared:,0.543
Method:,Least Squares,F-statistic:,601.6
Date:,"Tue, 28 Jan 2025",Prob (F-statistic):,5.08e-88
Time:,11:27:52,Log-Likelihood:,-1641.5
No. Observations:,506,AIC:,3287.0
Df Residuals:,504,BIC:,3295.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,34.5538,0.563,61.415,0.000,33.448,35.659
lstat,-0.9500,0.039,-24.528,0.000,-1.026,-0.874

0,1,2,3
Omnibus:,137.043,Durbin-Watson:,0.892
Prob(Omnibus):,0.0,Jarque-Bera (JB):,291.373
Skew:,1.453,Prob(JB):,5.36e-64
Kurtosis:,5.319,Cond. No.,29.7


In [7]:
# Build a model specification whose only listed term is `lstat`.
design = MS(['lstat'])
# Exploratory: list the attributes and methods available on the
# specification object (later cells use fit, transform and fit_transform).
dir(design)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_feature_names',
 '_check_n_features',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_sklearn_auto_wrap_output_keys',
 '_validate_data',
 '_validate_params',
 'build_sequence',
 'build_submodel',
 'categorical_encoders',
 'categorical_encoders_',
 'categorical_features',
 'fit',
 'fit_transform',
 'get_params',
 'intercept',
 'names',
 'set_output',
 'set_params',
 'terms',
 'transform']

In [8]:
# Fit the specification to the Boston frame so it can be used to transform
# data afterwards.
# NOTE(review): `fit` presumably returns the specification itself, as
# scikit-learn estimators do — confirm. `design_fit` is not referenced by
# any other visible cell.
design_fit = design.fit(boston)
design_fit

In [9]:
# Build the design matrix from the fitted specification. This rebinds `X`,
# which was first assembled by hand for the OLS fit; the displayed result
# has the same two columns, `intercept` and `lstat`.
X = design.transform(boston)
X

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94
4,1.0,5.33
...,...,...
501,1.0,9.67
502,1.0,9.08
503,1.0,5.64
504,1.0,6.48


In [10]:
# One-step alternative to calling fit and transform separately: create a
# fresh specification and fit + transform in a single call. Rebinds both
# `design` and `X`.
design = MS(['lstat'])
X = design.fit_transform(boston)
X

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94
4,1.0,5.33
...,...,...
501,1.0,9.67
502,1.0,9.08
503,1.0,5.64
504,1.0,6.48


In [12]:
# Fitted coefficients (intercept and lstat slope) of the OLS fit of medv on
# lstat; `results` is the fitted model created in the earlier OLS cell.
results.params

intercept    34.553841
lstat        -0.950049
dtype: float64

In [14]:
# New observations at which to predict: lstat = 5, 10, ..., 30.
new_df = pd.DataFrame({'lstat': list(range(5, 31, 5))})

# Reuse the already-fitted specification so the new design matrix gets the
# same columns (intercept, lstat) as the training matrix.
newX = design.transform(new_df)
newX

Unnamed: 0,intercept,lstat
0,1.0,5
1,1.0,10
2,1.0,15
3,1.0,20
4,1.0,25
5,1.0,30


In [18]:
# Predictions of medv at the new lstat values.
new_pred = results.get_prediction(newX)

# Display, in order: the point predictions, the 95% confidence intervals
# for the mean response, and the 95% prediction intervals for a new
# observation (obs=True), which are wider.
(
    new_pred.predicted_mean,
    new_pred.conf_int(alpha=0.05),
    new_pred.conf_int(obs=True, alpha=0.05),
)

(array([29.80359411, 25.05334734, 20.30310057, 15.5528538 , 10.80260704,
         6.05236027]),
 array([[29.00741194, 30.59977628],
        [24.47413202, 25.63256267],
        [19.73158815, 20.87461299],
        [14.77355013, 16.33215748],
        [ 9.71745557, 11.8877585 ],
        [ 4.62500435,  7.47971618]]),
 array([[17.56567478, 42.04151344],
        [12.82762635, 37.27906833],
        [ 8.0777421 , 32.52845905],
        [ 3.31602098, 27.78968663],
        [-1.45750378, 23.06271785],
        [-6.24276512, 18.34748566]]))

In [None]:
# Tabulate the predictions at the new lstat values instead of displaying the
# bare prediction object, whose repr is just a class name and memory address.
# The frame has one row per new observation with the predicted mean, its
# standard error, the 95% confidence interval for the mean and the 95%
# prediction interval for a new observation.
results.get_prediction(newX).summary_frame(alpha=0.05)