In [5]:
from sklearn import datasets

In [11]:
# Load the Boston housing data bundled with scikit-learn. The returned object
# exposes .data, .target, .feature_names and .DESCR, all of which are used below.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the pinned version, otherwise this cell fails on a fresh kernel.
data = datasets.load_boston()

In [13]:
# Show the dataset's built-in description (instance count, attribute list).
print(data.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [14]:
# Names of the 13 predictor columns; reused below as the DataFrame header.
data.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [16]:
import pandas as pd
import numpy as np

In [21]:
# Wrap the raw predictor matrix in a DataFrame so that each column can be
# addressed by its feature name (e.g. df["RM"]).
df = pd.DataFrame(data.data, columns=data.feature_names)

In [22]:
# Sanity check: expect (506, 13) — one row per instance, one column per predictor.
df.shape

(506, 13)

In [23]:
# Response variable as a single-column frame; "MEDV" is the label that the
# regression summaries report as the dependent variable.
target = pd.DataFrame({"MEDV": data.target})

In [29]:
import statsmodels.api as sm

In [30]:
# Simple regression setup: one predictor (RM) against the response (MEDV).
X = df.loc[:, "RM"]
y = target.loc[:, "MEDV"]

In [31]:
# statsmodels takes the response (endog) first and the regressors (exog)
# second, so the arguments are spelled out by keyword to avoid mixing them up.
# NOTE(review): no constant column is added to X, so this is a regression
# through the origin (the summary reports an *uncentered* R-squared).
# Confirm that is intended; sm.add_constant(X) would add an intercept.
model = sm.OLS(endog=y, exog=X).fit()
# In-sample fitted values for the same rows the model was trained on.
predictions = model.predict(exog=X)


In [32]:
# Render the regression report: fit statistics, coefficient table, diagnostics.
# R-squared is labelled "uncentered" because the fitted model has no intercept.
model.summary()

0,1,2,3
Dep. Variable:,MEDV,R-squared (uncentered):,0.901
Model:,OLS,Adj. R-squared (uncentered):,0.901
Method:,Least Squares,F-statistic:,4615.0
Date:,"Sat, 29 Feb 2020",Prob (F-statistic):,3.7399999999999996e-256
Time:,16:50:25,Log-Likelihood:,-1747.1
No. Observations:,506,AIC:,3496.0
Df Residuals:,505,BIC:,3500.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
RM,3.6534,0.054,67.930,0.000,3.548,3.759

0,1,2,3
Omnibus:,83.295,Durbin-Watson:,0.493
Prob(Omnibus):,0.0,Jarque-Bera (JB):,152.507
Skew:,0.955,Prob(JB):,7.649999999999999e-34
Kurtosis:,4.894,Cond. No.,1.0


In [33]:
# Multiple regression of MEDV on all 13 predictors. This rebinds `model`, so
# the single-predictor fit from the earlier cell is no longer reachable.
# NOTE(review): as before, no intercept column is included in the regressors.
model = sm.OLS(endog=target, exog=df).fit()

In [35]:
# In-sample fitted values from the current `model`, evaluated on every column of df.
predictions = model.predict(exog=df)

In [36]:
# Report for whatever `model` currently holds — after the cell above, the 13-predictor fit.
model.summary()

0,1,2,3
Dep. Variable:,MEDV,R-squared (uncentered):,0.959
Model:,OLS,Adj. R-squared (uncentered):,0.958
Method:,Least Squares,F-statistic:,891.3
Date:,"Sat, 29 Feb 2020",Prob (F-statistic):,0.0
Time:,16:54:14,Log-Likelihood:,-1523.8
No. Observations:,506,AIC:,3074.0
Df Residuals:,493,BIC:,3128.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
CRIM,-0.0929,0.034,-2.699,0.007,-0.161,-0.025
ZN,0.0487,0.014,3.382,0.001,0.020,0.077
INDUS,-0.0041,0.064,-0.063,0.950,-0.131,0.123
CHAS,2.8540,0.904,3.157,0.002,1.078,4.630
NOX,-2.8684,3.359,-0.854,0.394,-9.468,3.731
RM,5.9281,0.309,19.178,0.000,5.321,6.535
AGE,-0.0073,0.014,-0.526,0.599,-0.034,0.020
DIS,-0.9685,0.196,-4.951,0.000,-1.353,-0.584
RAD,0.1712,0.067,2.564,0.011,0.040,0.302

0,1,2,3
Omnibus:,204.082,Durbin-Watson:,0.999
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1374.225
Skew:,1.609,Prob(JB):,3.9e-299
Kurtosis:,10.404,Cond. No.,8500.0


In [37]:
# Same fit in statsmodels' alternative summary layout (more decimals, adds Scale).
model.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared (uncentered):,0.958
Dependent Variable:,MEDV,AIC:,3073.5509
Date:,2020-02-29 16:54,BIC:,3128.4959
No. Observations:,506,Log-Likelihood:,-1523.8
Df Model:,13,F-statistic:,891.3
Df Residuals:,493,Prob (F-statistic):,0.0
R-squared (uncentered):,0.959,Scale:,24.803

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
CRIM,-0.0929,0.0344,-2.6988,0.0072,-0.1605,-0.0253
ZN,0.0487,0.0144,3.3822,0.0008,0.0204,0.0770
INDUS,-0.0041,0.0644,-0.0630,0.9498,-0.1307,0.1226
CHAS,2.8540,0.9039,3.1574,0.0017,1.0780,4.6300
NOX,-2.8684,3.3587,-0.8540,0.3935,-9.4676,3.7308
RM,5.9281,0.3091,19.1782,0.0000,5.3208,6.5355
AGE,-0.0073,0.0138,-0.5262,0.5990,-0.0344,0.0199
DIS,-0.9685,0.1956,-4.9507,0.0000,-1.3529,-0.5841
RAD,0.1712,0.0668,2.5640,0.0106,0.0400,0.3023

0,1,2,3
Omnibus:,204.082,Durbin-Watson:,0.999
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1374.225
Skew:,1.609,Prob(JB):,0.0
Kurtosis:,10.404,Condition No.:,8498.0
