In [1]:

from __future__ import print_function
from statsmodels.compat import lzip
import statsmodels
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms


In [2]:
# Load data: the Guerry table (HistData collection) from the Rdatasets CSV
# mirror. The file is requested over HTTPS rather than plain HTTP so the
# download is integrity-protected in transit.
url = 'https://vincentarelbundock.github.io/Rdatasets/csv/HistData/Guerry.csv'
dat = pd.read_csv(url)



In [3]:

# Fit regression model (using the natural log of one of the regressors)
# OLS through the formula interface: Lottery is regressed on Literacy and on
# np.log(Pop1831), with the log transform written inside the formula string.
results = smf.ols(
    formula='Lottery ~ Literacy + np.log(Pop1831)',
    data=dat,
).fit()


In [4]:
# Inspect the results: render the fitted model's summary table as plain text
# and write it to standard output.
print(results.summary().as_text())


                            OLS Regression Results                            
Dep. Variable:                Lottery   R-squared:                       0.348
Model:                            OLS   Adj. R-squared:                  0.333
Method:                 Least Squares   F-statistic:                     22.20
Date:                Wed, 26 Apr 2017   Prob (F-statistic):           1.90e-08
Time:                        03:20:23   Log-Likelihood:                -379.82
No. Observations:                  86   AIC:                             765.6
Df Residuals:                      83   BIC:                             773.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------
Intercept         246.4341     35.233     

In [5]:
# ## Normality of the residuals

# Jarque-Bera test:
#
# Run the test on the fitted model's residuals and pair every returned value
# with a readable label, in the order the values come back.

name = [
    'Jarque-Bera',
    'Chi^2 two-tail prob.',
    'Skew',
    'Kurtosis',
]
test = sms.jarque_bera(results.resid)
list(zip(name, test))


[('Jarque-Bera', 3.393608024843173),
 ('Chi^2 two-tail prob.', 0.1832683123166331),
 ('Skew', -0.4865803431122342),
 ('Kurtosis', 3.003417757881633)]

In [6]:
# Omni test:
#
# Omnibus normality test on the residuals; the statistic and its p-value are
# shown next to their labels.

name = [
    'Chi^2',
    'Two-tail probability',
]
test = sms.omni_normtest(results.resid)
list(zip(name, test))


[('Chi^2', 3.7134378115971884), ('Two-tail probability', 0.15618424580304768)]

In [7]:

# ## Influence tests
# 
# Once created, an object of class ``OLSInfluence`` holds attributes and methods that allow users to assess the influence of each observation. For example, we can compute and extract the first few rows of DFbetas by:

from statsmodels.stats.outliers_influence import OLSInfluence

# Wrap the fitted results in an influence object, then display the DFbetas
# for the first five observations (all columns).
test_class = OLSInfluence(results)
test_class.dfbetas[:5]


array([[-0.00301154,  0.00290872,  0.00118179],
       [-0.06425662,  0.04043093,  0.06281609],
       [ 0.01554894, -0.03556038, -0.00905336],
       [ 0.17899858,  0.04098207, -0.18062352],
       [ 0.29679073,  0.21249207, -0.3213655 ]])

In [8]:

# Explore other options by typing ``dir(test_class)``
# 
# Useful information on leverage can also be plotted:

from statsmodels.graphics.regressionplots import plot_leverage_resid2

# Build the leverage plot for the fitted model and print the text
# representation of the object the plotting helper returns.
leverage_fig = plot_leverage_resid2(results)
print(leverage_fig)



Figure(432x288)


In [9]:

# Other plotting options can be found on the [Graphics page](https://www.statsmodels.org/stable/graphics.html).

# ## Multicollinearity
# 
# Condition number:

# Condition number of the model's exog array (the regressors used in the fit).
design_matrix = results.model.exog
np.linalg.cond(design_matrix)



702.17921454900625

In [10]:

# ## Heteroskedasticity tests
# 
# Breusch-Pagan test:

# Labels for the four values returned by the Breusch-Pagan test, in order.
name = ['Lagrange multiplier statistic', 'p-value',
        'f-value', 'f p-value']
# Call the correctly spelled `het_breuschpagan`. The former `het_breushpagan`
# spelling (missing the "c") is a deprecated alias that newer statsmodels
# releases no longer provide, so the old call fails on a current install.
test = sms.het_breuschpagan(results.resid, results.model.exog)
lzip(name, test)


[('Lagrange multiplier statistic', 4.8932133740940147),
 ('p-value', 0.086586905023519636),
 ('f-value', 2.5037159462564689),
 ('f p-value', 0.087940287826727276)]

In [11]:

# Goldfeld-Quandt test
#
# The residuals and the model's exog array are passed to the test; the first
# two returned values are labelled as the F statistic and its p-value.

name = [
    'F statistic',
    'p-value',
]
test = sms.het_goldfeldquandt(results.resid, results.model.exog)
list(zip(name, test))


[('F statistic', 1.1002422436378139), ('p-value', 0.38202950686925286)]

In [12]:

# ## Linearity
# 
# Harvey-Collier multiplier test for the null hypothesis that the linear specification is correct:

# Run the Harvey-Collier test on the fitted results object and show the
# t statistic and p-value next to their labels.
name = [
    't value',
    'p value',
]
test = sms.linear_harvey_collier(results)
list(zip(name, test))



[('t value', -1.0796490077789866), ('p value', 0.28346392475559085)]