In [1]:
import numpy as np
from families import Gaussian, Bernoulli, Poisson
from glm import GLM

import statsmodels.api as sm
import statsmodels

  from pandas.core import datetools


In [2]:
# Seed the global NumPy RNG so that Restart & Run All reproduces the same
# simulated data; the cells below all draw from np.random.*.
SEED = 154
np.random.seed(SEED)

# Design matrix: an intercept column plus two independent Uniform[0, 1) predictors.
N = 10000
X = np.empty(shape=(N, 3))
X[:, 0] = 1.0
X[:, 1] = np.random.uniform(size=N)
X[:, 2] = np.random.uniform(size=N)
# Linear predictor; the generating coefficients are (1, -2, 1).
nu = 1 - 2*X[:, 1] + X[:, 2]

## Linear Model

In [3]:
# Gaussian response: the linear predictor plus standard-normal noise.
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
# fit() is the cell's last expression, so its return value (a GLM object) is displayed.
model.fit(X, y)

<glm.GLM at 0x10f274080>

In [4]:
# Fitted coefficients; the data were generated from nu = 1 - 2*x1 + x2.
model.coef_

array([ 1.01383862, -1.96991877,  0.96472229])

In [5]:
# Covariance matrix the fitted model reports for its coefficient estimates.
model.coef_covariance_matrix_

array([[  7.20424363e-04,  -6.19360287e-04,  -6.17559426e-04],
       [ -6.19360287e-04,   1.23016629e-03,   1.30286762e-05],
       [ -6.17559426e-04,   1.30286762e-05,   1.21784312e-03]])

In [6]:
# Standard errors; the displayed values equal the square roots of the
# diagonal of coef_covariance_matrix_.
model.coef_standard_error_

array([ 0.02684072,  0.03507373,  0.03489761])

In [7]:
# Reference fit: statsmodels OLS on the same X and y, for comparison with
# the GLM coefficients and standard errors.
mod = sm.OLS(y, X)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.283
Model:                            OLS   Adj. R-squared:                  0.283
Method:                 Least Squares   F-statistic:                     1976.
Date:                Mon, 28 Aug 2017   Prob (F-statistic):               0.00
Time:                        21:21:45   Log-Likelihood:                -14288.
No. Observations:               10000   AIC:                         2.858e+04
Df Residuals:                    9997   BIC:                         2.860e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0138      0.027     37.772      0.0

## Linear Model with Sample Weights

In [8]:
# One weight per observation, drawn uniformly from [0, 2).
sample_weights = np.random.uniform(0, 2, size=N)

In [9]:
# Refit the Gaussian model on the same X and y, now with per-observation weights.
model = GLM(family=Gaussian())
# Rebinding `model` to fit()'s return value means this cell displays nothing;
# presumably fit() returns the fitted model itself, since coef_ is read from it next.
model = model.fit(X, y, sample_weights=sample_weights)

In [10]:
# Coefficients of the weighted fit; the generating values are still (1, -2, 1).
model.coef_

array([ 0.9942108 , -1.9590109 ,  1.00513803])

## Logistic Model

In [11]:
# Success probability for each observation: the logistic (sigmoid) function
# applied to the linear predictor.
p = 1 / (1 + np.exp(-nu))
# One Bernoulli (single-trial binomial) draw per observation.
y_logistic = np.random.binomial(1, p=p, size=N)

In [12]:
# Fit a GLM with the Bernoulli family to the simulated 0/1 response.
model = GLM(family=Bernoulli())
model.fit(X, y_logistic)

<glm.GLM at 0x11cb36438>

In [13]:
# Fitted coefficients; the probabilities were generated from nu = 1 - 2*x1 + x2.
model.coef_

array([ 0.99887922, -2.12304975,  1.10009535])

In [14]:
# Dispersion value stored on the fitted model (displays 1.0 for this Bernoulli fit).
model.dispersion_

array(1.0)

In [15]:
# Covariance matrix the fitted model reports for its coefficient estimates.
model.coef_covariance_matrix_

array([[ 0.00331026, -0.0030408 , -0.00252152],
       [-0.0030408 ,  0.00609283, -0.00043699],
       [-0.00252152, -0.00043699,  0.00568711]])

In [16]:
# Standard errors; the displayed values equal the square roots of the
# diagonal of coef_covariance_matrix_.
model.coef_standard_error_

array([ 0.05753487,  0.07805661,  0.07541293])

In [17]:
# Reference fit: statsmodels Logit on the same X and y_logistic, for
# comparison with the GLM coefficients and standard errors.
mod = sm.Logit(y_logistic, X)
res = mod.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.618568
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                10000
Model:                          Logit   Df Residuals:                     9997
Method:                           MLE   Df Model:                            2
Date:                Mon, 28 Aug 2017   Pseudo R-squ.:                 0.07559
Time:                        21:21:45   Log-Likelihood:                -6185.7
converged:                       True   LL-Null:                       -6691.5
                                        LLR p-value:                2.134e-220
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9989      0.058     17.361      0.000       0.886       1.112
x1            -2.1230      0.

In [18]:
# Monte Carlo check of the standard errors: redraw the Bernoulli response many
# times from the same probabilities p, refit each time, and measure the spread
# of the coefficient estimates across replications.
N_REPLICATIONS = 1000
coefs = np.empty((N_REPLICATIONS, X.shape[1]))
for i in range(N_REPLICATIONS):
    # Loop-local names, so the notebook-level `y_logistic` and `model` are not
    # silently replaced by the last replication.
    y_sim = np.random.binomial(1, p=p, size=N)
    sim_model = GLM(family=Bernoulli())
    sim_model.fit(X, y_sim)
    coefs[i, :] = sim_model.coef_

# Empirical standard deviation of each coefficient, to set against
# model.coef_standard_error_.
print(coefs.std(axis=0))

[ 0.05690897  0.07664399  0.07699808]


## Poisson Model

In [19]:
# Poisson rate for each observation: the exponential of the linear predictor.
mu = np.exp(nu)
y_poisson = np.random.poisson(lam=mu, size=N)

In [20]:
# Fit a GLM with the Poisson family to the simulated counts.
model = GLM(family=Poisson())
model.fit(X, y_poisson)

<glm.GLM at 0x11cb362e8>

In [21]:
# Fitted coefficients; the rates were generated from nu = 1 - 2*x1 + x2.
model.coef_

array([ 1.01983659, -2.01177571,  0.97607901])

In [22]:
# Covariance matrix the fitted model reports for its coefficient estimates.
model.coef_covariance_matrix_

array([[  3.46119846e-04,  -2.49025097e-04,  -3.63306653e-04],
       [ -2.49025097e-04,   7.17516120e-04,   7.70653269e-06],
       [ -3.63306653e-04,   7.70653269e-06,   6.17671867e-04]])

In [23]:
# Standard errors; the displayed values equal the square roots of the
# diagonal of coef_covariance_matrix_.
model.coef_standard_error_

array([ 0.0186043 ,  0.02678649,  0.02485301])

In [24]:
# Reference fit: statsmodels Poisson regression on the same X and y_poisson.
# Going through the `sm` (statsmodels.api) namespace matches the sm.OLS and
# sm.Logit cells, and does not depend on `statsmodels.discrete` having been
# loaded as a side effect of another import.
mod = sm.Poisson(y_poisson, X)
res = mod.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 1.591713
         Iterations 7
                          Poisson Regression Results                          
Dep. Variable:                      y   No. Observations:                10000
Model:                        Poisson   Df Residuals:                     9997
Method:                           MLE   Df Model:                            2
Date:                Mon, 28 Aug 2017   Pseudo R-squ.:                  0.1980
Time:                        21:21:50   Log-Likelihood:                -15917.
converged:                       True   LL-Null:                       -19848.
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0198      0.019     54.817      0.000       0.983       1.056
x1            -2.0118      0.

## Poisson with Exposures

In [25]:
# Per-unit-exposure rate: the exponential of the linear predictor.
mu = np.exp(nu)
# Exposure for each observation, drawn uniformly from [0, 10).
expos = np.random.uniform(0, 10, size=N)
# Counts are drawn with rate mu * expos, i.e. the rate scales with exposure.
y_poisson = np.random.poisson(lam=(mu*expos), size=N)

In [26]:
# The counts were drawn with rate mu*expos = exp(nu + log(expos)), so the log
# exposure is supplied to fit() as an offset.
model = GLM(family=Poisson())
model.fit(X, y_poisson, offset=np.log(expos))

<glm.GLM at 0x11cb4f940>

In [27]:
# Fitted coefficients with the offset; the generating values are (1, -2, 1).
model.coef_

array([ 0.99448529, -1.97796662,  0.99545415])

## Linear Model with Correlated Predictors

In [37]:
# Small sample whose second predictor is built from the first, so the two
# columns are correlated. This rebinds the notebook-level N, X and nu.
N = 100
x1 = np.random.uniform(size=N)
# Half of x1 plus independent Uniform[-0.5, 0.5) noise.
x2 = 0.5*x1 + np.random.uniform(-0.5, 0.5, size=N)
# Columns: intercept, x1, x2.
X = np.column_stack((np.ones(N), x1, x2))
# Same generating coefficients as before: (1, -2, 1).
nu = 1 - 2*x1 + x2

In [38]:
# Gaussian response on the correlated design: linear predictor plus
# standard-normal noise. This rebinds the notebook-level `y` and `model`.
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
model.fit(X, y)

<glm.GLM at 0x11cc527f0>

In [39]:
# Fitted coefficients; the generating values are (1, -2, 1), and the displayed
# estimates sit further from them than in the N = 10000 fit.
model.coef_

array([ 1.02880297, -1.66924764,  0.30040936])

In [40]:
# Covariance matrix the fitted model reports for its coefficient estimates.
model.coef_covariance_matrix_

array([[ 0.04692802, -0.07388581,  0.00248534],
       [-0.07388581,  0.20707993, -0.09998385],
       [ 0.00248534, -0.09998385,  0.20347317]])

In [41]:
# Standard errors; the displayed values equal the square roots of the
# diagonal of coef_covariance_matrix_.
model.coef_standard_error_

array([ 0.21662876,  0.45506036,  0.45108   ])

In [42]:
# Reference fit: statsmodels OLS on the correlated design, for comparison
# with the GLM coefficients and standard errors.
mod = sm.OLS(y, X)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.135
Model:                            OLS   Adj. R-squared:                  0.117
Method:                 Least Squares   F-statistic:                     7.551
Date:                Mon, 28 Aug 2017   Prob (F-statistic):           0.000896
Time:                        21:22:56   Log-Likelihood:                -155.26
No. Observations:                 100   AIC:                             316.5
Df Residuals:                      97   BIC:                             324.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0288      0.217      4.749      0.0