In [1]:
import numpy as np
from families import Gaussian, Bernoulli, Poisson
from glm import GLM

import statsmodels.api as sm

  from pandas.core import datetools


In [2]:
N = 10000
X = np.empty(shape=(N, 3))
X[:, 0] = 1.0
X[:, 1] = np.random.uniform(size=N)
X[:, 2] = np.random.uniform(size=N)
nu = 1 - 2*X[:, 1] + X[:, 2]

## Linear Model

In [3]:
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
model.fit(X, y)

<glm.GLM at 0x110d55898>

In [4]:
model.coef_

array([ 0.98013251, -1.93912732,  0.98230542])

In [5]:
model.parameter_covariance_

array([[  1.38369710e-03,  -1.17718039e-03,  -1.18953000e-03],
       [ -1.17718039e-03,   2.36781362e-03,  -6.91250995e-06],
       [ -1.18953000e-03,  -6.91250995e-06,   2.36931132e-03]])

In [6]:
np.sqrt(model.parameter_covariance_)

  """Entry point for launching an IPython kernel.


array([[ 0.03719808,         nan,         nan],
       [        nan,  0.04866019,         nan],
       [        nan,         nan,  0.04867557]])

In [7]:
mod = sm.OLS(y, X)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.285
Model:                            OLS   Adj. R-squared:                  0.285
Method:                 Least Squares   F-statistic:                     1991.
Date:                Sat, 26 Aug 2017   Prob (F-statistic):               0.00
Time:                        20:17:11   Log-Likelihood:                -14132.
No. Observations:               10000   AIC:                         2.827e+04
Df Residuals:                    9997   BIC:                         2.829e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9801      0.026     37.263      0.0

## Linear Model with Sample Weights

In [8]:
sample_weights = np.random.uniform(0, 2, size=N)

In [9]:
model = GLM(family=Gaussian())
model = model.fit(X, y, sample_weights=sample_weights)

In [10]:
model.coef_

array([ 0.97027864, -1.93397144,  0.9832041 ])

## Logistic Model

In [11]:
p = 1 / (1 + np.exp(-nu))
y_logistic = np.random.binomial(1, p=p, size=N)

In [12]:
model = GLM(family=Bernoulli())
model.fit(X, y_logistic)

<glm.GLM at 0x11c546908>

In [13]:
model.coef_

array([ 0.89079908, -1.886519  ,  1.06624972])

In [14]:
model.dispersion_

1.2536497937576914

In [15]:
model.parameter_covariance_

array([[ 0.00402727, -0.0036245 , -0.00314137],
       [-0.0036245 ,  0.00731656, -0.00052538],
       [-0.00314137, -0.00052538,  0.00701823]])

In [16]:
np.sqrt(np.diag(model.parameter_covariance_) / model.dispersion_)

array([ 0.05667833,  0.07639506,  0.07482136])

In [17]:
mod = sm.Logit(y_logistic, X)
res = mod.fit()
smry = res.summary()

Optimization terminated successfully.
         Current function value: 0.626637
         Iterations 5


In [18]:
np.sqrt(np.diag(res.cov_params()))

array([ 0.05667833,  0.07639506,  0.07482136])

In [19]:
np.sqrt(np.diag(model.parameter_covariance_) / model.dispersion_) / np.sqrt(np.diag(res.cov_params()))

array([ 0.99999998,  0.99999997,  0.99999998])

In [20]:
coefs = np.empty((1000, 3))
for i in range(1000):
    y_logistic = np.random.binomial(1, p=p, size=N)
    model = GLM(family=Bernoulli())
    model.fit(X, y_logistic)
    coefs[i, :] = model.coef_
    
print(coefs.std(axis=0))

[ 0.05947178  0.07715759  0.07715062]


## Poission Model

In [21]:
mu = np.exp(nu)
y_poisson = np.random.poisson(lam=mu, size=N)

In [22]:
model = GLM(family=Poisson())
model.fit(X, y_poisson)

<glm.GLM at 0x11e656438>

In [23]:
model.coef_

array([ 1.02518515, -2.0232025 ,  0.96606876])

## Poisson with Exposures

In [24]:
mu = np.exp(nu)
expos = np.random.uniform(0, 10, size=N)
y_poisson = np.random.poisson(lam=(mu*expos), size=N)

In [25]:
model = GLM(family=Poisson())
model.fit(X, y_poisson, offset=np.log(expos))

<glm.GLM at 0x11e6565c0>

In [26]:
model.coef_

array([ 0.99643447, -2.00694648,  1.01314535])

## Linear Model with Correlated Predictors

In [27]:
N = 1000
X = np.empty(shape=(N, 3))
X[:, 0] = 1.0
X[:, 1] = np.random.uniform(size=N)
X[:, 2] = 0.9*X[:, 1] + np.random.uniform(-0.1, 0.1, size=N)
nu = 1 - 2*X[:, 1] + X[:, 2]

In [28]:
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
model.fit(X, y)

<glm.GLM at 0x11e656940>

In [29]:
model.coef_

array([ 0.98954598, -2.124973  ,  1.19180676])

In [30]:
model.parameter_covariance_

array([[  7.56850678e-03,  -1.10924726e-02,  -3.48335102e-04],
       [ -1.10924726e-02,   4.77308321e-01,  -5.05805301e-01],
       [ -3.48335102e-04,  -5.05805301e-01,   5.63415743e-01]])