In [1]:
import numpy as np
from families import Gaussian, Bernoulli, Poisson
from glm import GLM

import statsmodels.api as sm

  from pandas.core import datetools


In [2]:
N = 10000
X = np.empty(shape=(N, 3))
X[:, 0] = 1.0
X[:, 1] = np.random.uniform(size=N)
X[:, 2] = np.random.uniform(size=N)
nu = 1 - 2*X[:, 1] + X[:, 2]

## Linear Model

In [3]:
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
model.fit(X, y)

<glm.GLM at 0x10d932278>

In [4]:
model.coef_

array([ 1.01492627, -2.02602513,  1.01937948])

In [5]:
model.parameter_covariance_

array([[  6.99369853e-04,  -5.95623178e-04,  -6.01562205e-04],
       [ -5.95623178e-04,   1.18036091e-03,  -8.43455431e-07],
       [ -6.01562205e-04,  -8.43455431e-07,   1.21180406e-03]])

In [6]:
np.sqrt(model.parameter_covariance_)

  """Entry point for launching an IPython kernel.


array([[ 0.0264456 ,         nan,         nan],
       [        nan,  0.03435638,         nan],
       [        nan,         nan,  0.03481098]])

In [7]:
mod = sm.OLS(y, X)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.302
Model:                            OLS   Adj. R-squared:                  0.302
Method:                 Least Squares   F-statistic:                     2166.
Date:                Sat, 26 Aug 2017   Prob (F-statistic):               0.00
Time:                        19:46:11   Log-Likelihood:                -14176.
No. Observations:               10000   AIC:                         2.836e+04
Df Residuals:                    9997   BIC:                         2.838e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0149      0.026     38.378      0.0

## Linear Model with Sample Weights

In [8]:
sample_weights = np.random.uniform(0, 2, size=N)

In [9]:
model = GLM(family=Gaussian())
model = model.fit(X, y, sample_weights=sample_weights)

In [10]:
model.coef_

array([ 1.00143275, -1.9998373 ,  1.01588439])

## Logistic Model

In [30]:
p = 1 / (1 + np.exp(-nu))
y_logistic = np.random.binomial(1, p=p, size=N)

In [32]:
model = GLM(family=Bernoulli())
model.fit(X, y_logistic)

<glm.GLM at 0x11b2d1748>

In [33]:
model.coef_

array([ 1.096427  , -2.56991935,  1.19799606])

In [34]:
model.dispersion_

1.3131972655817188

In [35]:
model.parameter_covariance_

array([[ 0.01171329, -0.02126638,  0.00361504],
       [-0.02126638,  0.73670506, -0.77099086],
       [ 0.00361504, -0.77099086,  0.84894063]])

In [48]:
np.sqrt(model.parameter_covariance_)# / model.dispersion_ )

  """Entry point for launching an IPython kernel.


array([[ 0.10695435,         nan,  0.06535486],
       [        nan,  0.85876483,         nan],
       [ 0.06535486,         nan,  0.92170581]])

In [37]:
mod = sm.Logit(y_logistic, X)
res = mod.fit()
print(res.summary())

Optimization terminated successfully.
         Current function value: 0.654629
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                 1000
Model:                          Logit   Df Residuals:                      997
Method:                           MLE   Df Model:                            2
Date:                Sat, 26 Aug 2017   Pseudo R-squ.:                 0.03125
Time:                        19:47:08   Log-Likelihood:                -654.63
converged:                       True   LL-Null:                       -675.75
                                        LLR p-value:                 6.731e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0964      0.134      8.209      0.000       0.835       1.358
x1            -2.5699      1.

In [38]:
coefs = np.empty((1000, 3))
for i in range(1000):
    y_logistic = np.random.binomial(1, p=p, size=N)
    model = GLM(family=Bernoulli())
    model.fit(X, y_logistic)
    coefs[i, :] = model.coef_
    
print(coefs.std(axis=0))

[ 0.1296467   1.06416649  1.15950852]


## Poission Model

In [19]:
mu = np.exp(nu)
y_poisson = np.random.poisson(lam=mu, size=N)

In [20]:
model = GLM(family=Poisson())
model.fit(X, y_poisson)

<glm.GLM at 0x11b198d30>

In [21]:
model.coef_

array([ 0.98831824, -1.95842387,  1.00836196])

## Poisson with Exposures

In [22]:
mu = np.exp(nu)
expos = np.random.uniform(0, 10, size=N)
y_poisson = np.random.poisson(lam=(mu*expos), size=N)

In [23]:
model = GLM(family=Poisson())
model.fit(X, y_poisson, offset=np.log(expos))

<glm.GLM at 0x11b19d128>

In [24]:
model.coef_

array([ 0.98339375, -1.97922538,  1.01128097])

## Linear Model with Correlated Predictors

In [25]:
N = 1000
X = np.empty(shape=(N, 3))
X[:, 0] = 1.0
X[:, 1] = np.random.uniform(size=N)
X[:, 2] = 0.9*X[:, 1] + np.random.uniform(-0.1, 0.1, size=N)
nu = 1 - 2*X[:, 1] + X[:, 2]

In [26]:
y = nu + np.random.normal(size=N)
model = GLM(family=Gaussian())
model.fit(X, y)

<glm.GLM at 0x11b19d470>

In [27]:
model.coef_

array([ 0.94969807, -1.3979989 ,  0.39447939])

In [28]:
model.parameter_covariance_

array([[ 0.00406196, -0.00705101,  0.00080253],
       [-0.00705101,  0.27824347, -0.29236955],
       [ 0.00080253, -0.29236955,  0.32274869]])