# Estimating Binary Logit Models with SciPy Optimize and Generic Likelihood Model

Author: Lachlan Deer, April 2017

In [None]:
import numpy as np
from scipy import optimize as opt

## Simulate Data

In [2]:
# Model Primitives: sample size, true coefficients and regressors of the simulated data
np.random.seed(1234567890)  # seed NumPy's global RNG so the simulated data are reproducible

nObs = 1000
beta = np.array([0.5, 0.5] , dtype=float)  # true coefficients on (constant, income)
income= np.random.uniform(size = nObs) # uniform draws on [0, 1) (NumPy's default bounds), not standard normal
explVar = np.vstack([np.ones(nObs), income]).T  # design matrix with columns (constant, income)

In [3]:
def simulateBinaryLogit(x, beta):
    """Draw one simulated binary choice per row of ``x``.

    Alternative 0 has its coefficients fixed at zero, alternative 1 uses
    ``beta``; each alternative's utility gets an independent Gumbel shock and
    the alternative with the larger utility is chosen.

    Parameters
    ----------
    x : ndarray
        Regressor matrix, one row per observation.
    beta : ndarray
        Coefficient vector of alternative 1 (must expose ``.shape``).

    Returns
    -------
    ndarray of int
        Index (0 or 1) of the utility-maximising alternative for each row.

    Notes
    -----
    Shocks come from NumPy's global random state, so seed it beforehand for
    reproducible draws.
    """
    n_rows = x.shape[0]

    # One Gumbel shock per observation and per alternative (two alternatives).
    shocks = np.random.gumbel(size=[n_rows, 2])

    # Row 0: baseline alternative normalised to zero; row 1: the supplied beta.
    coef_by_choice = np.vstack([np.zeros(beta.shape), beta])

    payoff = x @ coef_by_choice.T + shocks
    return np.argmax(payoff, axis=1)

In [4]:
# Simulate the observed choices from the true beta and eyeball a few of them
choice = simulateBinaryLogit(explVar, beta)
# NOTE: the slice 1:10 displays elements 1..9, i.e. it skips the first observation (index 0)
choice[1:10]

array([1, 0, 1, 1, 0, 1, 0, 0, 1], dtype=int64)

In [5]:
# Put the choices in column 0, followed by the regressor columns (constant, income).
data = np.hstack((choice[:, np.newaxis], explVar))
data.shape

(1000, 3)

## Defining Functions for Optimization

In [6]:
def calcLambda(x, beta):
    """Logistic choice probability ``1 / (1 + exp(-x @ beta))``.

    Parameters
    ----------
    x : ndarray
        Regressor matrix (or a single regressor vector).
    beta : ndarray
        Coefficient vector conformable with ``x``.

    Returns
    -------
    ndarray or float
        Probability of choosing alternative 1 for each row of ``x``.
    """
    index = x @ beta
    # exp(-log(1 + exp(-index))) is the same logistic CDF, but logaddexp never
    # overflows; the naive exp(index) / (1 + exp(index)) turns into inf / inf
    # = nan once index exceeds roughly 709.
    return np.exp(-np.logaddexp(0.0, -index))

In [37]:
from scipy.stats import logistic

def logLike_binaryLogit(beta, y, x):
    """Negative log-likelihood of a binary logit model.

    Parameters
    ----------
    beta : ndarray
        Coefficient vector at which to evaluate the likelihood.
    y : ndarray
        Observed choices, coded 0 or 1.
    x : ndarray
        Regressor matrix, one row per observation.

    Returns
    -------
    float
        Minus the sum of the per-observation log-likelihoods, so the value
        can be passed directly to a minimiser.
    """
    index = x @ beta

    # Work with log-probabilities directly:
    #   log P(y=1) = -log(1 + exp(-index)),  log P(y=0) = -log(1 + exp(index)).
    # Taking np.log of a probability that has saturated at 0 or 1 would give
    # -inf and hand the optimiser an infinite objective; logaddexp stays finite.
    log_p1 = -np.logaddexp(0.0, -index)
    log_p0 = -np.logaddexp(0.0, index)

    # Observations that are neither 0 nor 1 keep a zero likelihood (log = -inf),
    # which is what the indicator-based formula assigned to them.
    ll_i = np.where(y == 1, log_p1, np.where(y == 0, log_p0, -np.inf))
    return -(ll_i.sum())

## Running the Optimization

In [38]:
# Start the search from beta = (0, 0).
beta0 = np.zeros(2)

# Column 0 of `data` holds the choices, the remaining columns the regressors.
out = opt.minimize(
    fun=logLike_binaryLogit,
    x0=beta0,
    args=(data[:, 0], data[:, 1:]),
    method='L-BFGS-B',
    tol=1e-12,
)

print('beta hat is:', out.x)

# out.fun is the minimised objective, i.e. the negative log-likelihood.
print('value of likelihood at beta:', out.fun)

# The optimiser result does not report standard errors directly; display it in full.
out

beta hat is: [ 0.40157311  0.58693221]
value of likelihood at beta: 633.200201176


      fun: 633.20020117642594
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 0.,  0.])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 30
      nit: 8
   status: 0
  success: True
        x: array([ 0.40157311,  0.58693221])

In [16]:
# Approximate standard errors: square roots of the diagonal of the inverse
# Hessian that L-BFGS-B built up during its iterations.
np.sqrt(out.hess_inv.todense().diagonal())

array([ 0.15264816,  0.27145198])

In [28]:
import statsmodels.tools.numdiff as smt
import scipy as sc


NameError: name 'Hessian' is not defined

In [32]:
# Finite-difference Hessian of the negative log-likelihood at the estimate;
# its inverse is the estimated covariance matrix of beta hat.
b_estimates = out.x
hessian = smt.approx_hess3(
    b_estimates,
    logLike_binaryLogit,
    args=(data[:, 0], data[:, 1:]),
)
invHessian = np.linalg.inv(hessian)

print(invHessian)

# Standard errors are the square roots of the covariance diagonal.
np.sqrt(invHessian.diagonal())

[[ 0.01809698 -0.02760171]
 [-0.02760171  0.05615583]]


array([ 0.134525  ,  0.23697222])

## An Intro to Simulated Maximum Likelihood

In [55]:
# Simulated Maximum Likelihood

def logLikeSim_binaryLogit(beta, y, x, nSim):
    """Negative simulated log-likelihood of a binary logit (frequency simulator).

    For each observation the probability of choosing alternative 1 is
    approximated by the share of ``nSim`` simulated choices equal to 1.

    Parameters
    ----------
    beta : ndarray
        Coefficient vector at which to evaluate the likelihood.
    y : ndarray
        Observed choices, coded 0 or 1.
    x : ndarray
        Regressor matrix, one row per observation.
    nSim : int
        Number of simulated choice vectors to average over.

    Returns
    -------
    float
        Minus the sum of the per-observation simulated log-likelihoods.

    Notes
    -----
    * The global NumPy seed is reset to 42 on every call so that each
      evaluation reuses the same random draws; this is a side effect on the
      global random state.
    * If the simulated probability of the observed outcome is exactly zero
      for some observation, that observation contributes ``log(0) = -inf``.
    """
    np.random.seed(42)

    nObs = y.shape[0]

    # Use np.nan: the upper-case alias np.NAN was removed in NumPy 2.0 and
    # raises AttributeError there. The NaN fill makes an unwritten column
    # visible in the mean below.
    simChoice = np.empty((nObs, nSim))
    simChoice[:] = np.nan

    for iSim in range(nSim):
        simChoice[:, iSim] = simulateBinaryLogit(x, beta)

    # Share of simulations in which alternative 1 was chosen.
    simProb = simChoice.mean(axis=1)

    ll_i = np.log((y == 1) * simProb + (y == 0) * (1 - simProb))
    logLike = -(ll_i.sum())
    return logLike
    


In [56]:
# Starting values and number of simulation draws per likelihood evaluation.
beta0 = 0.4*np.ones(2)
nSim = 1000

# 'eps' is the finite-difference step for the numerical gradient; it is set
# far larger than in the exact-likelihood run above.
out = opt.minimize(
    fun=logLikeSim_binaryLogit,
    x0=beta0,
    args=(data[:, 0], data[:, 1:], nSim),
    method='L-BFGS-B',
    options={'gtol': 1e-4, 'eps': 1e-04, 'ftol': 1e-8},
)

print('beta hat is:', out.x)

# out.fun is the minimised objective, i.e. the negative simulated log-likelihood.
print('value of likelihood at beta:', out.fun)

# The optimiser result does not report standard errors directly; display it in full.
out

beta hat is: [ 0.40980455  0.47007518]
value of likelihood at beta: 634.525545482


      fun: 634.52554548208991
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 133.15737038,   98.31582826])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 204
      nit: 8
   status: 0
  success: True
        x: array([ 0.40980455,  0.47007518])

In [42]:
# Inverse Hessian of the simulated objective - treat these numbers with suspicion.
# The simulated probability is a share of argmax outcomes, so it moves in
# jumps as beta changes, and finite differences of such a function are
# unreliable.
b_estimates = out.x
hessian = smt.approx_hess3(
    b_estimates,
    logLikeSim_binaryLogit,
    args=(data[:, 0], data[:, 1:], nSim),
)
invHessian = np.linalg.inv(hessian)

print(invHessian)
np.sqrt(invHessian.diagonal())

[[  5.73561694e-06  -3.58605833e-06]
 [ -3.58605833e-06   2.81700615e-06]]


array([ 0.00239491,  0.00167839])

## Comparing SciPy Results to the Canned Logit Estimator

In [188]:
## Compare to the canned logit model 

import statsmodels.api as sm

# Same data as above: choices in column 0, regressors (constant, income) after it.
logit_mod = sm.Logit(endog=data[:, 0], exog=data[:, 1:])

# disp=0 silences the optimiser's convergence printout.
logit_res = logit_mod.fit(disp=0)
print('Parameters: ', logit_res.params)

Parameters:  [ 0.40157314  0.58693222]


In [189]:
# Display the summary table of the canned logit fit, for comparison with the SciPy results above
logit_res.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,1000.0
Model:,Logit,Df Residuals:,998.0
Method:,MLE,Df Model:,1.0
Date:,"Fri, 14 Apr 2017",Pseudo R-squ.:,0.004845
Time:,07:50:16,Log-Likelihood:,-633.2
converged:,True,LL-Null:,-636.28
,,LLR p-value:,0.01303

0,1,2,3,4,5
,coef,std err,z,P>|z|,[95.0% Conf. Int.]
const,0.4016,0.135,2.985,0.003,0.138 0.665
x1,0.5869,0.237,2.477,0.013,0.122 1.051


## Maximum Likelihood Estimation Using the GenericLikelihoodModel Class

In [44]:
from statsmodels.base.model import GenericLikelihoodModel


### Standard Likelihood Problem

In [45]:
class MyLogit(GenericLikelihoodModel):
    """Binary logit estimated through statsmodels' GenericLikelihoodModel.

    Only the log-likelihood is supplied; ``endog`` holds the 0/1 choices and
    ``exog`` the regressor matrix.
    """

    def loglike(self, params):
        """Return the log-likelihood summed over all observations.

        Parameters
        ----------
        params : ndarray
            Coefficient vector at which to evaluate the likelihood.

        Returns
        -------
        float
            Sum of the per-observation log-likelihoods (not negated).
        """
        endog = self.endog
        index = self.exog @ params

        # Work with log-probabilities directly:
        #   log P(y=1) = -log(1 + exp(-index)),  log P(y=0) = -log(1 + exp(index)).
        # Taking np.log of a probability that has saturated at 0 or 1 would
        # give -inf; logaddexp stays finite. Using NumPy only also removes the
        # dependence on `logistic` being imported in an earlier cell.
        log_p1 = -np.logaddexp(0.0, -index)
        log_p0 = -np.logaddexp(0.0, index)

        # Observations that are neither 0 nor 1 keep a zero likelihood.
        ll_i = np.where(endog == 1, log_p1, np.where(endog == 0, log_p0, -np.inf))
        return ll_i.sum()

In [46]:
# Fit the hand-written logit on the simulated data (choices in column 0,
# regressors in the remaining columns) and print the estimation summary.
sm_logit_manual = MyLogit(endog=data[:, 0], exog=data[:, 1:]).fit()
print(sm_logit_manual.summary())

Optimization terminated successfully.
         Current function value: 0.633200
         Iterations: 57
         Function evaluations: 112
                               MyLogit Results                                
Dep. Variable:                      y   Log-Likelihood:                -633.20
Model:                        MyLogit   AIC:                             1270.
Method:            Maximum Likelihood   BIC:                             1280.
Date:                Sat, 15 Apr 2017                                         
Time:                        01:52:51                                         
No. Observations:                1000                                         
Df Residuals:                     998                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
-----------------------------------------------------------------------

### Example with Simulated ML

In [47]:
class MySimulatedLogit(GenericLikelihoodModel):
    """Binary logit estimated by simulated maximum likelihood.

    The choice probability of each observation is approximated by the share
    of simulated choices equal to 1 (frequency simulator).
    """

    # Number of simulated choice vectors averaged per likelihood evaluation.
    nSim = 1000
    # Seed applied to NumPy's global RNG before every evaluation so that each
    # evaluation reuses the same random draws.
    simSeed = 42

    def loglike(self, params):
        """Return the simulated log-likelihood summed over all observations.

        Parameters
        ----------
        params : ndarray
            Coefficient vector at which to evaluate the likelihood.

        Returns
        -------
        float
            Sum of the per-observation simulated log-likelihoods (not negated).

        Notes
        -----
        Resets NumPy's global random seed on every call (side effect).
        """
        exog = self.exog
        endog = self.endog

        np.random.seed(self.simSeed)
        nSim = self.nSim

        nObs = endog.shape[0]

        # Use np.nan: the upper-case alias np.NAN was removed in NumPy 2.0 and
        # raises AttributeError there.
        simChoice = np.empty((nObs, nSim))
        simChoice[:] = np.nan

        for iSim in range(nSim):
            simChoice[:, iSim] = simulateBinaryLogit(exog, params)

        # Share of simulations in which alternative 1 was chosen.
        simProb = simChoice.mean(axis=1)

        ll_i = np.log((endog == 1) * simProb + (endog == 0) * (1 - simProb))
        return ll_i.sum()

In [48]:
# very sensitive to starting values! (flat likelihood?)

# Fit the simulated-likelihood logit from explicit starting values of 0.6
# for both coefficients, then print the estimation summary.
sm_logit_manual = MySimulatedLogit(endog=data[:, 0], exog=data[:, 1:]).fit(start_params=0.6*np.ones(2))
print(sm_logit_manual.summary())

Optimization terminated successfully.
         Current function value: 0.634073
         Iterations: 25
         Function evaluations: 57
                           MySimulatedLogit Results                           
Dep. Variable:                      y   Log-Likelihood:                -634.07
Model:               MySimulatedLogit   AIC:                             1272.
Method:            Maximum Likelihood   BIC:                             1282.
Date:                Sat, 15 Apr 2017                                         
Time:                        01:54:30                                         
No. Observations:                1000                                         
Df Residuals:                     998                                         
Df Model:                           1                                         
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------