# Estimating Binary Logit Models with SciPy Optimize and Generic Likelihood Model

Author: Lachlan Deer, April 2017

In [1]:
import numpy as np
from scipy import optimize as opt

## Simulate Data

In [2]:
# Model Primitives
# Seed NumPy's global RNG so the simulated data set is reproducible.
np.random.seed(1234567890)

nObs = 10000  # number of simulated observations
beta = np.array([0.5, 0.5] , dtype=float)  # true coefficients, ordered like the columns of explVar: [constant, income]
income= np.random.uniform(size = nObs) # draws from the uniform distribution on [0, 1) (not a standard normal)
explVar = np.vstack([np.ones(nObs), income]).T  # regressors, one row per observation: a column of ones, then income

In [3]:
def simulateBinaryLogit(x, beta):
    """Draw one simulated binary choice per row of ``x``.

    Alternative 0 has all of its coefficients fixed at zero, alternative 1
    uses ``beta``. Each alternative's utility also receives its own Gumbel
    shock (NumPy's default location and scale), drawn from NumPy's global
    random state.

    Parameters
    ----------
    x : ndarray
        Explanatory variables, one row per observation.
    beta : ndarray
        Coefficients of alternative 1, one entry per column of ``x``.

    Returns
    -------
    ndarray of int
        For every row of ``x``, the index (0 or 1) of the alternative with
        the highest simulated utility.
    """
    n_alternatives = 2
    n_rows = x.shape[0]

    # Row 0: the normalised alternative (all zeros); row 1: the coefficients
    # of alternative 1.
    coefficients = np.vstack([np.zeros(beta.shape), beta])
    shocks = np.random.gumbel(size=[n_rows, n_alternatives])

    deterministic_part = x @ coefficients.T
    return np.argmax(deterministic_part + shocks, axis=1)

In [4]:
# verify choice data looks like it should
choice = simulateBinaryLogit(explVar, beta)
# Peek at entries 1 through 9 (the slice starts at index 1, so entry 0 is not shown).
choice[1:10]

array([0, 1, 1, 1, 1, 0, 1, 1, 1], dtype=int64)

In [5]:
# Put the outcome and the regressors side by side: column 0 holds the simulated
# choice, the remaining columns hold explVar (constant, income).
data = np.hstack((choice.reshape(nObs, 1), explVar))
data.shape

(10000, 3)

## Defining Functions for Optimization

In [6]:
from scipy.stats import logistic

def logLike_binaryLogit(beta, y, x):
    """Negative log-likelihood of a binary logit model.

    Parameters
    ----------
    beta : array_like
        Coefficient vector, one entry per column of ``x``.
    y : ndarray
        Observed choices, coded 0 or 1, one entry per row of ``x``.
    x : ndarray
        Explanatory variables, one row per observation.

    Returns
    -------
    float
        Minus the sum of the per-observation log-likelihood contributions,
        so the value can be handed directly to a minimiser.
    """
    index = x @ beta

    # log P(y=1) = -log(1 + exp(-index)) and log P(y=0) = -log(1 + exp(index)).
    # Working in logs via logaddexp keeps both finite for large |index|;
    # taking log(1 - cdf) instead yields log(0) = -inf as soon as the cdf
    # rounds to exactly 1.
    logProbOne = -np.logaddexp(0.0, -index)
    logProbZero = -np.logaddexp(0.0, index)

    # A label that is neither 0 nor 1 has probability zero under the model,
    # hence a contribution of -inf.
    ll_i = np.where(y == 1, logProbOne,
                    np.where(y == 0, logProbZero, -np.inf))
    logLike = -(ll_i.sum())
    return logLike

## Running the Optimization

In [7]:
# Start the search from the zero vector.
beta0 = np.zeros(2)
# Minimise the negative log-likelihood. Column 0 of data is the observed choice,
# the remaining columns are the regressors.
out = opt.minimize(logLike_binaryLogit, beta0, args=(data[:,0], data[:,1:]) , method='L-BFGS-B', tol=1e-12)

print('beta hat is:', out.x)

# out.fun is the minimised objective, i.e. the negative log-likelihood at the estimate.
print('value of likelihood at beta:', out.fun)

# how to get SE's?
out

beta hat is: [ 0.58517464  0.40357804]
value of likelihood at beta: 6204.11675398


      fun: 6204.1167539810767
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ -9.09494702e-05,  -9.09494702e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 36
      nit: 11
   status: 0
  success: True
        x: array([ 0.58517464,  0.40357804])

In [None]:
# get standard errors from inverse hessian (approximate)
# out.hess_inv is the inverse-Hessian approximation that L-BFGS-B returns (an
# LbfgsInvHessProduct), not an exactly computed Hessian; todense() converts it to
# a plain array so its diagonal can be read off.
np.sqrt(np.diagonal(out.hess_inv.todense()))

In [None]:
import statsmodels.tools.numdiff as smt


In [None]:
#Get inverse hessian
# Differentiate the negative log-likelihood numerically at the estimate, invert
# the resulting Hessian, and take the square roots of the diagonal as standard errors.
b_estimates = out.x
hessian = smt.approx_hess3(b_estimates, logLike_binaryLogit, args=(data[:,0], data[:,1:]))
invHessian = np.linalg.inv(hessian)

print(invHessian)
np.sqrt(np.diagonal(invHessian))

## An Intro to Simulated Maximum Likelihood

In [8]:
# Simulated Maximum Likelihood

def logLikeSim_binaryLogit(beta, y, x, nSim):
    """Simulated negative log-likelihood of the binary logit model.

    For every observation, the probability of choosing alternative 1 is
    approximated by the share of ``nSim`` simulated choices that equal 1.

    Parameters
    ----------
    beta : ndarray
        Coefficient vector passed on to ``simulateBinaryLogit``.
    y : ndarray
        Observed choices, coded 0 or 1, one entry per row of ``x``.
    x : ndarray
        Explanatory variables, one row per observation.
    nSim : int
        Number of simulated choices drawn per observation.

    Returns
    -------
    float
        Minus the sum of the simulated log-likelihood contributions.

    Notes
    -----
    NumPy's global RNG is re-seeded on every call, so each evaluation uses
    the same sequence of random draws regardless of ``beta``.

    If the simulated share for an observation is exactly 0 or 1 and the
    observed choice is the other alternative, that observation contributes
    log(0) = -inf.
    """
    # Same seed on every evaluation => same shocks for every candidate beta.
    np.random.seed(42)

    nObs = y.shape[0]

    # Keep a running count of how often alternative 1 was simulated instead of
    # storing every draw in an nObs x nSim matrix. Sums of 0/1 values are exact,
    # so the resulting shares equal the row means of such a matrix. This also
    # removes the NaN pre-fill, which relied on the np.NAN alias that NumPy 2.0
    # no longer provides.
    timesChosen = np.zeros(nObs)
    for _ in range(nSim):
        timesChosen += simulateBinaryLogit(x, beta)

    simProb = timesChosen / nSim

    ll_i         = np.log((y==1) * simProb + (y==0) *(1 - simProb))
    logLike      = -(ll_i.sum())
    return logLike
    


In [9]:
# Start from 0.4 for both coefficients and use 1000 simulated choices per observation.
beta0 = 0.4*np.ones(2)
nSim = 1000

# NOTE(review): the recorded output below stops essentially at beta0
# ([0.39999826, 0.40000022]) while still reporting a large gradient entry.
# Presumably the simulated choice share changes in jumps as beta moves, which
# would make the finite-difference gradient (step 'eps') unreliable - confirm
# before trusting these estimates.
out = opt.minimize(logLikeSim_binaryLogit, beta0, args=(data[:,0], data[:,1:], nSim) , method='L-BFGS-B', \
             options={'gtol': 1e-5, 'eps': 1e-05, 'ftol': 1e-8})

print('beta hat is:', out.x)

# out.fun is the minimised objective, i.e. the simulated negative log-likelihood.
print('value of likelihood at beta:', out.fun)

# how to get SE's?
out

  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]


beta hat is: [ 0.39999826  0.40000022]
value of likelihood at beta: 6245.86089205


      fun: 6245.8608920466713
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 251.74474421,   -2.81479612])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 45
      nit: 2
   status: 0
  success: True
        x: array([ 0.39999826,  0.40000022])

In [None]:
# Get inverse hessian - this is probably wrong; I think we need to inflate the
# std errors by something proportional to nSim.
# Same recipe as for the exact likelihood: numerical Hessian of the (simulated)
# negative log-likelihood at the estimate, inverted, then square roots of the diagonal.
b_estimates = out.x
hessian = smt.approx_hess3(b_estimates, logLikeSim_binaryLogit, args=(data[:,0], data[:,1:], nSim))
invHessian = np.linalg.inv(hessian)

print(invHessian)
np.sqrt(np.diagonal(invHessian))

## Comparing SciPy Results to the Canned Logit Estimator

In [None]:
## Compare to the canned logit model

import statsmodels.api as sm

# Same split as in the SciPy runs: column 0 of data is the outcome, the
# remaining columns (constant, income) are the regressors.
logit_mod = sm.Logit(data[:,0], data[:,1:])
logit_res = logit_mod.fit(disp=0)
print('Parameters: ', logit_res.params)

In [None]:
# Display the summary of the fitted statsmodels Logit result.
logit_res.summary()

## Maximum Likelihood Estimations using the GenericLikelihoodModel Class

In [None]:
from statsmodels.base.model import GenericLikelihoodModel


### Standard Likelihood Problem

In [None]:
class MyLogit(GenericLikelihoodModel):
    """Binary logit written as a GenericLikelihoodModel subclass.

    ``endog`` holds the observed choices (coded 0 or 1) and ``exog`` the
    explanatory variables; only the log-likelihood is supplied here.
    """

    def loglike(self, params):
        """Return the log-likelihood (not negated) evaluated at ``params``."""
        endog = self.endog
        index = self.exog @ params

        # log P(y=1) = -log(1 + exp(-index)) and log P(y=0) = -log(1 + exp(index)).
        # logaddexp keeps both finite for large |index|; log(1 - cdf) would
        # give log(0) = -inf as soon as the cdf rounds to exactly 1.
        logProbOne = -np.logaddexp(0.0, -index)
        logProbZero = -np.logaddexp(0.0, index)

        # A label that is neither 0 nor 1 has probability zero under the model.
        ll_i = np.where(endog == 1, logProbOne,
                        np.where(endog == 0, logProbZero, -np.inf))
        return ll_i.sum()

In [None]:
# Fit the hand-written likelihood (column 0 of data is the outcome, the other
# columns are the regressors) and print the results summary.
sm_logit_manual = MyLogit(data[:,0], data[:,1:]).fit()
print(sm_logit_manual.summary())

### Example with Simulated ML

In [None]:
class MySimulatedLogit(GenericLikelihoodModel):
    """Binary logit with a simulated likelihood, as a GenericLikelihoodModel.

    The probability of choosing alternative 1 is approximated, observation by
    observation, by the share of simulated choices equal to 1.
    """

    def loglike(self, params):
        """Return the simulated log-likelihood (not negated) at ``params``.

        NumPy's global RNG is re-seeded on every call, so each evaluation
        uses the same sequence of random draws regardless of ``params``.
        If the simulated share for an observation is exactly 0 or 1 and the
        observed choice is the other alternative, that observation
        contributes log(0) = -inf.
        """
        exog = self.exog
        endog = self.endog

        # Same seed on every evaluation => same shocks for every candidate params.
        np.random.seed(42)
        nSim = 1000

        nObs = endog.shape[0]

        # Keep a running count of how often alternative 1 was simulated instead
        # of storing every draw in an nObs x nSim matrix. Sums of 0/1 values are
        # exact, so the shares equal the row means of such a matrix. This also
        # removes the NaN pre-fill, which relied on the np.NAN alias that
        # NumPy 2.0 no longer provides.
        timesChosen = np.zeros(nObs)
        for _ in range(nSim):
            timesChosen += simulateBinaryLogit(exog, params)

        simProb = timesChosen / nSim

        ll_i         = np.log((endog==1) * simProb + (endog==0)*(1 - simProb))
        return ll_i.sum()

In [None]:
# very sensitive to starting values! (flat likelihood?)
# The search starts from 0.6 for both coefficients. Note that the name
# sm_logit_manual is reused, so this overwrites the MyLogit result stored under it.

sm_logit_manual = MySimulatedLogit(data[:,0], data[:,1:]).fit(start_params=0.6*np.ones(2))
print(sm_logit_manual.summary())