# Generating Data



In [1]:
import numpy as np

In [2]:
# Seed NumPy's global random state so the simulated draws are repeatable.
np.random.seed(1234567890)

In [3]:
# Simulation settings: the number of observations to draw and the
# coefficient vector (intercept first) used to generate the choices.
data_properties = dict(
    n_obs=10000,
    beta=np.array([0.25, 0.75]),
)

In [4]:
def gen_data(data_properties):
    """
    Simulate binary choices from a two-alternative logit model.

    Every observation gets an intercept plus one uniform(0, 1) regressor per
    slope coefficient. Alternative 0 has utility equal to a Gumbel shock
    only; alternative 1 has utility ``X @ beta`` plus an independent Gumbel
    shock. The recorded choice is the alternative with the larger utility.

    Parameters
    ----------
    data_properties : dict
        ``'n_obs'`` : int, number of observations to simulate.
        ``'beta'``  : 1-D array-like of coefficients. The first entry
        multiplies the intercept; each further entry gets its own
        uniform regressor (two entries reproduce the original
        single-regressor setup).

    Returns
    -------
    list
        ``[choice, X]`` where ``choice`` is an integer array of shape
        ``(n_obs,)`` with values in {0, 1} and ``X`` is the regressor matrix
        of shape ``(n_obs, len(beta))`` whose first column is all ones.

    Raises
    ------
    ValueError
        If ``beta`` is empty.

    Notes
    -----
    Draws come from NumPy's global random state; call ``np.random.seed``
    beforehand for reproducible output.
    """
    n_obs = data_properties['n_obs']
    # Accept lists/tuples as well as arrays, and flatten to a plain vector.
    beta = np.asarray(data_properties['beta'], dtype=float).ravel()
    if beta.size == 0:
        raise ValueError("data_properties['beta'] must contain at least one coefficient")

    # make explanatory variables: intercept column plus one uniform
    # regressor for every coefficient after the first
    iota = np.ones((n_obs, 1))
    expl_var = np.random.uniform(size=[n_obs, beta.size - 1])
    X = np.hstack((iota, expl_var))

    # generate consumer choices: the zero column normalises the systematic
    # utility of alternative 0, so only alternative 1 depends on X
    epsilon = np.random.gumbel(size=[n_obs, 2])
    beta_tilde = np.column_stack((np.zeros(beta.size), beta))

    utility = X @ beta_tilde + epsilon
    choice = np.argmax(utility, axis=1)
    return [choice, X]
    

In [5]:
# Simulate the sample: y receives the chosen alternative per observation,
# X the regressor matrix returned alongside it.
y, X = gen_data(data_properties)

In [6]:
# Display the shape of the simulated choice vector.
y.shape

(10000,)

In [7]:
def calculate_choiceProb(expl_var, beta):
    """
    Evaluate the logistic CDF at the linear index ``expl_var @ beta``.

    Parameters
    ----------
    expl_var : array
        Regressor matrix (one row per observation).
    beta : array
        Coefficient vector conformable with ``expl_var``.

    Returns
    -------
    array
        ``logistic.cdf(expl_var @ beta)``, one value per observation.
    """
    from scipy.stats import logistic

    linear_index = expl_var @ beta
    return logistic.cdf(linear_index)

In [8]:
def log_like_i(beta, y, X):
    """
    Per-observation log-likelihood of a binary logit model.

    For observation i with linear index ``z_i = X[i] @ beta`` the
    contribution is ``log(Lambda(z_i))`` when ``y[i] == 1`` and
    ``log(1 - Lambda(z_i))`` when ``y[i] == 0``, where ``Lambda`` is the
    logistic CDF.

    The value is computed directly on the log scale with ``np.logaddexp``
    instead of taking ``np.log`` of a probability, so it stays finite when
    the probability would round to exactly 0 or 1 for large ``|z_i|``.

    Parameters
    ----------
    beta : array
        Coefficient vector.
    y : array-like
        Observed choices, coded 0 or 1.
    X : array
        Regressor matrix, one row per observation.

    Returns
    -------
    array
        Log-likelihood contribution of each observation. Entries whose
        label is neither 0 nor 1 are ``-inf`` (zero likelihood).
    """
    y = np.asarray(y)
    index = X @ beta

    # log Lambda(z) = -log(1 + exp(-z)) and log(1 - Lambda(z)) = -log(1 + exp(z)),
    # so flipping the sign of z for y == 0 lets both outcomes share one formula.
    signed_index = np.where(y == 1, index, -index)
    ll_i = -np.logaddexp(0.0, -signed_index)

    # Labels outside {0, 1} have zero likelihood under the indicator formulation.
    valid_label = (y == 0) | (y == 1)
    return np.where(valid_label, ll_i, -np.inf)
    

In [9]:
def compute_log_like(beta, y, X):
    """
    Objective function for the optimiser: the NEGATIVE sample log-likelihood.

    Sums the per-observation contributions from ``log_like_i`` and flips the
    sign, so that minimising this value maximises the likelihood.

    Parameters
    ----------
    beta : array
        Coefficient vector at which to evaluate the objective.
    y : array
        Observed choices.
    X : array
        Regressor matrix.

    Returns
    -------
    float
        Minus the sum of the per-observation log-likelihoods.
    """
    per_obs = log_like_i(beta, y, X)
    total = per_obs.sum()
    return -total

In [10]:
from scipy import optimize as opt

# Initial guess for the two coefficients.
starting_vals = np.array([0.5, 0.5])

# Minimise the negative log-likelihood with L-BFGS-B. No analytic gradient
# is passed, so SciPy approximates it numerically.
result = opt.minimize(
    fun=compute_log_like,
    x0=starting_vals,
    args=(y, X),
    method='L-BFGS-B',
    tol=1e-10,
)

In [11]:
# Display the full result object returned by the optimiser.
result

      fun: 6365.1542467581203
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 0.0003638 ,  0.00027285])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 30
      nit: 8
   status: 0
  success: True
        x: array([ 0.32801893,  0.68705718])

In [12]:
# Pull the estimated coefficient vector out of the optimiser result.
coeffs = result.x
print(coeffs)

[ 0.32801893  0.68705718]


In [13]:
# get std errors

from scipy import linalg as la
import statsmodels.tools.numdiff as smt

In [14]:
# Numerically approximate the Hessian of the negative log-likelihood
# (compute_log_like) at the estimated coefficients.
hessian = smt.approx_hess3(coeffs, compute_log_like, args=(y, X))

In [15]:
# The inverse Hessian of the negative log-likelihood at the optimum estimates
# the covariance matrix of the coefficients; the standard errors are the
# square roots of its diagonal.
# la.pinv is used because la.pinv2 was deprecated in SciPy 1.7 and removed in
# SciPy 1.9; la.pinv computes the same SVD-based pseudo-inverse.
invHessian = la.pinv(hessian)
np.sqrt(np.diagonal(invHessian))

array([ 0.04124549,  0.07367847])