In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set_theme()

# optimization
from scipy import optimize
from numpy.random import default_rng

# For quick OLS
#import statsmodels.formula.api as smf

# Import our toolbox
import clogit as clogit
import mlogit_ante as mlogit
import estimation as est
import LinearModels as lm

# Read in data

The dataset, `cars.csv`, contains cleaned and processed data. If you want to make changes, the notebook, `materialize.ipynb`, creates the data from the raw source datsets. 

In [2]:
cars = pd.read_csv('cars.csv')
lbl_vars = pd.read_csv('labels_variables.csv')
lbl_vals = pd.read_csv('labels_values.csv')

# convert from dataframe to dict
lbl_vals = {c: lbl_vals[c].dropna().to_dict() for c in lbl_vals.columns}

In [3]:
lbl_vars.set_index('variable', inplace=True)

## Overview of the dataset

In [4]:
lbl_vars.join(cars.mean(numeric_only=True).apply(lambda x: f'{x: .2f}').to_frame('Mean'))

Unnamed: 0_level_0,label,Mean
variable,Unnamed: 1_level_1,Unnamed: 2_level_1
ye,year (=first dimension of panel),84.5
ma,market (=second dimension of panel),3.0
co,model code (=third dimension of panel),207.5
zcode,alternative model code (predecessors and succe...,177.76
brd,brand code,16.79
type,name of brand and model,
brand,name of brand,
model,name of model,
org,"origin code (demand side, country with which c...",2.72
loc,"location code (production side, country where ...",5.17


In [5]:
temp = lbl_vars.join(cars.mean(numeric_only=True).apply(lambda x: f'{x: .2f}').to_frame('Mean'))
temp['label']['princ']

'=pr/(ngdp/pop): price relative to per capita income (often used in demand model)'

# Set up for analysis

In [6]:
price_var = 'princ'

In [7]:
cars['logp'] = np.log(cars[price_var])

In [8]:
# new variable: price elasticity heterogeneous for home-region 
cars['logp_x_home'] = cars['logp'] * cars['home']

# Scaling for numerical stability
cars['cy']/=1000
cars['hp']/=100
cars['we']/=1000

### Dummy variables

For working with matrices, we want to have a column for each dummy variable. 

In [9]:
categorical_var = 'brand' # name of categorical variable
dummies = pd.get_dummies(cars[categorical_var]) # creates a matrix of dummies for each value of dummyvar
x_vars_dummies = list(dummies.columns[1:].values) # omit a reference category, here it is the first (hence columns[1:])

# add dummies to the dataframe 
assert dummies.columns[0] not in cars.columns, f'It looks like you have already added this dummy to the dataframe. Avoid duplicates! '
cars = pd.concat([cars,dummies], axis=1)

In [10]:
# NB! Let's take a look at the reference category
dummies.columns[0] #This is BMW
# Something might be going on 

'BMW'

### `x_vars`: List of regressors to be used 

In [11]:
x_vars = ['logp', 'home', 'logp_x_home', 'cy', 'hp', 'we', 'li', 'he'] + x_vars_dummies
# If we include dummies for the brand, the 3-D matrix x becomes singular. Why?
# 
print(f'K = {len(x_vars)} variables selected.')

K = 40 variables selected.


In [12]:
cars['co'].nunique() #Different car models - some cars are in the top 40 most sold for a market and/or year

285

In [13]:
K = len(x_vars) #The "household" characteristics
N = cars.ma.nunique() * cars.ye.nunique() #The market-year 'i'
J = 40 #The 40 different cars
x = cars[x_vars].values.reshape((N,J,K))

In [14]:
cars['logp_x_home'].mean()

-0.0944031249329464

In [15]:
cars['logp'].mean()

-0.3606223052101116

In [16]:
y = cars['s'].values #The market shares 
y = y.reshape((N,J)) #The market shares in market-year N for J=40 different cars

In [17]:
# Let's look at a particular 'market-year' to see what is going on

test = cars[(cars['ma']==1) & (cars['ye']==70)] 


# Interpretation: Each 'market-year', the top 40 most selling cars make up about 99.99% of all cars sold.
    # Each market-year share column (called "s" in the data) sums to one


In [18]:
test

Unnamed: 0,ye,ma,co,zcode,brd,type,brand,model,org,loc,...,seat,skoda,suzuki,tal/hillman,tal/matra,tal/simca,tal/sunb,talbot,toyota,volvo
0,70,1,15,14,2,audi 100/200,audi,100/200,2,4,...,0,0,0,0,0,0,0,0,0,0
1,70,1,26,35,4,citroen 2 CV 6 - 2 CV 4,citroen,2CV6,1,3,...,0,0,0,0,0,0,0,0,0,0
2,70,1,36,36,4,citroen dyane,citroen,dyane,1,3,...,0,0,0,0,0,0,0,0,0,0
3,70,1,64,67,7,fiat 128,fiat,128,3,5,...,0,0,0,0,0,0,0,0,0,0
4,70,1,71,80,8,ford escort,ford,escort,2,4,...,0,0,0,0,0,0,0,0,0,0
5,70,1,134,159,16,mercedes 200-300,mercedes,200,2,4,...,0,0,0,0,0,0,0,0,0,0
6,70,1,165,197,19,opel kadett,opel,kadet,2,4,...,0,0,0,0,0,0,0,0,0,0
7,70,1,172,194,19,opel rekord,opel,record,2,4,...,0,0,0,0,0,0,0,0,0,0
8,70,1,186,202,20,peugeot 504,peugeot,504,1,3,...,0,0,0,0,0,0,0,0,0,0
9,70,1,187,207,20,peugeot 304,peugeot,304,1,3,...,0,0,0,0,0,0,0,0,0,0


In [19]:
test['s'].sum() #The share of total sales for the top 40 most cars sum to one

0.9999999999999979

$$
u_{i j h}=\mathbf{x}_{i j} \boldsymbol{\beta}_o+\varepsilon_{i j h}, \quad j=1, \ldots, J
$$

where: 

- $i$ is the $\textit{country-year}$ pair
- $j$ is the alternative car
- $h$ is the household

First off: Are we: 

    1. interested in the marginal utility of a car's characteristic (conditional logit) or 
    
    2. the change in utility of car $j$ relative to car 1 given a change in household characteristics?

In this assignment, we are examning home bias - that is the propensity to choose a car manufactured in the home country. We are therefore interested in 1) and will use a conditional logit model.

In [20]:
# Our conditional choice probabilities
# For coefficients ('theta') starting at zero, these must be equal to zero
# Intuition: No utility is gained by any car characteristics, thus market shares must be equal. Let's check this.
theta_start = clogit.starting_values(y,x)
ccp = clogit.choice_prob(theta_start, x)
(ccp == 1/J) # all choice probs are equal to each other.

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [21]:
# In the first market-year, what are the choice-probs? (Given individuals place no weight on any car characteristics)
ccp.sum(axis=1) #Summing over the columns J equal one. Goods

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [22]:
print(ccp.shape)
np.unique(ccp) #Checking if we have any NaN

(150, 40)


array([0.025])

In [23]:
temp_df = pd.DataFrame(x[50]) #Looking at the 50th market-year
temp_df.columns=x_vars
temp_df.head()

Unnamed: 0,logp,home,logp_x_home,cy,hp,we,li,he,MCC,VW,...,seat,skoda,suzuki,tal/hillman,tal/matra,tal/simca,tal/sunb,talbot,toyota,volvo
0,-0.43865,0.0,-0.0,1.3,0.63,0.94,7.8,135.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.226922,0.0,-0.0,1.8,0.66,1.05,7.6,139.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.143563,0.0,-0.0,1.6,0.75,1.08,8.5,138.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.407682,0.0,0.0,2.0,1.1,1.425,9.8,141.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.891653,1.0,-0.891653,1.0,0.33,0.64,5.0,135.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
T=5
y_ols = np.log(y).reshape((N*J),1)
x_ols = np.hstack([np.ones((N*J,1)), x.reshape(N*J,K)])

In [25]:
y_ols.shape

(6000, 1)

In [26]:
res_ols = lm.estimate(y_ols,x_ols)
ols_out = lm.outreg(res_ols, var_labels=['cons_']+x_vars, name='OLS')


In [28]:
res = est.estimate(clogit.q2, theta_start, y, x, cov_type='Sandwich')

Optimization terminated successfully.
         Current function value: 3.471981
         Iterations: 289
         Function evaluations: 11931
         Gradient evaluations: 291


In [29]:
res_dict = {'theta_hat': res['theta'], 'se': res['se'], 't-stat':res['t'], 'cov_hat': res['cov']}
clogit_out = clogit.outreg(res_dict, var_labels=x_vars, name='Logit')

In [30]:
x_vars[:8]

['logp', 'home', 'logp_x_home', 'cy', 'hp', 'we', 'li', 'he']

In [31]:
tab1 = pd.merge(clogit_out[:8*2], ols_out[:9*2], 'right', left_index=True, right_index=True).fillna('.')
tab1

Unnamed: 0,Unnamed: 1,Logit,OLS
cons_,theta,.,-2.37***
cons_,se,.,(0.311)
logp,theta,-0.25**,-0.41***
logp,se,(0.128),(0.042)
home,theta,1.42***,1.04***
home,se,(0.044),(0.03)
logp_x_home,theta,0.14**,0.18***
logp_x_home,se,(0.061),(0.056)
cy,theta,-0.12,-0.35***
cy,se,(0.074),(0.066)


In [32]:
print(tab1.rename(index={'theta':'', 'se':''}).to_latex())

\begin{tabular}{llll}
\toprule
   & {} &     Logit &       OLS \\
\midrule
cons\_ & {} &         . &  -2.37*** \\
   & {} &         . &   (0.311) \\
logp & {} &   -0.25** &  -0.41*** \\
   & {} &   (0.128) &   (0.042) \\
home & {} &   1.42*** &   1.04*** \\
   & {} &   (0.044) &    (0.03) \\
logp\_x\_home & {} &    0.14** &   0.18*** \\
   & {} &   (0.061) &   (0.056) \\
cy & {} &     -0.12 &  -0.35*** \\
   & {} &   (0.074) &   (0.066) \\
hp & {} &  -1.52*** &  -1.06*** \\
   & {} &   (0.187) &   (0.149) \\
we & {} &   0.65*** &   1.28*** \\
   & {} &   (0.188) &   (0.127) \\
li & {} &   -0.04** &  -0.06*** \\
   & {} &   (0.017) &   (0.008) \\
he & {} &  -0.01*** &  -0.01*** \\
   & {} &   (0.003) &   (0.002) \\
\bottomrule
\end{tabular}



  print(tab1.rename(index={'theta':'', 'se':''}).to_latex())


In [32]:
theta_hat = res_dict['theta_hat']
cov_hat = res_dict['cov_hat']

In [33]:
cov_hat

array([[ 1.63719775e-02, -1.69246107e-04, -1.52776839e-04, ...,
         1.01010529e-05, -2.45587991e-05, -9.30762888e-04],
       [-1.69246107e-04,  1.93494219e-03,  1.88337910e-03, ...,
        -6.60247903e-05, -2.94427614e-04,  2.90504470e-04],
       [-1.52776839e-04,  1.88337910e-03,  3.71569972e-03, ...,
        -6.60954863e-04, -9.20908838e-04,  5.99359435e-04],
       ...,
       [ 1.01010529e-05, -6.60247903e-05, -6.60954863e-04, ...,
         5.81010548e-03,  1.65309474e-03,  1.94384569e-04],
       [-2.45587991e-05, -2.94427614e-04, -9.20908838e-04, ...,
         1.65309474e-03,  4.29781884e-03, -1.10044974e-03],
       [-9.30762888e-04,  2.90504470e-04,  5.99359435e-04, ...,
         1.94384569e-04, -1.10044974e-03,  1.16121467e-03]])

In [34]:
theta_hat

array([-0.2536602 ,  1.42205277,  0.13869701, -0.12068846, -1.52162443,
        0.64925158, -0.03674792, -0.01194046, -1.18989656,  0.17930946,
       -0.76774873, -0.12134582, -0.47523748, -0.51768112, -0.78863372,
       -0.15280806,  0.01079063, -0.29341559, -0.72032055, -1.35456638,
       -0.97493655, -0.38385169,  0.51718291, -0.47730258, -0.17256599,
       -0.03526939, -0.07978525,  0.01100992, -0.43755445, -0.65733916,
       -0.77744072, -0.89463044, -1.18362122, -1.27901679, -2.83607786,
       -0.43058234, -3.40089892, -0.41572431, -0.06182238,  0.01338732])

In [35]:
ccp_hat = clogit.choice_prob(theta_hat, x)

In [36]:
market_share = cars['s'].values.reshape((N,J))
market_share[0] # For the first market-year, what are the observed market shares of the 40 different cars?

array([0.01129646, 0.01464355, 0.02803195, 0.03263421, 0.03974679,
       0.02786459, 0.04393066, 0.0460226 , 0.02091936, 0.01025049,
       0.03598131, 0.02991469, 0.03347098, 0.01861823, 0.03075147,
       0.03911921, 0.05857422, 0.01255162, 0.00920452, 0.00878613,
       0.00920452, 0.01548033, 0.01945501, 0.03127445, 0.01799065,
       0.01380678, 0.00962291, 0.03012388, 0.03179743, 0.01045968,
       0.01108726, 0.02259291, 0.02322049, 0.02510324, 0.02656759,
       0.05899261, 0.01589872, 0.02091936, 0.03233297, 0.02175614])

In [37]:
ccp_hat[0] # For the first market-year, what are the estimated market shares of the 40 different cars given our control variables?

array([0.01739914, 0.02260737, 0.02371833, 0.02409675, 0.03661417,
       0.027083  , 0.02993979, 0.02919435, 0.01789385, 0.0228626 ,
       0.03706877, 0.02580733, 0.03487534, 0.02824443, 0.02455711,
       0.02299095, 0.03991113, 0.02311779, 0.02646776, 0.01757049,
       0.01374869, 0.02309152, 0.0108937 , 0.01927279, 0.02257808,
       0.01956035, 0.01490893, 0.03436949, 0.03155966, 0.01677118,
       0.02338755, 0.02959199, 0.0289206 , 0.02034579, 0.03489161,
       0.02340906, 0.02120393, 0.03646579, 0.01792578, 0.02508305])

In [38]:
def cross_price_elas(x, theta):

    N,J,K = x.shape
    E       = np.zeros((N,J))
    k_price = 0
    k_home = 1
    k_price_x_home = 2    

    ccp = clogit.choice_prob(theta, x)

    # analytical
    for j in range(J):
        k_not_j = [k for k in range(J) if k != j]
        E[:, j] = np.mean(-(theta[k_price]+x[:,k_not_j,k_home]*theta[k_price_x_home])*ccp[:, k_not_j], axis=1)
    
    
    return np.mean(E)

In [39]:
def own_price_elas(x, theta):
    N,J,K = x.shape
    E       = np.zeros((N,J))
    k_price = 0
    k_home = 1
    k_price_x_home = 2
    
    ccp = clogit.choice_prob(theta, x)
    
    # Analytically 
    for j in range(J):
        E[:, j] = (theta[k_price]+x[:,j,k_home]*theta[k_price_x_home])*(1-ccp[:,j])
    
    return np.mean(E)

In [40]:
def own_price_elas_home(x, theta):
    N,J,K = x.shape
    E       = np.zeros((N,J))
    k_price = 0
    k_home = 1
    k_price_x_home = 2
    
    ccp = clogit.choice_prob(theta, x)
    
    # Analytically 
    for j in range(J):
        E[:, j] = (theta[k_price]+x[:,j,k_home]*theta[k_price_x_home])*(1-ccp[:,j])
        idx = x[:, :, 1]==1 #Evaluated at all home-produced cars
    
    return np.mean(E[idx])

In [41]:
def own_price_elas_foreign(x, theta):
    N,J,K = x.shape
    E       = np.zeros((N,J))
    k_price = 0
    k_home = 1
    k_price_x_home = 2
    
    ccp = clogit.choice_prob(theta, x)
    
    # Analytically 
    for j in range(J):
        E[:, j] = (theta[k_price]+x[:,j,k_home]*theta[k_price_x_home])*(1-ccp[:,j])
        idx = x[:, :, 1]==0 #Evaluated at all foreign-produced cars
    
    return np.mean(E[idx])

In [42]:
own_price_elas(x, theta_hat)

-0.20536080697229842

In [43]:
own_price_elas_home(x, theta_hat)

-0.10994118393908991

In [44]:
own_price_elas_foreign(x, theta_hat)

-0.24951157355372933

In [45]:
cross_price_elas(x, theta_hat)

0.004424907588514078

The Delta Method tells us how to compute standard errors for $\mathbf{h}(\hat{\boldsymbol{\theta}})$ based on an estimated covariance matrix for $\hat{\boldsymbol{\theta}}$. To do this, define the $K \times K$ matrix of derivatives of $\mathbf{h}$
$$ \mathbf{g} = \nabla_\theta \mathbf{h}(\hat{\boldsymbol{\theta}}).$$

Then we have 
$$ \text{Avar}[\mathbf{h}(\hat{\boldsymbol{\theta}})] = \mathbf{g} \, \text{Avar}(\hat{\boldsymbol{\theta}}) \, \mathbf{g}'$$
 
From the note, we have 

$$ \mathbf{g} = \phi(\mathbf{x}_0 \hat{\boldsymbol{\beta}}) 
    \left [\mathbf{I}_{K\times K} - (\hat{\boldsymbol{\beta}} \hat{\boldsymbol{\beta}}') (\mathbf{x}_0' \mathbf{x}_0) \right] $$ 

Note that both $\hat{\boldsymbol{\beta}} \hat{\boldsymbol{\beta}}'$ and $\mathbf{x}_0' \mathbf{x}_0$ are $K \times K$ matrices (outer products). 

In [46]:
def delta(func,cov_hat, theta_hat, x):
    h = func(x, theta_hat) #This calculates the own-price/cross-price elasiticity
    h2 = lambda coef: func(x, coef) #Defining a helper function that takes a single input (estimates from our logit model). So this can be passed to the gradient function
    grad = est.centered_grad(h2, theta_hat) #Gradient evaluated of the output at the given theta_hat
    variance = grad@cov_hat@grad.T 
    se = np.sqrt(np.diag(variance))
    t_stat = h/se 
    res = {'theta_hat': h.round(3), 'se': se[0].round(4), 't-stat': t_stat[0].round(4)}
    return res

In [47]:
own_price_elas_dict = delta(own_price_elas, cov_hat, theta_hat, x)
cross_price_elas_dict = delta(cross_price_elas, cov_hat, theta_hat, x)

own_price_elas_dict_home = delta(own_price_elas_home, cov_hat, theta_hat, x)
own_price_elas_dict_foreign = delta(own_price_elas_foreign, cov_hat, theta_hat, x)

In [48]:
tab1 = clogit.outreg(own_price_elas_dict, var_labels=['Own-price elasticity'], name='Elasticity')

tab2 = clogit.outreg(own_price_elas_dict_home, var_labels=['Own-price elasticity - home produced'], name='Elasticity')

tab3 = clogit.outreg(own_price_elas_dict_foreign, var_labels=['Own-price elasticity - foreign produced'], name='Elasticity')

tab4 = clogit.outreg(cross_price_elas_dict, var_labels=['Cross-price elasticity'], name='Elasticity')

tabs = [tab1, tab2, tab3, tab4]

print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())


\begin{tabular}{lll}
\toprule
                       & {} & Elasticity \\
\midrule
Own-price elasticity & {} &       -0.2 \\
                       & {} &    (0.126) \\
Own-price elasticity - home produced & {} &      -0.11 \\
                       & {} &    (0.134) \\
Own-price elasticity - foreign produced & {} &    -0.25** \\
                       & {} &    (0.126) \\
Cross-price elasticity & {} &        0.0 \\
                       & {} &    (0.003) \\
\bottomrule
\end{tabular}



  print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())


---

### Numerical elasticities

1) Delta method (can we do this?)
2) Bootstrapping

In [49]:
def own_price_elas_num(x, theta):

    ccp1 = clogit.choice_prob(theta, x)

    E_own   = np.zeros((N, J))
    E_cross = np.zeros((N, J))
    dpdx    = np.zeros((N, J))
    k_price = 0 
    k_home = 1
    k_price_x_home = 2

    for j in range(J):
        # A. copy 
        x2 = x.copy()
        
        # B. increase price just for car j 
        rel_change_x = 1e-3
        x2[:, j, k_price] += rel_change_x # log p changes
        x2[:, j, k_price_x_home] = x2[:, j, k_price]*x2[:, j, k_home] # logp_x_home changes as well

        # C. evaluate CCPs
        ccp2 = clogit.choice_prob(theta, x2) # Fill in 
        
        # D. percentage change in CCPs 
        rel_change_y = ccp2/ccp1-1 # Fill in 
        
        # E. elasticities 
        elasticity = rel_change_y/rel_change_x # Fill in 
        
        E_own[:, j] = elasticity[:, j] # Fill in 
        
        k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j -> this list changes as we loop through j
        E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j, taking the average over axis=1 (the cars, not the individuals!)

        return np.mean(E_own)

In [50]:
def cross_price_elas_num(x, theta):

    ccp1 = clogit.choice_prob(theta, x)

    E_own   = np.zeros((N, J))
    E_cross = np.zeros((N, J))
    dpdx    = np.zeros((N, J))
    k_price = 0 
    k_home = 1
    k_price_x_home = 2

    for j in range(J):
        # A. copy 
        x2 = x.copy()
        
        # B. increase price just for car j 
        rel_change_x = 1e-3
        x2[:, j, k_price] += rel_change_x # Fill in 
        x2[:, j, k_price_x_home] = x2[:, j, k_price]*x2[:, j, k_home] # logp_x_home changes as well


        # C. evaluate CCPs
        ccp2 = clogit.choice_prob(theta, x2) # Fill in 
        
        # D. percentage change in CCPs 
        rel_change_y = ccp2/ccp1-1 # Fill in 
        
        # E. elasticities 
        elasticity = rel_change_y/rel_change_x # Fill in 
        
        E_own[:, j] = elasticity[:, j] # Fill in 
        
        k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j -> this list changes as we loop through j
        E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j, taking the average over axis=1 (the cars, not the individuals!)

        return np.mean(E_cross)

In [61]:
def own_price_elas_num_home(x, theta):

    ccp1 = clogit.choice_prob(theta, x)

    E_own   = np.zeros((N, J))
    E_cross = np.zeros((N, J))
    dpdx    = np.zeros((N, J))
    k_price = 0 
    k_home = 1
    k_price_x_home = 2

    for j in range(J):
        # A. copy 
        x2 = x.copy()
        
        # B. increase price just for car j 
        rel_change_x = 1e-3
        x2[:, j, k_price] += rel_change_x # Fill in 
        x2[:, j, k_price_x_home] = x2[:, j, k_price]*x2[:, j, k_home] # logp_x_home changes as well


        # C. evaluate CCPs
        ccp2 = clogit.choice_prob(theta, x2) # Fill in 
        
        # D. percentage change in CCPs 
        rel_change_y = ccp2/ccp1-1 # Fill in 
        
        # E. elasticities 
        elasticity = rel_change_y/rel_change_x # Fill in 
        
        E_own[:, j] = elasticity[:, j] # Fill in 
        
        k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j -> this list changes as we loop through j
        E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j, taking the average over axis=1 (the cars, not the individuals!)

        # F. Home produced cars
        idx = x[:, :, k_home] == 1

        return np.mean(E_own[idx])

In [52]:
def own_price_elas_num_foreign(x, theta):

    ccp1 = clogit.choice_prob(theta, x)

    E_own   = np.zeros((N, J))
    E_cross = np.zeros((N, J))
    dpdx    = np.zeros((N, J))
    k_price = 0 
    k_home = 1
    k_price_x_home = 2

    for j in range(J):
        # A. copy 
        x2 = x.copy()
        
        # B. increase price just for car j 
        rel_change_x = 1e-3
        x2[:, j, k_price] += rel_change_x # Fill in 
        x2[:, j, k_price_x_home] = x2[:, j, k_price]*x2[:, j, k_home] # logp_x_home changes as well


        # C. evaluate CCPs
        ccp2 = clogit.choice_prob(theta, x2) # Fill in 
        
        # D. percentage change in CCPs 
        rel_change_y = ccp2/ccp1-1 # Fill in 
        
        # E. elasticities 
        elasticity = rel_change_y/rel_change_x # Fill in 
        
        E_own[:, j] = elasticity[:, j] # Fill in 
        
        k_not_j = [k for k in range(J) if k != j] # indices for all other cars than j -> this list changes as we loop through j
        E_cross[:, j] = elasticity[:, k_not_j].mean(axis=1) # Fill in: Avg. among the cars k_not_j, taking the average over axis=1 (the cars, not the individuals!)

        # F. Foreign produced cars
        idx = x[:, :, k_home] == 0

        return np.mean(E_own[idx])

1) Delta method

In [62]:
own_price_elas_dict_num = delta(own_price_elas_num, cov_hat, theta_hat, x)
cross_price_elas_dict_num = delta(cross_price_elas_num, cov_hat, theta_hat, x)

own_price_elas_dict_num_home = delta(own_price_elas_num_home, cov_hat, theta_hat, x)
own_price_elas_dict_num_foreign = delta(own_price_elas_num_foreign, cov_hat, theta_hat, x)

tab1 = clogit.outreg(own_price_elas_dict_num, var_labels=['Own-price elasticity'], name='NUMERICAL Elasticity')

tab2 = clogit.outreg(own_price_elas_dict_num_home, var_labels=['Own-price elasticity - home produced'], name='NUMERICAL Elasticity')

tab3 = clogit.outreg(own_price_elas_dict_num_foreign, var_labels=['Own-price elasticity - foreign produced'], name='NUMERICAL Elasticity')

tab4 = clogit.outreg(cross_price_elas_dict_num, var_labels=['Cross-price elasticity'], name='NUMERICAL Elasticity')

tabs = [tab1, tab2, tab3, tab4]

print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())

\begin{tabular}{lll}
\toprule
                       & {} & NUMERICAL Elasticity \\
\midrule
Own-price elasticity & {} &                 -0.0 \\
                       & {} &              (0.003) \\
Own-price elasticity - home produced & {} &                 -0.0 \\
                       & {} &              (0.004) \\
Own-price elasticity - foreign produced & {} &              -0.01** \\
                       & {} &              (0.003) \\
Cross-price elasticity & {} &                  0.0 \\
                       & {} &                (0.0) \\
\bottomrule
\end{tabular}



  print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())


2) Bootstrapping

In [54]:
def bootstrap_sample(y,x): 
    '''bootstrap_sample: samples a new dataset (with replacement) from the input. 
    Args. 
        y: 1-dimensional N-array
        x: (N,K) matrix 
    Returns
        tuple: y_i, x_i 
            y_i: N-array
            x_i: (N,K) matrix 
    '''
    N,K = y.shape
    
    # FILL IN 
    ii_boot = np.random.choice(N, N, replace=True) # vector of indices for rows 

    y_i = y[ii_boot] # selection of N rows from y 
    x_i = x[ii_boot] # selection of N rows from x 
    
    return y_i, x_i 

In [56]:
nboot = 1000  # Number of bootstraps, should ideally be very large 

# Set seed for random sampling.
seed = 42
rng = default_rng()

# initialize 
E_own = np.empty((nboot,J))
E_own_home = np.empty((nboot,J))
E_own_foreign = np.empty((nboot, J))
E_cross = np.empty((nboot,J))

for i in range(nboot): 
    print(f'Bootstrap iteration {i+1}/{nboot}')
    
    # 1. choose which individuals to draw
    y_i, x_i = bootstrap_sample(y,x) # Fill in: call the bootstrap sample function 
    # 2. estimate and compute 

    res = est.estimate(clogit.q2,theta_start, y_i, x_i, cov_type='Sandwich')

    theta_hat_boot = res['theta']

    ccp_i = clogit.choice_prob(theta_hat_boot, x_i)

    own_p_elas = own_price_elas_num(x_i, theta_hat_boot)
    own_p_elas_home = own_price_elas_num_home(x_i, theta_hat_boot)
    own_p_elas_foreign = own_price_elas_num_foreign(x_i, theta_hat_boot)
    cross_p_elas = cross_price_elas_num(x_i, theta_hat_boot)

    E_own[i,:] = own_p_elas
    E_own_home[i, :] = own_p_elas_home
    E_own_foreign[i, :] = own_p_elas_foreign
    
    E_cross[i,:] = cross_p_elas

Bootstrap iteration 1/1000
Optimization terminated successfully.
         Current function value: 3.459281
         Iterations: 274
         Function evaluations: 11316
         Gradient evaluations: 276
Failed to compute std. errs.: got error "Singular matrix"
Bootstrap iteration 2/1000
Optimization terminated successfully.
         Current function value: 3.463667
         Iterations: 292
         Function evaluations: 12054
         Gradient evaluations: 294
Bootstrap iteration 3/1000
Optimization terminated successfully.
         Current function value: 3.454225
         Iterations: 310
         Function evaluations: 12833
         Gradient evaluations: 313
Bootstrap iteration 4/1000
Optimization terminated successfully.
         Current function value: 3.473357
         Iterations: 288
         Function evaluations: 11931
         Gradient evaluations: 291
Bootstrap iteration 5/1000
Optimization terminated successfully.
         Current function value: 3.475778
         Iterations

In [57]:
res_bootstrap_own_p = {'theta_hat': np.mean(E_own).round(3), 'se': np.std(E_own).round(4), 't-stat': np.mean(E_own).round(3)/np.std(E_own).round(4)}

res_bootstrap_own_p_home = {'theta_hat': np.mean(E_own_home).round(3), 'se': np.std(E_own_home).round(4), 't-stat': np.mean(E_own_home).round(3)/np.std(E_own).round(4)}

res_bootstrap_own_p_foreign = {'theta_hat': np.mean(E_own_foreign).round(3), 'se': np.std(E_own_foreign).round(4), 't-stat': np.mean(E_own_foreign).round(3)/np.std(E_own).round(4)}

res_bootstrap_cross_p = {'theta_hat': np.mean(E_cross).round(3), 'se': np.std(E_cross).round(4), 't-stat': np.mean(E_cross).round(3)/np.std(E_cross).round(4)}

In [60]:
tab1 = clogit.outreg(res_bootstrap_own_p, var_labels=['Own-price elasticity'], name='NUMERICAL Elasticity (bootstrap)')

tab2 = clogit.outreg(res_bootstrap_own_p_home, var_labels=['Own-price elasticity - home produced'], name='NUMERICAL Elasticity (bootstrap)')

tab3 = clogit.outreg(res_bootstrap_own_p_foreign, var_labels=['Own-price elasticity - foreign produced'], name='NUMERICAL Elasticity (bootstrap)')

tab4 = clogit.outreg(res_bootstrap_cross_p, var_labels=['Cross-price elasticity'], name='NUMERICAL Elasticity (bootstrap)')

tabs = [tab1, tab2, tab3, tab4]

print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())

\begin{tabular}{lll}
\toprule
                       & {} & NUMERICAL Elasticity (bootstrap) \\
\midrule
Own-price elasticity & {} &                             -0.0 \\
                       & {} &                          (0.003) \\
Own-price elasticity - home produced & {} &                             -0.0 \\
                       & {} &                          (0.004) \\
Own-price elasticity - foreign produced & {} &                           -0.01* \\
                       & {} &                          (0.003) \\
Cross-price elasticity & {} &                              0.0 \\
                       & {} &                            (0.0) \\
\bottomrule
\end{tabular}



  print(pd.concat(tabs).rename(index={'theta': '', 'se': ''}).to_latex())


---

Old stuff

---

In [None]:
np.linalg.matrix_rank(x) # something is not right when you include the car-brand dummies -> rank-condition is not fulfilled -> matrix becomes singular
# Reference category is 'BMW'
# Could be that none of the top 40 cars sold in a given year and market is home-made and/or that they are BMW's -> some column(s) then become(s) zero.

array([21, 21, 23, 21, 23, 24, 24, 23, 23, 21, 22, 22, 23, 21, 21, 21, 22,
       22, 22, 21, 21, 20, 21, 21, 22, 21, 21, 22, 22, 22, 21, 23, 23, 22,
       24, 24, 23, 20, 20, 20, 21, 21, 21, 20, 21, 22, 21, 21, 22, 22, 21,
       21, 20, 20, 21, 21, 22, 21, 21, 20, 21, 22, 22, 24, 22, 21, 21, 22,
       24, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 23, 23, 23, 24, 25, 25,
       24, 25, 24, 24, 23, 21, 22, 23, 20, 21, 21, 21, 21, 21, 21, 22, 22,
       22, 21, 21, 22, 22, 21, 22, 19, 21, 21, 21, 22, 23, 24, 25, 24, 27,
       25, 23, 23, 25, 27, 25, 25, 24, 21, 22, 21, 22, 24, 22, 22, 21, 22,
       22, 22, 22, 21, 21, 22, 21, 22, 22, 21, 23, 23, 24, 22],
      dtype=int64)

In [None]:
# So in each year, let's define 40 different choices from j=0,...,40 
# NB! Remember that one particular car model may appear multiple times over different years and/or markets.
# This is slightly confusing -> basically, we model the 40 different choices, even though the same choice may appear again later

In [None]:
# Repeating the choice of 40 different cars in each market-year --> 6000 obs
# Making a variable that starts from 0,..., J-1
# Each market-year, agents are subject to 40 different discrete market choices
cars['y'] = np.resize(np.arange(0,J), N*J)
y = cars['y'].values.reshape((N,J))

In [None]:
y[:, 0] #40 different choices vary for each market-year

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
res_alt = est.estimate(clogit.q, theta_start, y[:, 0], x)

Optimization terminated successfully.
         Current function value: 0.961136
         Iterations: 122
         Function evaluations: 5084
         Gradient evaluations: 124
Failed to compute std. errs.: got error "Singular matrix"


In [None]:
theta_res_alt = res_alt['theta']

ccp_res_alt = clogit.choice_prob(theta_res_alt, x)

In [None]:
#cars['y'] = np.resize(np.arange(0, M*T), N*J)
#y = cars['y'].values.reshape((N,J))

#cars['y'] = pd.factorize(cars['co'].values)[0]
#y = pd.factorize(cars['co'].values)[0]
#y = y.reshape((N,J))

#cars['brand']=pd.factorize(cars['brand'])[0]
#y = cars['co'].values.reshape((N,J))
#cars['co']=pd.factorize(cars['co'])[0]