Solution

In [13]:
# Show the mean utilities as a single column. δ must already exist in the kernel:
# it is assigned by the setup / contraction-mapping cells further down.
print(δ.reshape(δ.size, 1))

[[-2.6 ]
 [-5.35]
 [-2.34]
 [-2.58]
 [-3.06]
 [-2.61]
 [-2.27]
 [-2.63]
 [-2.52]
 [-1.85]
 [-4.54]
 [-2.46]
 [-2.39]
 [-2.68]]


# Berry, Levinsohn, Pakes (BLP) Model

- Indices -> j: product, t: market, i:customer

1. Random Coefficients Logit:

* Indirect Utility: 
    - $u_{ijt} = x_{jt}'b_i + a_i*p_{jt} + ξ_{jt} + ϵ_{ijt}$
    - $x_{jt}$: attributes of product j in market t
    - $p_{jt}$: price of product j in market t
    - $ξ_{jt}$: unobserved quality of product j in market t
    - $ϵ_{ijt}$: random Gumbel error (for ith customer, jth product in t market)

* Utility Maximization:
    - Customer i, in market t; chooses one product $j^*$ out of all
    - $j^*_{it} = \arg\max_{j} [u_{ijt}]$

* Conditional Choice Probabilities:
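    - Choice probability of customer i (averaging it over customers gives the market share $s_{jt}$; the 1 in the denominator is the outside good, whose utility is normalized to 0): $s_{ijt} = P(j^*_{it} = j) = \frac{\exp(x_{jt}'b_i + a_i*p_{jt} + ξ_{jt})}{1+\sum_{r} \exp(x_{rt}'b_i + a_i*p_{rt} + ξ_{rt})}$
    - Demand Derivatives (for customer i): 
        * $\frac{\partial s_{ijt}}{\partial p_{kt}}|_{j \not=k} = -a_i s_{ijt} s_{ikt}$
        * $\frac{\partial s_{ijt}}{\partial p_{jt}} = a_i s_{ijt} (1-s_{ijt})$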


2. Parameters to Shares

* Structural parameters
    - $[b_i;a_i] = [β;α] + A*D_i + B*v_i$
    - $D_i$: demographics
    - $θ_1 = [β;α]$: "common preferences" for all customers
    - $θ_2 = [A, B]$: "group-specific preferences"
    

* Constant-utility and random-utility: 
    - $u_{ijt} = δ_{jt} + μ_{ijt}$
    - $δ_{jt}(x_{jt}, p_{jt}, ξ_{jt};θ_1)=x_{jt}'β + α*p_{jt} + ξ_{jt}$: Constant-utility (fixed for all customers, depends on product and market only)
    - $μ_{ijt}(x_{jt}, p_{jt}, D_i, v_i;θ_2)= [x_{jt};p_{jt}]'(A*D_i+B*v_i)$: random-utility (varies for each customer; regressors stacked in the same order as $[b_i;a_i]$)


* Market shares: 
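    - $s_{jt} = σ_{jt}(δ_{t}, x_{t}, p_{t};θ_2) = \int_{v} \int_{D} \frac{\exp(δ_{jt} + μ_{ijt})}{1+\sum_{r} \exp(δ_{rt} + μ_{irt})} dF(v) dF(D)$
    - $\frac{\partial s_{jt}}{\partial p_{kt}} = \int_{v} \int_{D} a_i s_{ijt} (1[j=k] - s_{ikt}) dF(v) dF(D)$, where $s_{ijt}$ is the integrand above (customer i's choice probability) and $1[j=k]$ equals 1 if $j=k$ and 0 otherwise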


3. Inverting Demand

* Logic
    - We want to find the mean-utility $δ_{jt}$ implied by any $θ_2$. Given $θ_2$ we first compute the deviations $μ_{ijt}$, and then find the $δ_{jt}$ at which the predicted shares $σ_{jt}$ equal the observed shares $s_{jt}$.
    - $s_{jt} = σ_{jt}(δ_{t}, x_{t}, p_{t};θ_2) = \int_{v} \int_{D} \frac{\exp(δ_{jt} + μ_{ijt})}{1+\sum_{r} \exp(δ_{rt} + μ_{irt})} dF(v) dF(D)$
    - $δ_{jt} = σ_{jt}^{-1}(s_{t}, x_{t}, p_{t};θ_2)= x_{jt}'β + α*p_{jt} + ξ_{jt}$ (once we have this we can estimate $β$ and $α$ by 2SLS).
* Algorithm: 
    - 1) Guess $θ_2 = [A, B]$
    - 2) set $k = 0$ and use $δ^{k}_{jt} = \log(s_{jt})-\log(s_{0t})$
    - 3) Compute for each customer i, the probability to choose product j in t: $\frac{\exp(δ^k_{jt} + μ_{ijt})}{1+\sum_{r} \exp(δ^k_{rt} + μ_{irt})}$
    - 4) Avg over all customers to get the market share for product j in t: $σ_{jt}(δ^{k}_{t}, x_{t}, p_{t};θ_2)=(1/ns)\sum_{i} \frac{\exp(δ^k_{jt} + μ_{ijt})}{1+\sum_{r} \exp(δ^k_{rt} + μ_{irt})}$
    - 5) Apply contraction mapping: $\exp(δ^{k+1}_{jt}) = \exp(δ^{k}_{jt}) \frac{s_{jt}}{σ_{jt}(δ^{k}_{t}, x_{t}, p_{t};θ_2)}$
    - 6) set $k = k+1$ and go back to 3 until $\|δ^{k+1}-δ^{k}\|$ is below tolerance.
    
    

In [4]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
# Print numpy arrays with two decimals for the rest of the notebook.
np.set_printoptions(precision=2)

# Folder holding the input file. Set the BLP_DATA_DIR environment variable to run the
# notebook on another machine; when it is unset the original hard-coded location is used,
# so existing behaviour is unchanged.
DATA_DIR = Path(os.environ.get(
    'BLP_DATA_DIR',
    '/Users/pranjal/Desktop/Structural-Economics/io/random-coefficients-logit',
))

# One row per product/market record: market id (cdid), product id (prodid),
# observed share (s_jt) and one cons* column per simulated customer.
df = pd.read_csv(DATA_DIR / 'mumat.csv')
df

Unnamed: 0,cdid,prodid,s_jt,cons1,cons2,cons3,cons4,cons5,cons6,cons7,...,cons41,cons42,cons43,cons44,cons45,cons46,cons47,cons48,cons49,cons50
0,1,1,0.046474,0.045385,-0.140034,0.079154,-0.161694,0.247079,-0.08265,-0.481462,...,-0.359403,-0.359428,0.168978,-0.284589,0.33735,0.357205,-0.070507,-0.355039,0.319773,-0.217316
1,1,2,0.00279,0.449763,0.442506,-0.421718,0.435539,0.437777,-0.12965,-0.304529,...,-0.348173,-0.10244,-0.000247,-0.221337,-0.247281,-0.208628,-0.225236,-0.299602,0.204076,-0.325135
2,1,3,0.062422,0.462798,-0.38264,0.488984,0.235094,-0.223185,0.240559,-0.32514,...,0.20198,0.014754,-0.404794,0.450001,0.137239,0.312372,-0.435459,0.001062,-0.235879,0.04791
3,1,4,0.049676,0.26914,0.274197,-0.43493,0.286413,-0.26134,0.417779,0.498312,...,0.399959,-0.153296,-0.432854,0.257662,0.071769,0.010158,-0.236577,0.153458,-0.116707,0.289851
4,1,5,0.029658,0.441156,0.39238,0.080439,0.26695,0.200849,0.43351,-0.300214,...,-0.450249,0.108343,0.000788,0.485259,-0.020178,0.234042,0.463222,-0.389709,0.421268,-0.241839
5,1,6,0.047682,0.211303,0.426995,-0.242505,0.393396,-0.424232,-0.102105,0.244284,...,0.31001,0.353601,0.110555,0.338142,-0.080329,-0.204023,-0.269783,-0.464262,-0.34673,0.249905
6,1,7,0.066592,-0.31585,0.092849,0.02473,0.115274,0.22122,-0.328235,0.32676,...,0.491916,0.110779,0.372904,0.439816,-0.492881,-0.158881,0.499825,-0.470704,-0.015569,0.299903
7,1,8,0.04674,0.271659,0.033336,-0.186894,0.254425,-0.2973,0.388584,0.276544,...,0.33508,0.047714,-0.16677,0.461045,0.139331,0.366839,0.048791,-0.337562,-0.110422,0.274701
8,1,9,0.047965,-0.100122,-0.066365,-0.365855,-0.089553,-0.159405,0.033011,0.345223,...,0.29316,-0.467509,-0.278754,-0.439193,-0.217016,-0.296868,-0.106273,0.129209,0.427242,-0.290517
9,2,1,0.113105,0.323404,0.20323,0.297017,0.222407,0.463325,0.182649,-0.141104,...,0.173135,-0.49292,-0.281905,0.188003,-0.159972,-0.361882,0.399219,-0.029544,-0.025181,-0.041182


In [5]:
# Index labels of the records (one record per product/market pair)
ID = np.array(df.index)
ID_idx = ID.shape[0]  # number of records (despite the name, this is a count)

# cdid: market id of every record
cdid = np.array(df['cdid'])

# prodid: product id of every record
prodid = np.array(df['prodid'])

# cdindex: position of the FIRST record of each market (left-sided searchsorted;
# only meaningful while the rows are sorted by cdid).
# NOTE(review): this was previously described as the index of the last element of each
# market. If the last index is what is needed, use
# np.searchsorted(cdid, np.unique(cdid), side='right') - 1 instead.
cdindex = np.searchsorted(cdid, np.unique(cdid))

# Observed market share of every product j in market t
s = np.array(df['s_jt'])

# Mean-deviations μ_ijt: one row per product/market record, one column per customer
μ = np.array(df.drop(['cdid', 'prodid', 's_jt'], axis = 1))

# Total inside-good share of each market (one row per market, ordered by cdid)
s_sum = np.array(df[['s_jt','cdid']].groupby('cdid').sum())

# Share of the outside good in the record's own market, aligned row by row with `s`.
# groupby/transform broadcasts each market's total back to its records, so this works
# for any number of markets and any market labels (no hard-coded "market 1 vs the rest").
s0 = 1 - np.array(df.groupby('cdid')['s_jt'].transform('sum'))

# Initial guess for Mean-utilities: plain-logit inversion log(s_jt) - log(s_0t)
δ = np.log(s) - np.log(s0)
print(δ)

# Number of customers, distinct products and distinct markets
N = μ.shape[1]
J = np.unique(prodid).shape[0]
T = np.unique(cdid).shape[0]

[-2.56 -5.37 -2.26 -2.49 -3.01 -2.53 -2.2  -2.55 -2.53 -1.82 -4.52 -2.46
 -2.32 -2.63]


In [6]:
def CCP(μ_ijt, μ_irt, δ_jt, δ_rt):
    '''Logit choice probability of one product for one customer within one market.

    Parameters
    ----------
    μ_ijt : mean deviation of customer i for product j (one element, or empty when
        product j is not offered in the market).
    μ_irt : mean deviations of customer i for every product offered in the market.
    δ_jt : mean valuation of product j (one element, or empty when not offered).
    δ_rt : mean valuations of every product offered in the market.

    Returns
    -------
    exp(δ_jt + μ_ijt) / (1 + Σ_r exp(δ_rt + μ_irt)) for the first selected element,
    or 0 when the product selection is empty.

    Raises
    ------
    ValueError (from numpy) when the inputs cannot be converted to floats or cannot be
    broadcast together. These errors used to be swallowed and reported as probability 0.
    '''
    μ_ijt = np.asarray(μ_ijt, dtype=float)
    μ_irt = np.asarray(μ_irt, dtype=float)
    δ_jt = np.asarray(δ_jt, dtype=float)
    δ_rt = np.asarray(δ_rt, dtype=float)

    # Product not offered in this market: the boolean mask selected nothing.
    if δ_jt.size == 0 or μ_ijt.size == 0:
        return 0

    numerator = np.exp(δ_jt + μ_ijt)
    # The leading 1 is the outside good, whose utility is normalized to zero.
    denominator = 1 + np.sum(np.exp(δ_rt + μ_irt))
    # atleast_1d lets plain scalars work as well as the usual length-1 arrays.
    return np.atleast_1d(numerator / denominator)[0]

# Worked example: customer 1 choosing product 1 in market 2
i, j, t = 1, 1, 2
idx = np.logical_and(cdid == t, prodid == j)  # mask selecting the (j, t) record
print(idx)
δ_jt = δ[idx]                # length-1 array: mean-valuation of (j, t), same for all customers
δ_rt = δ[cdid == t]          # vector: mean-valuations of all products in market t
μ_ijt = μ[idx, i - 1]        # length-1 array: mean deviation of customer i for (j, t)
μ_irt = μ[cdid == t, i - 1]  # vector: mean deviations of customer i for all products in market t
print(μ_ijt.shape, μ_irt.shape, δ_jt.shape, δ_rt.shape)
print(CCP(μ_ijt, μ_irt, δ_jt, δ_rt))

[False False False False False False False False False  True False False
 False False]
(1,) (5,) (1,) (5,)
0.14678166923652114


In [7]:
def CCPMatrix(μ, δ, market_ids=None):
    '''Return the choice probability of every customer for every product/market record.

    Parameters
    ----------
    μ : 2-D array, one row per product/market record and one column per customer,
        holding the mean deviations μ_ijt.
    δ : 1-D array with the mean valuation δ_jt of every record (same row order as μ).
    market_ids : optional 1-D array with the market id of every record. Defaults to the
        notebook-level `cdid`.

    Returns
    -------
    Array with the same shape as μ whose entry [r, i] is
    exp(δ_r + μ_ri) / (1 + Σ_{r' in the same market} exp(δ_r' + μ_r'i)).

    Notes
    -----
    Markets are taken from the ids actually present, so they need not be labelled
    1..T and products need not be labelled 1..J; previously records with other labels
    were silently left at probability 0. The denominator is computed once per
    market/customer instead of once per product.
    '''
    if market_ids is None:
        market_ids = cdid
    market_ids = np.asarray(market_ids)
    μ = np.asarray(μ, dtype=float)
    δ = np.asarray(δ, dtype=float)

    P = np.zeros(μ.shape)
    for market in np.unique(market_ids):
        rows = market_ids == market
        # exp(δ_jt + μ_ijt) for every product (row) and customer (column) of this market
        exp_utility = np.exp(δ[rows][:, np.newaxis] + μ[rows, :])
        # The leading 1 is the outside good; the sum runs over the market's products.
        P[rows, :] = exp_utility / (1 + exp_utility.sum(axis=0))
    return P

P = CCPMatrix(μ, δ)
print(P.shape)

# Checks: predicted share (choice probabilities averaged over the N customers) printed
# next to the observed share for a few (market, product) pairs. A pair that does not
# exist in the data yields an all-False mask and prints 0.0 for both numbers.
for check_market, check_product in [(2, 3), (2, 4)]:
    idx = np.multiply(cdid==check_market, prodid==check_product)
    print(idx)
    # Divide by N (number of customers) rather than a hard-coded 50.
    print(np.sum(P[idx, :]/N), np.sum(s[ID[idx]]))

(14, 50)
[False False False False False False False False False False  True False
 False False]
0.007735102054792866 0.007645334
[False False False False False False False False False False False False
 False False]
0.0 0.0


In [8]:
def σ_jt(P, j, t):
    '''Predicted market share of product j in market t.

    Parameters
    ----------
    P : choice-probability matrix, one row per product/market record and one column
        per customer.
    j : product id (matched against the notebook-level `prodid`).
    t : market id (matched against the notebook-level `cdid`).

    Returns
    -------
    The mean of the selected choice probabilities, or 0 when product j is not present
    in market t (or when the mean is not strictly positive).
    '''
    idx = np.multiply(cdid==t, prodid==j)
    probs = P[idx, :]
    # Nothing selected: return 0 directly instead of taking the mean of an empty slice,
    # which yields nan and emits a RuntimeWarning.
    if probs.size == 0:
        return 0
    share = np.mean(probs)
    return share if share > 0 else 0

# Checks: predicted share for two (product, market) pairs
for check_product, check_market in ((1, 1), (4, 2)):
    print(σ_jt(P, check_product, check_market))

0.047173663262102906
0


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [9]:
def contractionMap(δ, μ, tol=0.000001, max_iter=1000, verbose=True):
    '''Invert observed market shares into mean valuations by contraction mapping.

    Iterates δ^{k+1} = δ^k + log(s) - log(σ(δ^k)), where s are the observed shares
    (notebook-level `s`) and σ(δ^k) are the predicted shares, i.e. the choice
    probabilities from CCPMatrix averaged over customers.

    Parameters
    ----------
    δ : initial guess for the mean valuations, one entry per product/market record.
        The input array is not modified.
    μ : mean deviations, one row per record and one column per customer.
    tol : stop once the largest absolute change in any δ_jt is at most this value.
    max_iter : maximum number of iterations; a warning is issued if it is reached
        without meeting the tolerance.
    verbose : print the iteration counter on every pass.

    Returns
    -------
    Array of mean valuations δ, in the same record order as the input.

    Notes
    -----
    The stopping rule uses the change in δ across ALL records, measured before δ is
    overwritten. Previously the error was recomputed for each (market, product) pair
    after the update, so only the last pair decided convergence, and an absent last
    pair gave an error of 0 and stopped the loop immediately.
    '''
    import warnings

    δ_k = np.array(δ, dtype=float)  # copy, so the caller's guess stays untouched
    if δ_k.size == 0:
        return δ_k
    log_s = np.log(s)

    for cnt in range(1, max_iter + 1):
        if verbose:
            print(cnt)
        P = CCPMatrix(μ, δ_k)
        σ = np.mean(P, axis=1)  # predicted share of every record
        # Same update as exp(δ) <- exp(δ) * s / σ, carried out in logs.
        δ_next = δ_k + log_s - np.log(σ)
        error = np.max(np.abs(δ_next - δ_k))
        δ_k = δ_next
        if error <= tol:
            break
    else:
        warnings.warn('contractionMap did not converge within max_iter iterations')
    return δ_k # return δ

# Start from the plain-logit inversion log(s_jt) - log(s_0t) and iterate to the fixed point
δ_0 = np.subtract(np.log(s), np.log(s0))
δ = contractionMap(δ_0, μ)

print(δ)

1
2
3
4
5
6
7
8
[-2.6  -5.35 -2.34 -2.58 -3.06 -2.61 -2.27 -2.63 -2.52 -1.85 -4.54 -2.46
 -2.39 -2.68]
