In [1]:
import numpy as np
from scipy.optimize import fsolve, fixed_point
from matplotlib import pyplot as plt
import pyblp
from tqdm.notebook import trange
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
import pandas as pd

In [2]:
RNG_SEED = 8476263

rng = np.random.default_rng(RNG_SEED) # this random seeding is for reproducibility

In [3]:
# I am horrified that we have to overrun the default collinearity checks
# however, wired and satellite dummy variables are collinear
# so to prevent PyBLP from throwing a fit, we must do this.
pyblp.options.collinear_rtol = 0
pyblp.options.collinear_atol = 0

In [4]:
# fixed parameter definitions

beta1 = 1
alpha = -2
gamma0 = 1/2
gamma1 = 1/4
beta2_bar = 4
beta3_bar = 4
sigma2 = 1
sigma3 = 1

# markets and goods
T = 600
J = 4

In [5]:
# 3.1

# x_jt, w_jt are absolute value of iid standard normal variables
x = np.absolute(rng.standard_normal(size=(J,T)))
w = np.absolute(rng.standard_normal(size=(J,T)))

unobservable_mean = [0,0]
unobservable_cov = [[1,0.25],[0.25,1]]
unobservables = rng.multivariate_normal(unobservable_mean, unobservable_cov, size=(J,T))
xi = unobservables[:,:,0]
omega = unobservables[:,:,1]

In [6]:
# 3.2a
# defining the market share

def own_mkt_share_derivative(t, p, beta2, beta3): 
    # p should be a length J vector
    # betas should be num_sims
    
    u_t = np.tile(x[:,t] + xi[:,t] + alpha*p, (len(beta2), 1)) # num_sims x J 
    for j in range(J):
        if j < 2:
            u_t[:,j] = u_t[:,j] + beta2
        else:
            u_t[:,j] = u_t[:,j] + beta3
            
    Z = np.tile( 1 + np.sum(np.exp(u_t),axis=-1), (J,1)).T
    numerator = alpha*np.exp(u_t)*Z - alpha*np.square(np.exp(u_t)) # num_sims x J
    denominator = np.square(Z)
    
    return np.mean(numerator / denominator, axis=0)

def outside_mkt_share_derivative(t, p, beta2, beta3): 
    # p should be a length J vector
    # betas should be num_sims
    
    u_t = np.tile(x[:,t] + xi[:,t] + alpha*p, (len(beta2), 1)) # num_sims x J 
    for j in range(J):
        if j < 2:
            u_t[:,j] = u_t[:,j] + beta2
        else:
            u_t[:,j] = u_t[:,j] + beta3
            
    Z = np.tile( 1 + np.sum(np.exp(u_t),axis=-1), (J,1)).T
    numerator = -1*alpha*np.exp(u_t) # num_sims x J
    denominator = np.square(Z)
    
    return np.mean(numerator / denominator, axis=0)  
    
def full_mkt_share_derivative(t, p, beta2, beta3):
    # p should be a length J vector
    # betas should be num_sims
    
    u_t = np.tile(x[:,t] + xi[:,t] + alpha*p, (len(beta2), 1)) # num_sims x J 
    for j in range(J):
        if j < 2:
            u_t[:,j] = u_t[:,j] + beta2
        else:
            u_t[:,j] = u_t[:,j] + beta3
            
    Z = np.tile( 1 + np.sum(np.exp(u_t),axis=-1), (J,1)).T # num_sims x J
    
    derivatives = np.zeros((J,J))
    
    own_numerator = alpha*np.exp(u_t)*Z - alpha*np.square(np.exp(u_t)) # num_sims x J
    denominator = np.square(Z)
    
    for j in range(J):
        derivatives[j,j] = np.mean(own_numerator / denominator, axis=0)[j]
        
    for j in range(J):
        for k in range(J):
            if not (j == k):
                derivatives[j,k] = np.mean(-1*alpha*np.exp(u_t)[:,k]*np.exp(u_t)[:,j] / np.square(1 + np.sum(np.exp(u_t),axis=-1)))
    
    return derivatives

In [7]:
# s_jt(p) 
def mkt_share(t, p, beta2, beta3):
    # p should be a length J vector
    # betas should be num_sims
    
    u_t = np.tile(x[:,t] + xi[:,t] + alpha*p, (len(beta2), 1)) # num_sims x J 
    for j in range(J):
        if j < 2:
            u_t[:,j] = u_t[:,j] + beta2
        else:
            u_t[:,j] = u_t[:,j] + beta3
            
    numerator = np.exp(u_t) 
    denominator = 1 + np.sum(np.exp(u_t),axis=-1) # num_sims
    
    return np.mean(numerator / (np.tile(denominator, (J, 1)).T), axis=0) 


In [8]:
# 3.2a(iv)
# draw beta coefficients for N individuals S times, observe variation in market share derivatives

S = 100

all_derivatives = np.zeros((J,J,S))
all_shares = np.zeros((J,S))

N = 3000

price = np.array([1,1,1,1])

for s in trange(S):
    beta2 = rng.normal(beta2_bar, sigma2, N)
    beta3 = rng.normal(beta3_bar, sigma3, N)
    all_derivatives[:,:,s] = full_mkt_share_derivative(0, price, beta2, beta3)
    all_shares[:,s] = mkt_share(1, price, beta2, beta3)
(np.mean(all_shares,axis=1), np.std(all_shares,axis=1), np.mean(all_derivatives, axis=2), np.std(all_derivatives, axis=2))

HBox(children=(IntProgress(value=0), HTML(value='')))




(array([0.04784253, 0.15288083, 0.44046486, 0.35277411]),
 array([0.0008991 , 0.00287306, 0.00211771, 0.0016961 ]),
 array([[-0.29478105,  0.06650959,  0.2168944 ,  0.00709914],
        [ 0.06650959, -0.16315672,  0.09183023,  0.00300568],
        [ 0.2168944 ,  0.09183023, -0.35012373,  0.03100105],
        [ 0.00709914,  0.00300568,  0.03100105, -0.04144621]]),
 array([[3.08401066e-03, 1.64034900e-03, 1.89106999e-03, 6.18963701e-05],
        [1.64034900e-03, 2.14278030e-03, 8.00654110e-04, 2.62061074e-05],
        [1.89106999e-03, 8.00654110e-04, 2.45783477e-03, 3.51894072e-04],
        [6.18963701e-05, 2.62061074e-05, 3.51894072e-04, 2.89493485e-04]]))

In [9]:
mc = np.exp( gamma0 + gamma1*w + omega/8)

In [10]:
# define function to solve

def get_function_to_solve(t, beta2, beta3):
    def F(p):
        # p is a 
        ds_dp = own_mkt_share_derivative(t, p, beta2, beta3)
        shares = mkt_share(t, p, beta2, beta3)
        return p - mc[:,t] + np.reciprocal(ds_dp)*shares
        
    return F


In [11]:
# draw betas, now compute equilibrium prices and shares

beta2 = rng.normal(beta2_bar, sigma2, (N,T))
beta3 = rng.normal(beta3_bar, sigma3, (N,T))

In [12]:


# 3.2 and 3.3: compute equilibrium shares, prices

# these two variables are the prices and shares
eq_prices = np.zeros((J, T))
eq_shares = np.zeros((J, T))

flag_total = 0

for t in trange(T):
    fn = get_function_to_solve(t, beta2[:,t], beta3[:,t])
    mkt_eq_prices, _ , flag, _ = fsolve(fn, np.array([1,1,1,1]), full_output=True)
    flag_total += flag
    eq_prices[:,t] = mkt_eq_prices
    eq_shares[:, t] = mkt_share(t, mkt_eq_prices, beta2[:,t], beta3[:,t])
    
# this should be True iff all of the fsolves converge
flag_total == T

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))




True

In [13]:
# check that at the equilibrium prices, the estimates for market shares and market share derivatives are precise
# repeating the exercise of simulation with equilibrium prices, trying to get equilibrium shares

S = 100

all_derivatives = np.zeros((J,J,S))
all_shares = np.zeros((J,S))

N = 100

for t in trange(T):
    price = np.array(eq_prices[:,t])
    for s in range(S):
        beta2_s = np.random.normal(beta2_bar, sigma2, N)
        beta3_s = np.random.normal(beta3_bar, sigma3, N)
        all_derivatives[:,:,s] = full_mkt_share_derivative(0, price, beta2_s, beta3_s)
        all_shares[:,s] = mkt_share(t, price, beta2_s, beta3_s)
    
(np.mean(all_shares,axis=1), np.std(all_shares,axis=1), np.mean(all_derivatives, axis=2), np.std(all_derivatives, axis=2))

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))




(array([0.25507194, 0.11278189, 0.40064946, 0.08424679]),
 array([0.01488757, 0.00658265, 0.0197216 , 0.00414697]),
 array([[-0.32679671,  0.04788997,  0.18986696,  0.00799054],
        [ 0.04788997, -0.11260564,  0.04405547,  0.00185407],
        [ 0.18986696,  0.04405547, -0.39812249,  0.02482909],
        [ 0.00799054,  0.00185407,  0.02482909, -0.04053913]]),
 array([[1.43467614e-02, 5.21389120e-03, 8.04643656e-03, 3.38633904e-04],
        [5.21389120e-03, 6.62030404e-03, 1.86704177e-03, 7.85743652e-05],
        [8.04643656e-03, 1.86704177e-03, 1.17833364e-02, 1.96219771e-03],
        [3.38633904e-04, 7.85743652e-05, 1.96219771e-03, 1.82593864e-03]]))

In [14]:
# Morrow and Skerlos (2011) Method: (see equation 27 in Conlon + Gortmaker)

def get_matrices(t, p, beta2, beta3):
    # p should be a length J vector
    # betas should be num_sims
    
    u_t = np.tile(x[:,t] + xi[:,t] + alpha*p, (len(beta2), 1)) # num_sims x J 
    for j in range(J):
        if j < 2:
            u_t[:,j] = u_t[:,j] + beta2
        else:
            u_t[:,j] = u_t[:,j] + beta3
            
    Z = np.tile( 1 + np.sum(np.exp(u_t),axis=-1), (J,1)).T # num_sims x J
    
    Lambda_inv = np.zeros((J,J))
    Gamma = np.zeros((J,J))
    
    own_numerator = alpha*np.exp(u_t)  # num_sims x J
    denominator = Z
    
    for j in range(J):
        Lambda_inv[j,j] = 1 / (np.mean(own_numerator / denominator, axis=0)[j])
        
    for j in range(J):
        for k in range(J):
            Gamma[j,k] = np.mean(alpha*np.exp(u_t)[:,k]*np.exp(u_t)[:,j] / np.square(1 + np.sum(np.exp(u_t),axis=-1)))
    
    return Lambda_inv, Gamma

def get_fixed_point_function(t, beta2, beta3):
    ownership_matrix = np.identity(J)
    def F(p):
        Lambda_inv, Gamma = get_matrices(t, p, beta2, beta3)
        shares = mkt_share(t, p, beta2, beta3)
        zeta = np.matmul(np.matmul(Lambda_inv, ownership_matrix*Gamma), (p - mc[:,t])) - np.matmul(Lambda_inv, shares)
        return mc[:,t] + zeta
        
    return F



In [15]:
# Simulate equilibrium using the Morrow and Skerlos (2011) method
eq_prices_2 = np.zeros((J, T))
eq_shares_2 = np.zeros((J, T))

for t in trange(T):
    fn = get_fixed_point_function(t, beta2[:,t], beta3[:,t])
    mkt_eq_prices = fixed_point(fn, np.array([1,1,1,1]), method="iteration")
    eq_prices_2[:,t] = mkt_eq_prices
    eq_shares_2[:, t] = mkt_share(t, mkt_eq_prices, beta2[:,t], beta3[:,t])

# the difference between the two methods, check that this is small
np.max(eq_prices_2 - eq_prices), np.max(eq_shares_2 - eq_shares)

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))




(1.373072322508051e-09, 4.207107107134789e-09)

In [16]:
# Precompute the price elasticities and diversion 
# What PyBLP does, and what we will do, is replace the diagonal of the diversion ratio matrix with the outside option diversion ratio (instead of -1)
true_price_elasticities = np.zeros((J,J,T))
true_diversion_ratios = np.zeros((J,J,T))

N = 100

for t in trange(T):
    own_price_derivative = own_mkt_share_derivative(t, eq_prices[:,t], beta2[:,t], beta3[:,t])
    derivative_matrix = full_mkt_share_derivative(t, eq_prices[:,t], beta2[:,t], beta3[:,t])
    true_price_elasticities[:,:,t] = eq_prices[:,t]*derivative_matrix / eq_shares[:,t].T
    derivative_matrix = full_mkt_share_derivative(t, eq_prices[:,t], beta2[:,t], beta3[:,t])
    outside_derivatives = outside_mkt_share_derivative(t, eq_prices[:,t], beta2[:,t], beta3[:,t])
    for j in range(J):
        for k in range(J):
            true_diversion_ratios[j,k,t] = -1*derivative_matrix[k,j]/derivative_matrix[j,j]
    for j in range(J):
        true_diversion_ratios[j,j,t] = -1*outside_derivatives[j]/derivative_matrix[j,j]

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))




In [17]:

market_ids = np.tile(np.arange(T) + 1,(J,1)).T.flatten()
firm_ids = np.tile(np.arange(J) + 1,(T,1)).flatten()
satellite = np.concatenate((np.ones((2,T)), np.zeros((2,T)))).T.flatten()
wired = np.concatenate((np.zeros((2,T)), np.ones((2,T)))).T.flatten()
observed_data = pd.DataFrame(data={
    "market_ids": market_ids, 
    "firm_ids": firm_ids,
    "shares": eq_shares.T.flatten(), 
    "prices": eq_prices.T.flatten(),
    "x": x.T.flatten(),
    "satellite": satellite,
    "wired": wired,
    "w": w.T.flatten()
})
unobserved_data = pd.DataFrame(data={
    "market_ids": market_ids, 
    "firm_ids": firm_ids,
    "xi": xi.T.flatten(),
    "omega": omega.T.flatten()
})

In [18]:
# Instrument Analysis

df1 = pd.DataFrame({
    'p1':eq_prices[0,:],
    "s1":eq_shares[0,:],
    'p2':eq_prices[1,:],
    "s2":eq_shares[1,:],
    'p3':eq_prices[2,:],
    "s3":eq_shares[2,:],
    'p4':eq_prices[3,:],
    "s4":eq_shares[3,:],
    'x1':pd.Series(x[0,:]),
    'w1':pd.Series(w[0,:]),
    'x2':pd.Series(x[1,:]),
    'w2':pd.Series(w[1,:]),
    'x3':pd.Series(x[2,:]),
    'w3':pd.Series(w[2,:]),
    'x4':pd.Series(x[3,:]),
    'w4':pd.Series(w[3,:]),
    
})

X = df1[["x1","x2","x3","x4","w1","w2","w3","w4"]]
# regress prices on observables 

modelp1 = sm.OLS(df1["p1"],X).fit()
modelp2 = sm.OLS(df1["p2"],X).fit()
modelp3 = sm.OLS(df1["p3"],X).fit()
modelp4 = sm.OLS(df1["p4"],X).fit()
models1 = sm.OLS(df1["s1"],X).fit()
models2 = sm.OLS(df1["s2"],X).fit()
models3 = sm.OLS(df1["s3"],X).fit()
models4 = sm.OLS(df1["s4"],X).fit()

In [19]:
modelp1.rsquared_adj, modelp2.rsquared_adj, modelp3.rsquared_adj, modelp4.rsquared_adj

(0.9435061522877474,
 0.9480775331245905,
 0.9468682020346536,
 0.9477482549660268)

In [20]:
models1.rsquared_adj, models2.rsquared_adj, models3.rsquared_adj, models4.rsquared_adj

(0.7644818350345643,
 0.7983742915490801,
 0.7648735858119549,
 0.7737755052449837)

## Part 4

In [21]:
model_data = observed_data.copy()
model_data["x_other"] = np.stack([
    x[1,:]+x[2,:]+x[3,:], 
    x[0,:]+x[2,:]+x[3,:], 
    x[0,:]+x[1,:]+x[3,:], 
    x[0,:]+x[1,:]+x[2,:]]).T.flatten()
model_data["w_other"] = np.stack(
    [w[1,:]+w[2,:]+w[3,:],
     w[0,:]+w[2,:]+w[3,:], 
     w[0,:]+w[1,:]+w[3,:], 
     w[0,:]+w[1,:]+w[2,:]]).T.flatten()


In [22]:
# 4A: Logit
outside_shares = 1 - np.sum(eq_shares, axis=0, keepdims=True)
y = np.log(eq_shares/outside_shares).T.flatten()
X = model_data[["x","satellite","wired","prices"]]
results = sm.OLS(y,X).fit()
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.314
Model:,OLS,Adj. R-squared:,0.313
Method:,Least Squares,F-statistic:,365.4
Date:,"Wed, 13 Oct 2021",Prob (F-statistic):,2.0900000000000003e-195
Time:,21:46:41,Log-Likelihood:,-3033.1
No. Observations:,2400,AIC:,6074.0
Df Residuals:,2396,BIC:,6097.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x,0.8375,0.029,28.572,0.000,0.780,0.895
satellite,1.3705,0.122,11.239,0.000,1.131,1.610
wired,1.3589,0.123,11.046,0.000,1.118,1.600
prices,-0.9518,0.044,-21.393,0.000,-1.039,-0.865

0,1,2,3
Omnibus:,41.828,Durbin-Watson:,2.047
Prob(Omnibus):,0.0,Jarque-Bera (JB):,48.815
Skew:,-0.263,Prob(JB):,2.51e-11
Kurtosis:,3.46,Cond. No.,30.0


Note that ignoring the endogeneity of prices results in underestimating the magnitudes of all the relevant parameters.

In [23]:
#6: IV-2SLS
X_exog = model_data[["x", "satellite", "wired"]].astype(float)
X_endog = model_data[["prices"]].astype(float)
Z = model_data[["w", "x_other", "w_other"]].astype(float)
# we'll just instrument for prices with w and x of the own-product; it seems good enough here
iv_model = IV2SLS(y, X_exog, X_endog, Z).fit()
iv_model.summary

0,1,2,3
Dep. Variable:,dependent,R-squared:,0.1841
Estimator:,IV-2SLS,Adj. R-squared:,0.1831
No. Observations:,2400,F-statistic:,2007.7
Date:,"Wed, Oct 13 2021",P-value (F-stat),0.0000
Time:,21:46:41,Distribution:,chi2(4)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
x,0.9461,0.0331,28.609,0.0000,0.8813,1.0109
satellite,3.8635,0.1878,20.575,0.0000,3.4955,4.2315
wired,3.8774,0.1880,20.628,0.0000,3.5090,4.2458
prices,-1.8992,0.0700,-27.132,0.0000,-2.0364,-1.7620


In [24]:
#4.7 nested logit

# construct log of within group share
satellite_share= eq_shares[0,:] + eq_shares[1,:]
wired_share= eq_shares[2,:] + eq_shares[3,:]
model_data["within_satellite_shares"] = model_data["satellite"]*np.log(eq_shares / satellite_share).T.flatten()
model_data["within_wired_shares"] = model_data["wired"]*np.log(eq_shares / wired_share).T.flatten()
model_data["within_group_shares"] = model_data["within_wired_shares"] + model_data["within_satellite_shares"]

# now use the other in-group firm's characteristics as instruments
model_data["x_other_satellite"] = np.stack([x[1,:], x[0,:], x[3,:], x[2,:]]).T.flatten()*model_data["satellite"]
model_data["w_other_satellite"] = np.stack([w[1,:], w[0,:], w[3,:], w[2,:]]).T.flatten()*model_data["satellite"]
model_data["x_other_wired"] = np.stack([x[1,:], x[0,:], x[3,:], x[2,:]]).T.flatten()*model_data["wired"]
model_data["w_other_wired"] = np.stack([w[1,:], w[0,:], w[3,:], w[2,:]]).T.flatten()*model_data["wired"]

X_exog = model_data[["x","satellite","wired"]]
X_endog = model_data[["prices", "within_satellite_shares", "within_wired_shares" ]]
Z = model_data[["w", "x_other", "w_other", "x_other_satellite", "w_other_satellite", "x_other_wired", "w_other_wired"]]
iv_model = IV2SLS(y, X_exog, X_endog, Z).fit()
iv_model.summary

0,1,2,3
Dep. Variable:,dependent,R-squared:,0.3589
Estimator:,IV-2SLS,Adj. R-squared:,0.3576
No. Observations:,2400,F-statistic:,2635.4
Date:,"Wed, Oct 13 2021",P-value (F-stat),0.0000
Time:,21:46:41,Distribution:,chi2(6)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
x,0.8483,0.0377,22.479,0.0000,0.7743,0.9223
satellite,3.4995,0.1923,18.200,0.0000,3.1226,3.8763
wired,3.4881,0.1825,19.111,0.0000,3.1303,3.8458
prices,-1.6649,0.0784,-21.241,0.0000,-1.8185,-1.5112
within_satellite_shares,0.2173,0.0770,2.8237,0.0047,0.0665,0.3681
within_wired_shares,0.1944,0.0714,2.7237,0.0065,0.0545,0.3342


In [25]:
# define functions for derivatives and shares in the nested logit

def full_mkt_share_derivative_nested(t, p, pars):
    
    XX =  [x[:,t], [1,1,0,0], [0,0,1,1], p  ]
        
    v_t = pars[0:4]  @ XX
    
    sigma_1 = pars[4]
    
    sigma_2 = pars[5]
    
    theta_2 = pars[3]
   
            
    D1 = np.exp(v_t[0]/(1- sigma_1)) + np.exp(v_t[1]/(1- sigma_1)) 
    
    D2 = np.exp(v_t[2]/(1- sigma_2)) + np.exp(v_t[3]/(1- sigma_2)) 
            
    Z = 1 + np.power(D1, (1- sigma_1)) + np.power(D2, (1- sigma_2))
    
    
    derivatives = np.zeros((J,J))
    
    for j in range(J):
        if j < 2:
            derivatives[j,j] = (theta_2/(1 - sigma_1))*( np.exp(v_t[j]/(1- sigma_1))*np.power(D1, sigma_1)*Z -  np.exp((2*v_t[j])/(1- sigma_1))*(   sigma_1*np.power(D1, sigma_1 -1)*Z + (1- sigma_1)) )  / np.square((np.power(D1, sigma_1)*Z))
        else:
            derivatives[j,j] = (theta_2/(1 - sigma_2))*( np.exp(v_t[j]/(1- sigma_2))*np.power(D2, sigma_2)*Z -  np.exp((2*v_t[j])/(1- sigma_2))*(   sigma_2*np.power(D2, sigma_2 -1)*Z + (1- sigma_2)) )  / np.square((np.power(D2, sigma_2)*Z))
            
    
    for j in range(J):
        for k in range(J):
            if not (j == k):
                if j < 2 and k < 2:
                    derivatives[j,k] = (-theta_2/(1 - sigma_1))*np.exp(v_t[j]/(1- sigma_1))*np.exp(v_t[k]/(1- sigma_1))*( sigma_1*np.power(D1,sigma_1-1)*Z + (1- sigma_1)   )  / np.square((np.power(D1, sigma_1)*Z))
                if j >= 2 and k >= 2:
                    derivatives[j,k] = (-theta_2/(1 - sigma_2))*np.exp(v_t[j]/(1- sigma_2))*np.exp(v_t[k]/(1- sigma_2))*( sigma_1*np.power(D2,sigma_2-1)*Z + (1- sigma_2)   )  / np.square((np.power(D2, sigma_2)*Z))
                if j < 2 and k >= 2:    
                    derivatives[j,k] = (-theta_2/(1 - sigma_2))*np.exp(v_t[j]/(1- sigma_1))*np.exp(v_t[k]/(1- sigma_2))*(1-sigma_2)*np.power(D2, - sigma_2)*np.power(D1, sigma_1) / np.square((np.power(D1, sigma_1)*Z))
                if j >= 2 and k < 2:    
                    derivatives[j,k] = (-theta_2/(1 - sigma_1))*np.exp(v_t[j]/(1- sigma_2))*np.exp(v_t[k]/(1- sigma_1))*(1-sigma_1)*np.power(D1, - sigma_1)*np.power(D2, sigma_2) / np.square((np.power(D2, sigma_2)*Z))
                    
                    
    estimated_shares = mkt_share_nested(t, p, pars)
    return derivatives, -1*theta_2*estimated_shares*(1 - estimated_shares.sum())
            
def mkt_share_nested(t, p, pars):
    
    XX =  [x[:,t], [1,1,0,0], [0,0,1,1], p  ]
        
    v_t = pars[0:4] @XX
    
    sigma_1 = pars[4]
    
    sigma_2 = pars[5]
    
    theta_2 = pars[3]
   

    D1 = np.exp(v_t[0]/(1- sigma_1)) + np.exp(v_t[1]/(1- sigma_1)) 
    
    D2 = np.exp(v_t[2]/(1- sigma_2)) + np.exp(v_t[3]/(1- sigma_2)) 
            
    Z = 1 + np.power(D1, (1- sigma_1)) + np.power(D2, (1- sigma_2))
    
    
    shares = np.zeros((J,1))
    
    for j in range(J):
        if j < 2:
            shares[j] = (np.exp(v_t[j]/(1-sigma_1))) /  (np.power(D1, sigma_1)*Z)
        else:
            shares[j] = (np.exp(v_t[j]/(1-sigma_2))) /  (np.power(D2, sigma_2)*Z)

       
    return shares
    


In [26]:
# Precompute the price elasticities and diversion 
nested_logit_price_elasticities = np.zeros((J,J,T))
nested_logit_diversion_ratios = np.zeros((J,J,T))

N = 100

for t in trange(T):
    derivative_matrix, outside_derivative = full_mkt_share_derivative_nested(t, eq_prices[:,t], iv_model.params)
    nested_logit_price_elasticities[:,:,t] = eq_prices[:,t]*derivative_matrix / eq_shares[:,t].T
    estimated_shares = mkt_share_nested(t, eq_prices[:,t], iv_model.params)
    for j in range(J):
        for k in range(J):
            nested_logit_diversion_ratios[j,k,t] = -1*derivative_matrix[k,j]/derivative_matrix[j,j]
        nested_logit_diversion_ratios[j,j,t] = -1*outside_derivative[j]/derivative_matrix[j,j]

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))




In [27]:
nested_logit_price_elasticities.mean(axis=2), true_price_elasticities.mean(axis=2)

(array([[-6.12190672,  1.99361387,  1.14721285,  1.16871821],
        [ 1.94980523, -6.22209392,  1.23343822,  1.28434919],
        [ 1.18472154,  1.16184628, -6.06022182,  2.04574024],
        [ 1.15410091,  1.21673082,  1.99322672, -6.28818366]]),
 array([[-4.06535006,  1.38543391,  0.80172334,  0.7895892 ],
        [ 1.27934133, -4.16553436,  0.71112989,  0.71512854],
        [ 0.73928313,  0.74163481, -4.17726162,  1.3416553 ],
        [ 0.72070405,  0.7189693 ,  1.30923805, -4.18978309]]))

In [28]:
nested_logit_diversion_ratios.mean(axis=2), true_diversion_ratios.mean(axis=2)

(array([[0.28479711, 0.33297753, 0.19134439, 0.19088096],
        [0.32541291, 0.28728495, 0.19641401, 0.19088814],
        [0.19547647, 0.20791738, 0.28870153, 0.32231153],
        [0.19719596, 0.20488804, 0.32444768, 0.28792372]]),
 array([[0.33115087, 0.30335128, 0.18522023, 0.18027762],
        [0.32317153, 0.32122579, 0.18063565, 0.17496703],
        [0.19329289, 0.17575241, 0.32765373, 0.30330097],
        [0.19192008, 0.17341037, 0.31037504, 0.32429451]]))

# Part 5

## 5.a: Demand-side Estimation only

In [29]:
# BLP, Demand-side estimation only

demand_problem = pyblp.Problem(
    [
        pyblp.Formulation("0 + prices + x + satellite + wired"),
        pyblp.Formulation("0 + satellite + wired")
    ],
    observed_data[[
        "market_ids", 
        "firm_ids",
        "shares", 
        "prices",
        "x",
        "satellite",
        "wired"]],
    integration=pyblp.Integration('product', size=9),
)

Initializing the problem ...
Initialized the problem after 00:00:00.

Dimensions:
 T    N     F     I     K1    K2    MD 
---  ----  ---  -----  ----  ----  ----
600  2400   4   48600   4     2     3  

Formulations:
       Column Indices:             0        1        2        3  
-----------------------------  ---------  -----  ---------  -----
 X1: Linear Characteristics     prices      x    satellite  wired
X2: Nonlinear Characteristics  satellite  wired                  


In [30]:
# we will assume that the random coefficients on satellite and wired are uncorrellated

# this step is going to spit out a lot of text, most of which is not meaningful yet. 
# the first iteration of .solve is only to compute the optimal instruments, and hence these first-step estimates are not very good

demand_problem_w_instruments = demand_problem.solve(sigma=np.identity(2)).compute_optimal_instruments().to_problem()


Solving the problem ...

Nonlinear Coefficient Initial Values:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +1.000000E+00               
  wired    +0.000000E+00  +1.000000E+00

Nonlinear Coefficient Lower Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +0.000000E+00               
  wired    +0.000000E+00  +0.000000E+00

Nonlinear Coefficient Upper Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite      +INF                    
  wired    +0.000000E+00      +INF     

Starting optimization ...

GMM   Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Projected                                
Step   Iterations   Evaluations  Iterations   Evaluations  Shares       Value       Improvement   Gradient Norm             Theta            
----  ------------  -----------  -----------  -----------  -------  ---

In [31]:
# now we resolve the problem given the optimal instruments
demand_problem_results = demand_problem_w_instruments.solve(sigma=np.identity(2),optimization=pyblp.Optimization('l-bfgs-b', {'maxls': 30}))

Solving the problem ...

Nonlinear Coefficient Initial Values:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +1.000000E+00               
  wired    +0.000000E+00  +1.000000E+00

Nonlinear Coefficient Lower Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +0.000000E+00               
  wired    +0.000000E+00  +0.000000E+00

Nonlinear Coefficient Upper Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite      +INF                    
  wired    +0.000000E+00      +INF     

Starting optimization ...

GMM   Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Projected                                
Step   Iterations   Evaluations  Iterations   Evaluations  Shares       Value       Improvement   Gradient Norm             Theta            
----  ------------  -----------  -----------  -----------  -------  ---

These estimates are not bad.

## 5.a: Demand and Supply Estimation

In [32]:
full_problem = pyblp.Problem(
    [
        pyblp.Formulation("0 + prices + x + satellite + wired"),
        pyblp.Formulation("0 + satellite + wired"),
        pyblp.Formulation("1 + w")
    ],
    product_data = observed_data,
    integration=pyblp.Integration('product', size=9),
    costs_type="log"
)

Initializing the problem ...
Initialized the problem after 00:00:00.

Dimensions:
 T    N     F     I     K1    K2    K3    MD    MS 
---  ----  ---  -----  ----  ----  ----  ----  ----
600  2400   4   48600   4     2     2     3     2  

Formulations:
       Column Indices:             0        1        2        3  
-----------------------------  ---------  -----  ---------  -----
 X1: Linear Characteristics     prices      x    satellite  wired
X2: Nonlinear Characteristics  satellite  wired                  
X3: Log Cost Characteristics       1        w                    


In [33]:
# once again, we construct optimal instruments
full_problem_w_instruments = full_problem.solve(sigma=np.identity(2),beta=[-1,None,None,None]).compute_optimal_instruments().to_problem()

Solving the problem ...

Nonlinear Coefficient Initial Values:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +1.000000E+00               
  wired    +0.000000E+00  +1.000000E+00

Beta Initial Values:
   prices            x          satellite        wired    
-------------  -------------  -------------  -------------
-1.000000E+00       NAN            NAN            NAN     

Nonlinear Coefficient Lower Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +0.000000E+00               
  wired    +0.000000E+00  +0.000000E+00

Beta Lower Bounds:
   prices            x          satellite        wired    
-------------  -------------  -------------  -------------
    -INF           -INF           -INF           -INF     

Nonlinear Coefficient Upper Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite      +INF                    
  wired    +0.000000E+00   

In [34]:
# and here are the estimation results
full_problem_results = full_problem_w_instruments.solve(sigma=0.9*np.identity(2),beta=[-1,None,None,None], check_optimality="both")

Solving the problem ...

Nonlinear Coefficient Initial Values:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +9.000000E-01               
  wired    +0.000000E+00  +9.000000E-01

Beta Initial Values:
   prices            x          satellite        wired    
-------------  -------------  -------------  -------------
-1.000000E+00       NAN            NAN            NAN     

Nonlinear Coefficient Lower Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite  +0.000000E+00               
  wired    +0.000000E+00  +0.000000E+00

Beta Lower Bounds:
   prices            x          satellite        wired    
-------------  -------------  -------------  -------------
    -INF           -INF           -INF           -INF     

Nonlinear Coefficient Upper Bounds:
 Sigma:      satellite        wired    
---------  -------------  -------------
satellite      +INF                    
  wired    +0.000000E+00   

These estimates are even better than the previous section. We'll use these in the coming sections.

## 5.b Own-price Elasticities, Diversion Ratios

In [35]:
estimated_price_elasticities = full_problem_results.compute_elasticities()

Computing elasticities with respect to prices ...
Finished after 00:00:01.



In [36]:
estimated_diversion_ratios = full_problem_results.compute_diversion_ratios()

Computing diversion ratios with respect to prices ...
Finished after 00:00:01.



In [37]:
estimated_own_price_elasticities = estimated_price_elasticities.reshape(T,J,J).mean(axis=0)

In [38]:
true_price_elasticities.mean(axis=2), estimated_own_price_elasticities

(array([[-4.06535006,  1.38543391,  0.80172334,  0.7895892 ],
        [ 1.27934133, -4.16553436,  0.71112989,  0.71512854],
        [ 0.73928313,  0.74163481, -4.17726162,  1.3416553 ],
        [ 0.72070405,  0.7189693 ,  1.30923805, -4.18978309]]),
 array([[-4.05026563,  1.36210624,  0.70040876,  0.66790244],
        [ 1.50046032, -4.1558555 ,  0.70040876,  0.66790244],
        [ 0.7378061 ,  0.65818679, -4.16101852,  1.38817984],
        [ 0.7378061 ,  0.65818679,  1.4468293 , -4.17569613]]))

The estimates are pretty close to the true values

In [39]:
estimated_diversion_ratios.reshape((T,J,J)).mean(axis=0)

array([[0.32909482, 0.32544548, 0.17501464, 0.17044505],
       [0.3455732 , 0.3188287 , 0.17046779, 0.16513031],
       [0.18282689, 0.16629782, 0.3246221 , 0.32625318],
       [0.18145558, 0.16389933, 0.33358963, 0.32105546]])

In [40]:
true_diversion_ratios.mean(axis=2)

array([[0.33115087, 0.30335128, 0.18522023, 0.18027762],
       [0.32317153, 0.32122579, 0.18063565, 0.17496703],
       [0.19329289, 0.17575241, 0.32765373, 0.30330097],
       [0.19192008, 0.17341037, 0.31037504, 0.32429451]])

These look reasonably close as well.

# Part 6

In [41]:
# merge firms 1 and 2
observed_data['merger_1_ids'] = observed_data['firm_ids'].replace(2, 1)

# merge firms 1 and 3
observed_data['merger_2_ids'] = observed_data['firm_ids'].replace(3, 1)

In [42]:
marginal_costs = full_problem_results.compute_costs()

merger_1_prices = full_problem_results.compute_prices(
    firm_ids=observed_data['merger_1_ids'],
    costs=marginal_costs
)

merger_2_prices = full_problem_results.compute_prices(
    firm_ids=observed_data['merger_2_ids'],
    costs=marginal_costs
)

Computing marginal costs ...
Finished after 00:00:01.

Solving for equilibrium prices ...
Finished after 00:00:03.

Solving for equilibrium prices ...
Finished after 00:00:03.



In [43]:
np.mean(eq_prices, axis=1)

array([2.73266213, 2.71653207, 2.76078363, 2.73913598])

In [44]:
# relative price changes, merging 1 and 2
np.mean(merger_1_prices.reshape((T,J)),axis=0)  

array([2.97945002, 2.99353235, 2.77124927, 2.74875349])

In [45]:
# relative price changes, merging 1 and 3
np.mean(merger_2_prices.reshape((T,J)),axis=0)

array([2.84694606, 2.72847439, 2.8831668 , 2.75133819])

In [46]:
reduction_factors = np.concatenate([0.85*np.ones([T,2]),np.ones([T,2])],axis=1).reshape((T*J,1))
reduced_costs = marginal_costs * reduction_factors


merger_1_prices_w_cost_reduction = full_problem_results.compute_prices(
    firm_ids=observed_data['merger_1_ids'],
    costs=reduced_costs
)


Solving for equilibrium prices ...
Finished after 00:00:03.



In [47]:
# post-merger relative price changes, 1 and 2 with marginal cost reduction
np.mean(merger_1_prices_w_cost_reduction.reshape((T,J)),axis=0)

array([2.78201464, 2.79398463, 2.76110894, 2.73900857])

In [48]:
pre_merger_surpluses = full_problem_results.compute_consumer_surpluses()
post_merger_surpluses = full_problem_results.compute_consumer_surpluses(prices=merger_1_prices_w_cost_reduction)

Computing consumer surpluses with the equation that assumes away nonlinear income effects ...
Finished after 00:00:01.

Computing consumer surpluses with the equation that assumes away nonlinear income effects ...
Finished after 00:00:01.



In [49]:
# assuming measure of consumers in each market is 1, the net surpluses are just the sums
# this is the net effect on consumer welfare
np.sum(post_merger_surpluses - pre_merger_surpluses)

-6.5718246112984176

In [50]:
post_merger_shares = full_problem_results.compute_shares(merger_1_prices_w_cost_reduction)
pre_merger_profits = full_problem_results.compute_profits()
post_merger_profits = full_problem_results.compute_profits(merger_1_prices_w_cost_reduction, post_merger_shares, reduced_costs)

Computing shares ...
Finished after 00:00:01.

Computing profits ...
Finished after 00:00:01.

Computing profits ...
Finished after 00:00:00.



In [51]:
# once again assuming measure 1 of consumers in each market
# net change in profits
np.sum(post_merger_profits - pre_merger_profits) 

69.19100664228262

In [52]:
# welfare change
np.sum(post_merger_surpluses - pre_merger_surpluses) + np.sum(post_merger_profits - pre_merger_profits) 

62.6191820309842