In [1]:
from configure import *
from functions_homework1 import *
import warnings
warnings.filterwarnings("ignore")


# Problem 1
## 1(a)
In this part I reg ln(s_jt/s_0t) to X_jt,p_jt, I_Diet, I_Regular. This means I treat xi_j as noise in regression.

The estimations are:

const      -3.044239
price      -1.626102
sugar       1.528062
caffeine    1.445800
Diet        0.603640
Regular    -3.647879

In [2]:
Dic_para_q1 = dict()
Dic_para_q2q3 = dict()
# Load the data
data = pd.read_csv('product_data.csv')
data = prepare_data(data)

# Prepare the design matrix X and dependent variable y
X = data[['price', 'sugar', 'caffeine', 'Diet', 'Regular']]
# X = sm.add_constant(X)  # Add a constant term to the model
y = data['log_odds']

# Fit the model
model = sm.OLS(y, X)
model_results = model.fit()

# Print the results
print(model_results.summary())

params_ols = get_parameters(model_results, nested=False, print_results=False)
Dic_para_q1['OLS'] = params_ols

                            OLS Regression Results                            
Dep. Variable:               log_odds   R-squared:                       0.885
Model:                            OLS   Adj. R-squared:                  0.884
Method:                 Least Squares   F-statistic:                     1912.
Date:                Wed, 18 Sep 2024   Prob (F-statistic):               0.00
Time:                        15:48:54   Log-Likelihood:                -1305.2
No. Observations:                1000   AIC:                             2620.
Df Residuals:                     995   BIC:                             2645.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
price         -1.6261      0.021    -77.352      0.0

## 1(a)-2 
In this part I reg ln(s_jt/s_0t) to X_jt,p_jt, I_Diet, I_Regular and 9 dummies for each j (drop ID_1). This means I treat xi_j as fixed within periods in regression.

In [3]:
ID_dummies = pd.get_dummies(data['product_ID'], prefix='ID').astype(int)
ID_dummies = ID_dummies.drop('ID_1', axis=1)

X = pd.concat([X, ID_dummies], axis=1)

y = data['log_odds']

# Fit the model
model = sm.OLS(y, X)
model_results = model.fit()

# Print the results
params_ols_fixed_effect = get_parameters(
    model_results, nested=False, print_results=True)
Dic_para_q1['OLS_fixed_effect'] = params_ols_fixed_effect

alpha (price): -1.6239
beta1 (sugar): 1.5230
beta2 (caffeine): 1.4450
gamma_D (Diet): -2.3990
gamma_R (Regular): -5.5584


## 1(b)


In [4]:
# Construct instruments
data['Z1'] = data['caffeine_extract_price'] * data['caffeine']
data['Z2'] = data['corn_syrup_price'] * data['sugar']

# Prepare the data for IV regression

X = data[['sugar', 'caffeine', 'Diet', 'Regular', 'price']]
instruments = data[['sugar', 'caffeine', 'Diet', 'Regular', 'Z1', 'Z2']]
y = data['log_odds']

# Perform 2SLS estimation
iv_model = IV2SLS(y, X, instruments)
iv_results = iv_model.fit()

# Print the results
params_IV = get_parameters(iv_results, nested=False, print_results=True)
Dic_para_q1['IV'] = params_IV

alpha (price): -1.2405
beta1 (sugar): 1.3486
beta2 (caffeine): 1.3318
gamma_D (Diet): -2.6493
gamma_R (Regular): -6.5599


## 1(c)
Let $M=\sum_{k\neq j} exp(\delta_k)+1$, then $\frac{\partial s_{jt}}{\partial p_{jt}}=\frac{Mexp(\delta_{jt})}{(M+exp(\delta_{jt}))^2}\alpha=\alpha s_{jt}(1-s_{jt})$

In [5]:
def own_price_derivative(alpha, s_jt):
    return alpha * s_jt * (1 - s_jt)

# Function to calculate own-price elasticity


def own_price_elasticity(alpha, p_jt, s_jt):
    return alpha * p_jt * (1 - s_jt)


def own_PD_PE(df, params_ols):
    """take alpha and call the function, return the results"""
    data = df.copy()
    # Calculate own-price derivatives and elasticities
    alpha = params_ols['alpha']
    data['own_price_derivative'] = own_price_derivative(
        alpha, data['market_share'])
    data['own_price_elasticity'] = own_price_elasticity(
        alpha, data['price'], data['market_share'])

    # Calculate mean elasticities for Regular and Diet drinks
    mean_elasticity_regular = data[data['nest']
                                   == 'Regular']['own_price_elasticity'].mean()
    mean_elasticity_diet = data[data['nest'] ==
                                'Diet']['own_price_elasticity'].mean()

    print(
        f"Mean own-price elasticity for Regular drinks: {mean_elasticity_regular:.4f}")
    print(
        f"Mean own-price elasticity for Diet drinks: {mean_elasticity_diet:.4f}")

    return [mean_elasticity_regular, mean_elasticity_diet,data]


[mean_elasticity_regular, mean_elasticity_diet,data] = own_PD_PE(data, params_IV)
Dic_para_q2q3['own_elasticity_regular_1c'] = mean_elasticity_regular
Dic_para_q2q3['own_elasticity_diet_1c'] = mean_elasticity_diet

# # * Plot histograms of elasticities
# plt.figure(figsize=(12, 6))
# plt.subplot(1, 2, 1)
# data[data['nest'] == 'Regular']['own_price_elasticity'].hist(bins=20)
# plt.title('Own-Price Elasticities - Regular Drinks')
# plt.xlabel('Elasticity')
# plt.ylabel('Frequency')

# plt.subplot(1, 2, 2)
# data[data['nest'] == 'Diet']['own_price_elasticity'].hist(bins=20)
# plt.title('Own-Price Elasticities - Diet Drinks')
# plt.xlabel('Elasticity')
# plt.ylabel('Frequency')

# plt.tight_layout()
# plt.show()

Mean own-price elasticity for Regular drinks: -3.6948
Mean own-price elasticity for Diet drinks: -2.6162


## 1(d)
Similarly $\frac{\partial s_{jt}}{\partial p_{1t}}=-\alpha s_{jt}s_{1t}$

In [6]:
# Function to calculate cross-price elasticity
def cross_price_derivative(alpha, s_jt, s_it):
    return -alpha * s_jt * s_it

def cross_price_elasticity(alpha, p_1t, s_jt, s_1t):
    return -alpha * p_1t * s_jt


def cross_PD_PE(df,params_ols):
    data=df.copy()
    alpha = params_ols['alpha']

    # Get product 1's market share for each time period
    product_1_share = data[data['product_ID'] == 1].set_index('t')['market_share']

    # Calculate cross-price derivatives and elasticities
    data['cross_price_derivative'] = data.apply(lambda row: cross_price_derivative(alpha, row['market_share'], product_1_share[row['t']])
                                                if row['product_ID'] != 1 else np.nan, axis=1)

    data['cross_price_elasticity'] = data.apply(lambda row: cross_price_elasticity(alpha, data.loc[(data['product_ID'] == 1) & (data['t'] == row['t']), 'price'].values[0],
                                                row['market_share'], product_1_share[row['t']])
                                                if row['product_ID'] != 1 else np.nan, axis=1)

    # Calculate mean cross-price elasticities
    mean_cross_elasticity_diet = data[(data['nest'] == 'Diet') & (
        data['product_ID'] != 1)]['cross_price_elasticity'].mean()
    mean_cross_elasticity_regular = data[(data['nest'] == 'Regular') & (
        data['product_ID'] != 1)]['cross_price_elasticity'].mean()

    print(
        f"Mean cross-price elasticity between product 1 and Diet sodas: {mean_cross_elasticity_diet:.4f}")
    print(
        f"Mean cross-price elasticity between product 1 and Regular sodas: {mean_cross_elasticity_regular:.4f}")
    return [mean_cross_elasticity_diet,mean_cross_elasticity_regular,data]

[mean_cross_elasticity_diet,mean_cross_elasticity_regular,data]= cross_PD_PE(data,params_IV)
Dic_para_q2q3['cross_elasticity_regular_1d'] = mean_cross_elasticity_regular
Dic_para_q2q3['cross_elasticity_diet_1d'] = mean_cross_elasticity_diet

# * Plot histograms of cross-price elasticities
# plt.figure(figsize=(12, 6))
# plt.subplot(1, 2, 1)
# data[(data['nest'] == 'Diet') & (data['product_ID'] != 1)
#      ]['cross_price_elasticity'].hist(bins=20)
# plt.title('Cross-Price Elasticities - Product 1 vs Diet Drinks')
# plt.xlabel('Elasticity')
# plt.ylabel('Frequency')

# plt.subplot(1, 2, 2)
# data[(data['nest'] == 'Regular') & (data['product_ID'] != 1)
#      ]['cross_price_elasticity'].hist(bins=20)
# plt.title('Cross-Price Elasticities - Product 1 vs Regular Drinks')
# plt.xlabel('Elasticity')
# plt.ylabel('Frequency')

# plt.tight_layout()
# plt.show()

Mean cross-price elasticity between product 1 and Diet sodas: 0.1806
Mean cross-price elasticity between product 1 and Regular sodas: 0.3401


## 1(e)
Use the formula that $\frac{\partial s_{jt}}{\partial p_{it}}=-\alpha s_{jt}s_{it}$

In [7]:
def generate_jacobian(time_period, data, params):
    """
    Generate the Jacobian matrix of price derivatives for a given time period.
    I Do need market_share column in the data pd
    Returns: np.array like Jacobian matrix of price derivatives.
    """
    alpha = params['alpha']
    # Filter data for the given time period
    period_data = data[data['t'] == time_period]
    n_products = len(period_data)
    shares = period_data['market_share'].values

    # Initialize the Jacobian matrix
    jacobian = np.zeros((n_products, n_products))

    # Fill in the Jacobian matrix
    for i in range(n_products):
        for j in range(n_products):
            if i == j:
                # Own-price derivative
                s_it = shares[i]
                jacobian[i, i] = own_price_derivative(alpha, s_it)
            else:
                # Cross-price derivative
                s_jt = shares[j]
                s_it = shares[i]
                jacobian[i, j] = cross_price_derivative(alpha, s_jt,  s_it)

    return jacobian


def print_jacobian(data, params_IV):
    # Generate Jacobian for the last time period
    last_period = data['t'].max()
    jacobian_last_period = generate_jacobian(last_period, data, params_IV)
    print(f"Jacobian matrix for period {last_period}:")
    print(np.round(jacobian_last_period, 3))


data_Q1=data.copy()
print_jacobian(data, params_IV)

Jacobian matrix for period 100:
[[-0.073  0.     0.001  0.002  0.019  0.003  0.035  0.008  0.003  0.003]
 [ 0.    -0.002  0.     0.     0.     0.     0.001  0.     0.     0.   ]
 [ 0.001  0.    -0.013  0.     0.003  0.     0.006  0.001  0.001  0.   ]
 [ 0.002  0.     0.    -0.025  0.006  0.001  0.011  0.002  0.001  0.001]
 [ 0.019  0.     0.003  0.006 -0.229  0.012  0.135  0.029  0.012  0.011]
 [ 0.003  0.     0.     0.001  0.012 -0.047  0.022  0.005  0.002  0.002]
 [ 0.035  0.001  0.006  0.011  0.135  0.022 -0.306  0.053  0.022  0.02 ]
 [ 0.008  0.     0.001  0.002  0.029  0.005  0.053 -0.108  0.005  0.004]
 [ 0.003  0.     0.001  0.001  0.012  0.002  0.022  0.005 -0.049  0.002]
 [ 0.003  0.     0.     0.001  0.011  0.002  0.02   0.004  0.002 -0.043]]


## Compare estimations from different methods

In [8]:
pd.DataFrame(Dic_para_q1)

Unnamed: 0,OLS,OLS_fixed_effect,IV
alpha,-1.626102,-1.623944,-1.240471
beta1,1.528062,1.522991,1.348568
beta2,1.4458,1.444985,1.331827
gamma_D,-2.440599,-2.399023,-2.649342
gamma_R,-6.692118,-5.558367,-6.559863


# Problem 2

## 2(a)

In [9]:
Dic_para_q2 = dict()

In [10]:
# Load the data
data = pd.read_csv('product_data.csv')
data = prepare_data(data)
data['group_share'] = data.groupby(
    ['t', 'nest'])['market_share'].transform('sum')
data['within_group_share'] = data['market_share'] / data['group_share']
data['log_within_share'] = np.log(data['within_group_share'])

# Prepare the design matrix X and dependent variable y
# Create instruments
data['Z1'] = data['caffeine_extract_price'] * data['caffeine']
data['Z2'] = data['corn_syrup_price'] * data['sugar']
exog_list = ['sugar', 'caffeine', 'Diet', 'Regular', 'log_within_share']
X = data[['price']+exog_list]
y = data['log_odds']
instruments = data[['Z1', 'Z2']+exog_list]

# Fit the model
model = sm.OLS(y, X)
model_results = model.fit()
nest_params_ols = get_parameters(
    model_results, nested=True, print_results=False)
Dic_para_q2['nest_OLS'] = nest_params_ols

# Perform 2SLS estimation
iv_model = IV2SLS(y, X, instruments)
iv_results = iv_model.fit()
nest_params_IV = get_parameters(iv_results, nested=True, print_results=False)
Dic_para_q2['nest_params_IV'] = nest_params_IV

## 2(b)

In [11]:
[Nest_mean_elasticity_regular, Nest_mean_elasticity_diet,data] = own_PD_PE(data, nest_params_IV)
Dic_para_q2q3['own_Nest_elasticity_regular_2b'] = Nest_mean_elasticity_regular
Dic_para_q2q3['own_Nest_elasticity_diet_2b'] = Nest_mean_elasticity_diet

Mean own-price elasticity for Regular drinks: -1.1759
Mean own-price elasticity for Diet drinks: -0.8326


## 2(c)

In [12]:
[Nest_mean_cross_elasticity_regular, Nest_mean_cross_elasticity_diet,data] = cross_PD_PE(data, nest_params_IV)
Dic_para_q2q3['cross_Nest_elasticity_regular_2c'] = Nest_mean_cross_elasticity_regular
Dic_para_q2q3['cross_Nest_elasticity_diet_2c'] = Nest_mean_cross_elasticity_diet

Mean cross-price elasticity between product 1 and Diet sodas: 0.0575
Mean cross-price elasticity between product 1 and Regular sodas: 0.1082


## 2(d)

In [13]:
part_2= pd.DataFrame(Dic_para_q2)
part_1= pd.DataFrame(Dic_para_q1)
merge_params_all = pd.concat([part_1,part_2],axis=1)
wanted_lis = [x for x in merge_params_all.columns if 'IV' in x]
merge_params_all[wanted_lis].round(3)

Unnamed: 0,IV,nest_params_IV
alpha,-1.24,-0.395
beta1,1.349,0.388
beta2,1.332,0.398
gamma_D,-2.649,2.532
gamma_R,-6.56,1.918
sigma,,0.78


In [14]:
pd.DataFrame([Dic_para_q2q3]).T

Unnamed: 0,0
own_elasticity_regular_1c,-3.694777
own_elasticity_diet_1c,-2.616178
cross_elasticity_regular_1d,0.340054
cross_elasticity_diet_1d,0.180621
own_Nest_elasticity_regular_2b,-1.175896
own_Nest_elasticity_diet_2b,-0.832623
cross_Nest_elasticity_regular_2c,0.057484
cross_Nest_elasticity_diet_2c,0.108225


## 2(e)

In [15]:
print_jacobian(data, nest_params_IV)

Jacobian matrix for period 100:
[[-0.023  0.     0.     0.001  0.006  0.001  0.011  0.002  0.001  0.001]
 [ 0.    -0.001  0.     0.     0.     0.     0.     0.     0.     0.   ]
 [ 0.     0.    -0.004  0.     0.001  0.     0.002  0.     0.     0.   ]
 [ 0.001  0.     0.    -0.008  0.002  0.     0.004  0.001  0.     0.   ]
 [ 0.006  0.     0.001  0.002 -0.073  0.004  0.043  0.009  0.004  0.003]
 [ 0.001  0.     0.     0.     0.004 -0.015  0.007  0.001  0.001  0.001]
 [ 0.011  0.     0.002  0.004  0.043  0.007 -0.097  0.017  0.007  0.006]
 [ 0.002  0.     0.     0.001  0.009  0.001  0.017 -0.034  0.002  0.001]
 [ 0.001  0.     0.     0.     0.004  0.001  0.007  0.002 -0.015  0.001]
 [ 0.001  0.     0.     0.     0.003  0.001  0.006  0.001  0.001 -0.014]]


## 3


## 3(a)
The FOC is $s_{jt}+(P_{jt}-c_{jt})\frac{\partial s_{jt}}{\partial p_{jt}}=0$

If we believe the market data represents the Nash Equilibrium status, then mutual best response means $\frac{\partial s_{jt}}{\partial p_{jt}}$ is evaluated when $s_{-j,t}$ lies in the EQM level: the point estimate we get is also a function evaluated at EQM.


so we can estimate $\hat c_jt=P_{jt}+s_{jt}/(\frac{\partial s_{jt}}{\partial p_{jt}})$



The can query by own_price_derivative(alpha, s_jt), but actually is stored in data

In [16]:
def calc_cost_Lerner(data):
    data['middle']=data['market_share']/data['own_price_derivative']
    data['cost']=data['price']+data['middle']
    data['middle_2']=data['price']-data['cost']
    data['Lerner']=data['middle_2']/data['price']
    data=data.drop('middle',axis=1).drop('middle_2',axis=1)
    return data

data_Nest=calc_cost_Lerner(data)
# for mean Lerner I think it's mean over time
part_Q2=data_Nest.groupby('product_ID').agg({'Lerner':np.mean})

#seems weird so I go back and check whether derivatives got from MNL makes sense
data_ML=calc_cost_Lerner(data_Q1)  
part_Q1 = data_ML.groupby('product_ID').agg({'Lerner':np.mean})

part_Q2['Lerner_MultiNomialLogit']=part_Q1
part_Q2['Lerner_NestedLogit']=part_Q2['Lerner']
part_Q2=part_Q2.drop('Lerner',axis=1)
part_Q2

Unnamed: 0_level_0,Lerner_MultiNomialLogit,Lerner_NestedLogit
product_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.4249,1.335075
2,0.40902,1.28518
3,0.406436,1.277059
4,0.417436,1.311623
5,0.431794,1.356736
6,0.288568,0.906707
7,0.303915,0.954929
8,0.279151,0.877118
9,0.292641,0.919506
10,0.289598,0.909945


In [17]:
params_IV.values()
params_IV.keys()

dict_keys(['alpha', 'beta1', 'beta2', 'gamma_D', 'gamma_R'])

In [18]:
## MNL demand?
[alpha,beta1,beta2,gammaD,gammaR] = params_IV.values()
RHS_Var=['price','sugar','caffeine','Diet','Regular','t']
demand_df = data[RHS_Var]

temp_t=1
temp_price=[2.81436164900233,
 2.93573518294224,
 2.46730892762876,
 1.54395750915856,
 1.49596097782644,
 3.94999774301218,
 2.45584714519528,
 3.73511809728774,
 2.31672716171843,
 3.48878845421879]



share_function_MNL(params, demand_df, temp_t, temp_price)

NameError: name 'beta1' is not defined

In [19]:
temp_price

[2.81436164900233,
 2.93573518294224,
 2.46730892762876,
 1.54395750915856,
 1.49596097782644,
 3.94999774301218,
 2.45584714519528,
 3.73511809728774,
 2.31672716171843,
 3.48878845421879]

In [20]:
data[['price','cost','Lerner']]

Unnamed: 0,price,cost,Lerner
0,2.814362,-0.035342,1.012558
1,2.935735,0.177135,0.939663
2,2.467309,-0.303180,1.122879
3,1.543958,-1.007364,1.652456
4,1.495961,-1.077978,1.720592
...,...,...,...
995,3.183553,0.547516,0.828017
996,4.671217,0.109944,0.976464
997,3.134615,0.331928,0.894109
998,3.196304,0.555679,0.826149


In [21]:
data

Unnamed: 0,product_ID,nest,price,sugar,caffeine,market_share,caffeine_extract_price,corn_syrup_price,t,outside_share,...,Z1,Z2,own_price_derivative,own_price_elasticity,cross_price_derivative,cross_price_elasticity,middle,cost,middle_2,Lerner
0,1,Diet,2.814362,0.631224,6.752525,0.111141,0.267468,0.251714,1,0.003124,...,1.806083,0.158888,-0.039001,-0.987598,,,-2.849704,-0.035342,2.849704,1.012558
1,2,Diet,2.935735,0.004553,6.784396,0.081787,0.320000,0.253146,1,0.003124,...,2.171004,0.001153,-0.029648,-1.064212,0.003589,0.090872,-2.758600,0.177135,2.758600,0.939663
2,3,Diet,2.467309,0.739947,5.761261,0.085727,0.252531,0.314781,1,0.003124,...,1.454899,0.232921,-0.030943,-0.890568,0.003761,0.095250,-2.770489,-0.303180,2.770489,1.122879
3,4,Diet,1.543958,0.103660,4.468299,0.007187,0.203220,0.227481,1,0.003124,...,0.908049,0.023581,-0.002817,-0.605160,0.000315,0.007986,-2.551321,-1.007364,2.551321,1.652456
4,5,Diet,1.495961,0.971926,4.052750,0.015912,0.156466,0.244453,1,0.003124,...,0.634119,0.237590,-0.006182,-0.581195,0.000698,0.017679,-2.573939,-1.077978,2.573939,1.720592
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,6,Regular,3.183553,4.940455,5.401643,0.039094,0.248804,0.243654,100,0.003402,...,1.343949,1.203760,-0.014831,-1.207704,0.000971,0.049072,-2.636037,0.547516,2.636037,0.828017
996,7,Regular,4.671217,6.488062,5.877076,0.444676,0.226867,0.267637,100,0.003402,...,1.333316,1.736444,-0.097489,-1.024104,0.011048,0.558170,-4.561273,0.109944,4.561273,0.976464
997,8,Regular,3.134615,4.220873,6.415397,0.096230,0.220882,0.242589,100,0.003402,...,1.417048,1.023938,-0.034335,-1.118432,0.002391,0.120791,-2.802687,0.331928,2.802687,0.894109
998,9,Regular,3.196304,6.824942,2.963510,0.040763,0.233535,0.275997,100,0.003402,...,0.692084,1.883662,-0.015437,-1.210435,0.001013,0.051167,-2.640625,0.555679,2.640625,0.826149
