In [1]:
import sys
import scipy.optimize as opt
import math
import numpy as np
import pandas as pd
import pathlib
import scipy.linalg as linalg
from collections import OrderedDict
# Array display: 3 decimals, no scientific notation, summarize arrays above 1000 elements.
np.set_printoptions(precision=3, suppress=True, threshold=1000)
# DataFrame display: wide console, at most 20 rows shown before truncation.
pd.set_option("display.width", 1000)
pd.set_option("display.max_rows", 20)

## Model

The data is generated by the following indirect utility function:

$$
u_{ijm} = \beta_0 + x_{1j} (\bar{\beta} + \beta_1^u \nu_{i1}) + \xi_j + \epsilon_{ij}
$$

where $\nu_{i1} \sim N(0, 1)$ and $\epsilon_{ij}$ follows the Type I extreme value distribution. Assume $E[\xi_j \mid x_{1j}] = 0$.

In [2]:
# The CSV has no header row; column names and dtypes are supplied here, in file order.
path = "Data2019PS2.csv"
dtypes = OrderedDict()
dtypes["market_id"] = int
dtypes["product_id"] = int
dtypes["market_sales"] = float
df = pd.read_csv(path, header=None, names=list(dtypes), dtype=dtypes)

# Candidate market sizes (number of potential buyers) used to turn sales into shares.
total_populations = [1000000, 2000000, 4000000]
for p in total_populations:
    share_column = "market_share_{}".format(p)
    df[share_column] = df["market_sales"].div(p)

df

Unnamed: 0,market_id,product_id,market_sales,market_share_1000000,market_share_2000000,market_share_4000000
0,1,1,127379.0,0.127379,0.063689,0.031845
1,1,2,128477.0,0.128477,0.064239,0.032119
2,1,3,132527.0,0.132527,0.066264,0.033132
3,1,4,136916.0,0.136916,0.068458,0.034229
4,1,6,139476.0,0.139476,0.069738,0.034869
5,1,7,141856.0,0.141856,0.070928,0.035464
6,1,9,147506.0,0.147506,0.073753,0.036876
7,2,1,111019.0,0.111019,0.055510,0.027755
8,2,2,111895.0,0.111895,0.055947,0.027974
9,2,3,115405.0,0.115405,0.057702,0.028851


In [3]:
# Product characteristic x_1: ten evenly spaced values 0.01, ..., 0.10, paired with
# product_id 1..10.
# np.linspace fixes the number of points explicitly. np.arange with a fractional step
# derives its length from a floating-point division, so the array can silently gain or
# lose an element and no longer match the ten product ids.
product_chars = pd.DataFrame({
    "product_id": list(range(1, 11)),
    "char_1": np.linspace(0.01, 0.10, 10),
})
# Attach the characteristic to every (market, product) row.
df = pd.merge(df, product_chars, on="product_id")
df

Unnamed: 0,market_id,product_id,market_sales,market_share_1000000,market_share_2000000,market_share_4000000,char_1
0,1,1,127379.0,0.127379,0.063689,0.031845,0.01
1,2,1,111019.0,0.111019,0.055510,0.027755,0.01
2,3,1,124636.0,0.124636,0.062318,0.031159,0.01
3,5,1,123446.0,0.123446,0.061723,0.030862,0.01
4,6,1,213350.0,0.213350,0.106675,0.053338,0.01
5,7,1,98717.0,0.098717,0.049358,0.024679,0.01
6,8,1,111019.0,0.111019,0.055510,0.027755,0.01
7,9,1,88669.0,0.088669,0.044334,0.022167,0.01
8,11,1,172153.0,0.172153,0.086077,0.043038,0.01
9,12,1,110871.0,0.110871,0.055435,0.027718,0.01


## (a) Estimate a plain logit model, assuming that $\beta_1^u = 0$

In [4]:
# NOTE(review): market_size is not referenced by any other cell visible in this notebook.
market_size = 1000

# Outside-option share per market = 1 - (sum of inside shares), for N = 1,000,000.
outside_option_shares = (
    df.groupby("market_id")["market_share_1000000"]
    .apply(lambda shares: 1.0 - np.sum(shares))
    .reset_index()
)
outside_option_shares.columns = ["market_id", "outside_option_1000000"]

# Broadcast the market-level value onto each product row and order rows by (market, product).
df = pd.merge(df, outside_option_shares, on="market_id")
df = df.sort_values(["market_id", "product_id"])
df

Unnamed: 0,market_id,product_id,market_sales,market_share_1000000,market_share_2000000,market_share_4000000,char_1,outside_option_1000000
0,1,1,127379.0,0.127379,0.063689,0.031845,0.01,0.045863
1,1,2,128477.0,0.128477,0.064239,0.032119,0.02,0.045863
2,1,3,132527.0,0.132527,0.066264,0.033132,0.03,0.045863
3,1,4,136916.0,0.136916,0.068458,0.034229,0.04,0.045863
4,1,6,139476.0,0.139476,0.069738,0.034869,0.06,0.045863
5,1,7,141856.0,0.141856,0.070928,0.035464,0.07,0.045863
6,1,9,147506.0,0.147506,0.073753,0.036876,0.09,0.045863
7,2,1,111019.0,0.111019,0.055510,0.027755,0.01,0.040055
8,2,2,111895.0,0.111895,0.055947,0.027974,0.02,0.040055
9,2,3,115405.0,0.115405,0.057702,0.028851,0.03,0.040055


In [5]:
# Dependent variable of the logit inversion: log(s_j) - log(s_0).
y = np.log(df["market_share_1000000"]).values - np.log(df["outside_option_1000000"]).values
# Design matrix: a column of ones (intercept) followed by char_1.
X = np.column_stack([np.ones(len(df)), df["char_1"].values])
X

array([[1.  , 0.01],
       [1.  , 0.02],
       [1.  , 0.03],
       ...,
       [1.  , 0.06],
       [1.  , 0.07],
       [1.  , 0.08]])

In [6]:
# OLS via the normal equations (X'X) b = X'y; the result is (intercept, coefficient on char_1).
np.linalg.solve(X.T @ X, X.T @ y)

array([0.998, 1.973])

## (b)

To see the effect of the assumed market size (the population of potential buyers for this product), redo
(a) with $N = 2000000$ and $N = 4000000$. Discuss which parameter would be biased and explain why this is the case.

**TODO:** add the variance matrix of the estimates.

In [7]:
# Repeat the plain-logit OLS of part (a) for each remaining candidate market size.
for p in total_populations[1:]:
    share_column = "market_share_{}".format(p)
    outside_column = "outside_option_{}".format(p)

    # Outside-option share per market under population p.
    outside_option_shares = (
        df.groupby("market_id")[share_column]
        .apply(lambda shares: 1.0 - np.sum(shares))
        .reset_index()
    )
    outside_option_shares.columns = ["market_id", outside_column]
    df = pd.merge(df, outside_option_shares, on="market_id")

    # log(s_j) - log(s_0) regressed on a constant and char_1.
    y = np.log(df[share_column]).values - np.log(df[outside_column]).values
    X = np.column_stack([np.ones(len(df)), df["char_1"].values])
    print(np.linalg.solve(X.T @ X, X.T @ y))

[-2.17   1.858]
[-3.239  1.854]


## (c)

1. First compute $\delta^*$.
1. Obtain $\xi^*$ as an OLS residual.
1. Search for the random coefficient $\beta^u_1$ via MSM.

In [8]:
class BLP(object):
    """
    Random-coefficient logit estimator for market-level share data.

    For a candidate scale ``beta_u`` of the random coefficient, the mean utilities
    ``delta`` are recovered by a fixed-point iteration that matches simulated to
    observed market shares, ``delta`` is regressed on a constant and the product
    characteristics by OLS, and the sum of squared residuals is minimised over
    ``beta_u`` with ``scipy.optimize.minimize_scalar``.

    blp_df should include
        - market_id   (integers, 1-based; used as ``market_id - 1`` array index)
        - product_id  (integers, 1-based; used as ``product_id - 1`` array index)
        - market_share
        - outside_option_share
        - every column named in ``char_cols``

    product_chars should include
        - product_id  (expected to be exactly 1..num_products)
        - every column named in ``char_cols``

    Parameters
    ----------
    blp_df : pandas.DataFrame
        One row per (market, product) pair that is actually sold.
    product_chars : pandas.DataFrame
        One row per product.
    char_cols : list of str
        Names of the characteristic columns that receive a random coefficient.
    num_simulations : int
        Number of simulated consumers (standard-normal draws).
    random_seed : int, optional
        Seed for the draws; defaults to 1 when None.

    Raises
    ------
    ValueError
        If ``blp_df`` has no rows.
    """
    def __init__(self, blp_df, product_chars, char_cols, num_simulations, random_seed=None):
        if len(blp_df) == 0:
            raise ValueError("blp_df must contain at least one (market_id, product_id) row.")

        # Sort a private copy so that row k of self.df corresponds to row k of
        # self.existing_market_product_indices (both ordered by market, then product).
        self.df = blp_df.sort_values(["market_id", "product_id"]).reset_index(drop=True)
        # Row j of product_chars must describe product_id j + 1.
        self.product_chars = product_chars.sort_values("product_id").reset_index(drop=True)
        self.char_cols = char_cols
        if random_seed is None:
            random_seed = 1

        self.random_state = np.random.RandomState(seed=random_seed)
        self.num_simulations = num_simulations
        self.num_chars = len(char_cols)
        # Dimensions come from this instance's own data, never from a notebook global.
        # The largest id is used (not the number of distinct ids) because ids are turned
        # into array positions via ``id - 1``; a market id absent from the data simply
        # becomes an all-missing row.
        self.num_markets = int(self.df["market_id"].max())
        self.num_products = len(self.product_chars)
        self._get_missing_market_product_pairs()


    def _get_missing_market_product_pairs(self):
        """
        Record which (market, product) cells are present in ``self.df`` and which are not.

        Sets ``existing_market_product_indices`` and ``missing_market_product_indices``:
        integer arrays of shape (n, 2) holding zero-based (market, product) positions,
        ordered by market and then product.
        """
        present = np.zeros((self.num_markets, self.num_products), dtype=bool)
        present[
            self.df["market_id"].values - 1,
            self.df["product_id"].values - 1
        ] = True

        # np.argwhere walks the matrix row by row, i.e. sorted by market then product.
        self.existing_market_product_indices = np.argwhere(present)
        self.missing_market_product_indices = np.argwhere(~present)


    def get_BLP_estimate(self):
        """
        Estimate the random-coefficient scale and the mean-utility coefficients.

        Returns
        -------
        result : scipy.optimize.OptimizeResult
            Output of ``minimize_scalar``; ``result.x`` is the estimated ``beta_u``.
        beta_bar : numpy.ndarray
            OLS coefficients (intercept first, then one per entry of ``char_cols``)
            evaluated at ``result.x``.
        """
        # One row of standard-normal draws per simulated consumer.
        noise_matrix = self.random_state.standard_normal(size=(self.num_simulations, self.num_chars))
        # (num_products, num_chars) -> transpose so the product yields
        # (num_simulations, num_products) for any number of products/characteristics.
        chars = self.product_chars.loc[:, self.char_cols].values
        base_mu_matrix = noise_matrix @ chars.T
        self.mu_matrices_base = np.repeat(base_mu_matrix[:, None, :], self.num_markets, axis=1)
        # Products absent from a market get -inf so that exp(.) = 0 removes them
        # from that market's choice set.
        self.mu_matrices_base[
            :,
            self.missing_market_product_indices[:, 0],
            self.missing_market_product_indices[:, 1]
        ] = -np.inf
        self.finite_ind = (self.mu_matrices_base != -np.inf)

        log_share = np.log(self.df["market_share"]).values
        log_outside_share = np.log(self.df["outside_option_share"]).values

        # Starting value for delta: the plain-logit inversion log(s_j) - log(s_0).
        self.delta_init = np.zeros(shape=[self.num_markets, self.num_products])
        self.delta_init[
            self.existing_market_product_indices[:, 0],
            self.existing_market_product_indices[:, 1]
        ] = log_share - log_outside_share

        self.log_data_market_share = np.zeros(shape=[self.num_markets, self.num_products])
        self.log_data_market_share[
            self.existing_market_product_indices[:, 0],
            self.existing_market_product_indices[:, 1]
        ] = log_share

        # Design matrix: intercept plus one column per characteristic.
        self.X = np.ones([self.df.shape[0], self.num_chars + 1])
        self.X[:, 1:] = self.df[self.char_cols].values
        self.X_t = np.transpose(self.X)
        # Factor X'X once; it is reused at every objective evaluation.
        self.X_lu = linalg.lu_factor(self.X_t @ self.X)

        result = opt.minimize_scalar(self.msm_obj_func, tol=1e-6)
        # The optimizer's last evaluation need not be at result.x, and msm_obj_func
        # stores beta_bar as a side effect; evaluate once more at the optimum so the
        # returned coefficients correspond to result.x.
        self.msm_obj_func(result.x)
        return result, self.beta_bar

    # outer loop
    def msm_obj_func(self, beta_u):
        """
        Objective for a given ``beta_u``: sum of squared OLS residuals of delta on X.

        Side effect: stores the OLS coefficients in ``self.beta_bar``.
        """
        exp_mu_matrices = np.copy(self.mu_matrices_base)
        # Scale only finite entries: -inf * 0 would produce NaN.
        exp_mu_matrices[self.finite_ind] *= beta_u
        exp_mu_matrices = np.exp(exp_mu_matrices)

        delta_converged = self.compute_fixed_point(
            exp_mu_matrices,
            self.log_data_market_share,
            self.delta_init
        )
        # Keep only the cells that exist in the data, in the same order as the rows of X.
        y = delta_converged[
            self.existing_market_product_indices[:, 0],
            self.existing_market_product_indices[:, 1]
        ]
        self.beta_bar = linalg.lu_solve(self.X_lu, self.X_t@y)
        xi = y - self.X @ self.beta_bar
        return np.sum(np.power(xi, 2))


    # inner loop
    @staticmethod
    def compute_fixed_point(
        exp_mu_matrices,
        log_data_market_share,
        delta_init,
        eps=1e-6,
        max_iter=1000):
        """
        Iterate delta <- delta + log(observed share) - log(simulated share).

        Parameters
        ----------
        exp_mu_matrices : ndarray, shape (num_simulations, num_markets, num_products)
            exp of the consumer-specific utility part; 0 for products not in a market.
        log_data_market_share : ndarray, shape (num_markets, num_products)
            Log observed shares (0 where the product is not in the market).
        delta_init : ndarray, shape (num_markets, num_products)
            Starting value; not modified.
        eps : float
            Stop when the sum of squared changes in delta falls below this value.
        max_iter : int
            Maximum number of iterations.

        Returns
        -------
        ndarray, shape (num_markets, num_products)
            The most recent iterate of delta.
        """
        delta = delta_init
        for i in range(max_iter):
            choice_prob_matrix = np.exp(delta)[None, :, :] * exp_mu_matrices
            choice_prob_matrix = choice_prob_matrix / (1 + np.sum(choice_prob_matrix, axis=2))[:, :, None]
            simulated_market_share = np.mean(choice_prob_matrix, axis=0)
            if i == 0:
                # Cells with a zero simulated share are the products absent from a market;
                # the pattern is fixed on the first pass and reused afterwards.
                ind = (simulated_market_share > 0)

            log_simulated_market_share = np.zeros_like(simulated_market_share)
            log_simulated_market_share[ind] = np.log(simulated_market_share[ind])

            delta_new = delta + log_data_market_share - log_simulated_market_share
            squared_step = np.sum(np.power(delta_new - delta, 2), axis=None)
            # Keep the newest iterate so that it is the one returned on convergence.
            delta = delta_new
            if squared_step < eps:
                print("The contraction mapping converged in {} iterations.".format(i+1))
                break
        else:
            print("The contraction mapping does not converge in {} iterations.".format(max_iter))

        return delta
    

In [9]:
# Keep only the columns the estimator needs, using the N = 1,000,000 shares,
# and give the share columns the generic names the BLP class looks up.
blp_df = (
    df.loc[
        :, ["market_id", "product_id", "market_sales", "market_share_1000000", "outside_option_1000000", "char_1"]]
    .copy()
    .rename(columns={
        "market_share_1000000": "market_share",
        "outside_option_1000000": "outside_option_share"
    })
)

# Simulation settings for the random-coefficient draws.
num_simulations = 1000
random_seed = 1

blp_instance = BLP(blp_df, product_chars, ["char_1"], num_simulations, random_seed)

In [10]:
# Estimate on all markets: `result` is the scalar-optimizer output for beta_u,
# `beta_bar` holds the OLS coefficients (intercept, char_1).
result, beta_bar = blp_instance.get_BLP_estimate()

The contraction mapping converged in 1 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 45 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 29 iterations.
The contraction mapping converged in 29 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.
The contraction mapping converged in 34 iterations.


In [11]:
# Optimizer output for the full sample (x is the estimated beta_u).
result

     fun: 0.5585382576164789
    nfev: 15
     nit: 11
 success: True
       x: 1.086891774976817

In [12]:
# Mean-utility coefficients for the full sample: (intercept, coefficient on char_1).
beta_bar

array([1.   , 1.932])

## (d)

Use the first 10 and 100 markets to estimate the model.

In [13]:
# Same simulation settings as the full-sample run.
num_simulations = 1000
random_seed = 1

# Sub-samples: markets whose id is at most 10, and at most 100.
blp_df_10, blp_df_100 = (
    blp_df.loc[blp_df["market_id"] <= last_market_id, :] for last_market_id in (10, 100)
)

blp_instance_10 = BLP(blp_df_10, product_chars, ["char_1"], num_simulations, random_seed)
blp_instance_100 = BLP(blp_df_100, product_chars, ["char_1"], num_simulations, random_seed)

In [14]:
# Estimate using only the markets with market_id <= 10.
result_10, beta_bar_10 = blp_instance_10.get_BLP_estimate()

The contraction mapping converged in 1 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 21 iterations.
The contraction mapping converged in 1 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping converged in 2 iterations.
The contraction mapping conver

In [15]:
# Optimizer output for the market_id <= 10 sub-sample (x is the estimated beta_u).
result_10

     fun: 0.0051985218869492755
    nfev: 25
     nit: 21
 success: True
       x: 0.3038002163412557

In [16]:
# Mean-utility coefficients for the market_id <= 10 sub-sample: (intercept, char_1).
beta_bar_10

array([0.997, 1.96 ])

In [17]:
# Estimate using only the markets with market_id <= 100.
result_100, beta_bar_100 = blp_instance_100.get_BLP_estimate()

The contraction mapping converged in 1 iterations.
The contraction mapping converged in 9 iterations.
The contraction mapping converged in 17 iterations.
The contraction mapping converged in 9 iterations.
The contraction mapping converged in 7 iterations.
The contraction mapping converged in 4 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 5 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 5 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 5 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 6 iterations.
The contraction mapping converged in 5 iterations.
The contraction mapping conver

In [18]:
# Optimizer output for the market_id <= 100 sub-sample (x is the estimated beta_u).
result_100

     fun: 0.05743458643525041
    nfev: 31
     nit: 27
 success: True
       x: 0.6733046149497918

In [19]:
# Mean-utility coefficients for the market_id <= 100 sub-sample: (intercept, char_1).
beta_bar_100

array([0.999, 1.947])