### Industrial Organization - Assignment 2
##### Luciano Fabio Busatto Venturim
##### 1st Quarter - 2022
##### EPGE/FGV

In [1]:
import numpy as np
import pandas as pd
import linearmodels as lm
from statsmodels.api import add_constant
from scipy.optimize import minimize
from numba import njit

### Part 1 - Setup

In [2]:
# Load the assignment data; the path is relative to the notebook's working directory.
data = pd.read_csv('../data/io_assignment2_data.csv')

Creating the variable $c_{nt}$:

In [3]:
# c_{nt} = 1 when the next observation of the same market shows a machine of age 0,
# i.e. the machine was replaced in the current period.
data['replacement'] = (data['age'] == 0).astype('int').groupby(data['Market']).shift(-1)
# The last period of each market has no next observation, so the decision is unknown there.
# Note that dropna removes every row that contains a missing value in any column.
data.dropna(inplace=True)
data['replacement'] = data['replacement'].astype('int')

In [4]:
# Preview the first rows to check the constructed 'replacement' column.
data.head(10)

Unnamed: 0,Market,MarketSize,t,age,replacement
0,1,1.2,1,0,0
1,1,1.2,2,1,0
2,1,1.2,3,2,0
3,1,1.2,4,3,1
4,1,1.2,5,0,0
5,1,1.2,6,1,0
6,1,1.2,7,2,1
7,1,1.2,8,0,0
8,1,1.2,9,1,0
9,1,1.2,10,2,0


Now, we run a linear probability model.

In [5]:
# Linear probability model of the replacement decision on a constant, market size and age.
# No endogenous regressors and no instruments are supplied, so IV2SLS reports the fit as OLS.
lpm = lm.IV2SLS(
    dependent = data['replacement'],
    exog = add_constant(data[['MarketSize','age']]),
    endog = None,
    instruments = None,
).fit()

  x = pd.concat(x[::order], 1)


In [6]:
# Print the plain-text estimation summary of the linear probability model.
print(lpm.summary)

                            OLS Estimation Summary                            
Dep. Variable:            replacement   R-squared:                      0.1748
Estimator:                        OLS   Adj. R-squared:                 0.1739
No. Observations:                1900   F-statistic:                    353.63
Date:                Tue, Mar 15 2022   P-value (F-stat)                0.0000
Time:                        13:16:11   Distribution:                  chi2(2)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
const         -0.0941     0.0225    -4.1785     0.0000     -0.1382     -0.0500
MarketSize     0.1479     0.0228     6.4751     0.00

### Part 2 - Estimation

To speed up the estimation, I first collect the distinct values of `MarketSize` and `age` in sorted arrays. `ms_repeat` and `ages_repeat` simply repeat these arrays so that the algorithm can be vectorized over all possible combinations of market sizes and ages.

In [7]:
# State grid: np.unique returns the distinct values of each column in ascending order.
ms = np.unique(data['MarketSize'])
n_ms = ms.size
ages = np.unique(data['age'])
n_ages = ages.size

In [8]:
# Both matrices have shape (n_ms, n_ages): market sizes vary along the rows, ages along the columns.
ms_repeat = np.tile(ms[:, None], (1, n_ages))
ages_repeat = np.tile(ages[None, :], (n_ms, 1))

I also include the index of each data point with respect to the Value Function and CCPs matrices.

In [9]:
# Position of every grid value, so that each observation can be mapped to its row (market size)
# and column (age) in the value function and CCP matrices.
ms_position = {value: position for position, value in enumerate(ms)}
ages_position = {value: position for position, value in enumerate(ages)}
data['ms_ind'] = [ms_position[size] for size in data['MarketSize']]
data['ages_ind'] = [ages_position[age] for age in data['age']]

In [10]:
# Preview the data with the grid positions 'ms_ind' and 'ages_ind' added.
data.head()

Unnamed: 0,Market,MarketSize,t,age,replacement,ms_ind,ages_ind
0,1,1.2,1,0,0,10,0
1,1,1.2,2,1,0,10,1
2,1,1.2,3,2,0,10,2
3,1,1.2,4,3,1,10,3
4,1,1.2,5,0,0,10,0


In [20]:
def loglikelihood(par, data, iter_max = 1000, tol = 10e-5, beta = 0.9):
    
    '''Negative log-likelihood of the observed replacement decisions, computed with the NFP algorithm.
    
    The value function is solved by fixed-point iteration on the global state grid
    (n_ms, n_ages, ms_repeat, ages_repeat); the implied conditional choice probabilities
    are then evaluated at every observation.
    
    Parameters
    ----------
    par : array-like of length 3
        Parameters [theta, alpha, phi]. The flow payoff of keeping the machine is
        theta*MarketSize*(1 - alpha*age); the flow payoff of replacing it is -phi.
    data : DataFrame
        Must contain the columns 'replacement' (0/1), 'ms_ind' and 'ages_ind' (row and
        column position of each observation in the state grid).
    iter_max : int
        Maximum number of value function iterations.
    tol : float
        Sup-norm convergence tolerance (note that the default 10e-5 equals 1e-4).
    beta : float
        Discount factor.
    
    Returns
    -------
    float
        Minus the log-likelihood. np.inf is returned when the value function does not
        converge within iter_max iterations, so that an optimizer rejects the point
        instead of failing on a non-numeric value.
    '''
    
    flow_keep = par[0]*ms_repeat*(1-par[1]*ages_repeat)
    flow_replace = -par[2]
    
    def choice_values(vbig):
        '''Choice-specific values (keep, replace) implied by the value function vbig.'''
        vbig_next = np.roll(vbig, shift = -1, axis = 1)
        vbig_next[:,-1] = vbig[:,-1] #a machine at the last age of the grid stays at that age
        v_keep = flow_keep + beta*vbig_next
        v_replace = flow_replace + beta*vbig[:,:1] #replacing resets the age to the first column; broadcasts over ages
        return v_keep, v_replace
    
    vbig0 = np.zeros((n_ms,n_ages)) #entry (i,j) of the matrix is the value function evaluated at the ith marketsize and jth age
    
    for _ in range(iter_max):
        v0, v1 = choice_values(vbig0)
        vbig1 = np.logaddexp(v0, v1) + np.euler_gamma #overflow-safe log(exp(v0) + exp(v1))
        error = np.max(np.absolute(vbig1-vbig0))
        vbig0 = vbig1
        if error <= tol:
            break
    else:
        return(np.inf)
    
    #Recompute both choice values from the converged value function, so that they are mutually consistent.
    v0, v1 = choice_values(vbig0)
    log_denominator = np.logaddexp(v0, v1)
    log_ccp0 = v0 - log_denominator #log-probabilities avoid log(0) when a CCP underflows. indexing is equal to that in vbig0.
    log_ccp1 = v1 - log_denominator
    
    replaced = np.asarray(data['replacement']).astype(bool)
    rows = np.asarray(data['ms_ind'])
    cols = np.asarray(data['ages_ind'])
    ll = np.where(replaced, log_ccp1[rows,cols], log_ccp0[rows,cols])
    
    return(-np.sum(ll))

Optimizing using the "BFGS" method:

In [21]:
# Keep the full optimizer result so that its convergence diagnostics (opt_result.success,
# opt_result.message) remain available; args must be a tuple, hence the trailing comma.
opt_result = minimize(loglikelihood, x0 = np.array([1,0.1,1]), args = (data,), method = 'BFGS')
opt_par = opt_result.x

In [22]:
# Estimated parameters, in the order [theta, alpha, phi].
opt_par

array([1.45546067, 0.26154854, 2.58972716])

### Part 3

In [18]:
def ccp(par, iter_max = 1000, tol = 10e-5, beta = 0.9):
    
    """Conditional choice probabilities implied by the given parameters.
    
    The value function is solved by fixed-point iteration on the global state grid
    (n_ms, n_ages, ms_repeat, ages_repeat).
    
    Parameters
    ----------
    par : array-like of length 3
        Parameters [theta, alpha, phi]. The flow payoff of keeping the machine is
        theta*MarketSize*(1 - alpha*age); the flow payoff of replacing it is -phi.
    iter_max : int
        Maximum number of value function iterations.
    tol : float
        Sup-norm convergence tolerance (note that the default 10e-5 equals 1e-4).
    beta : float
        Discount factor.
    
    Returns
    -------
    (ccp0, ccp1) : tuple of arrays of shape (n_ms, n_ages)
        Probability of keeping (ccp0) and of replacing (ccp1) the machine; entry (i,j)
        refers to the ith market size and jth age. None is returned when the value
        function does not converge within iter_max iterations.
    """
    
    flow_keep = par[0]*ms_repeat*(1-par[1]*ages_repeat)
    flow_replace = -par[2]
    
    def choice_values(vbig):
        """Choice-specific values (keep, replace) implied by the value function vbig."""
        vbig_next = np.roll(vbig, shift = -1, axis = 1)
        vbig_next[:,-1] = vbig[:,-1] #a machine at the last age of the grid stays at that age
        v_keep = flow_keep + beta*vbig_next
        v_replace = flow_replace + beta*vbig[:,:1] #replacing resets the age to the first column; broadcasts over ages
        return v_keep, v_replace
    
    vbig0 = np.zeros((n_ms,n_ages)) #entry (i,j) of the matrix is the value function evaluated at the ith marketsize and jth age
    
    for _ in range(iter_max):
        v0, v1 = choice_values(vbig0)
        vbig1 = np.logaddexp(v0, v1) + np.euler_gamma #overflow-safe log(exp(v0) + exp(v1))
        error = np.max(np.absolute(vbig1-vbig0))
        vbig0 = vbig1
        if error <= tol:
            break
    else:
        return(None)
    
    #Recompute both choice values from the converged value function, so that they are mutually consistent.
    v0, v1 = choice_values(vbig0)
    log_denominator = np.logaddexp(v0, v1)
    
    ccp0 = np.exp(v0 - log_denominator) #conditional choice probabilities. indexing is equal to that in vbig0.
    ccp1 = np.exp(v1 - log_denominator)
    
    return(ccp0,ccp1)

To evaluate counterfactuals, I first get the Conditional Choice Probabilities using the estimated parameters.

In [23]:
# CCPs at the estimated parameters: ccp0 is the probability of keeping, ccp1 of replacing.
ccp0, ccp1 = ccp(opt_par)

In this exercise, we want to evaluate how the CCPs change when the cost of replacement is reduced by 50%, for a market size equal to 1.

In [35]:
# Counterfactual: theta and alpha unchanged, replacement cost phi cut in half.
ccp0_cf, ccp1_cf = ccp(opt_par * np.array([1, 1, 0.5]))

In [36]:
# Row of the CCP matrices that corresponds to a market size of exactly 1.
ms1_row = np.where(ms == 1)[0][0]
ccp0_cf_ms1 = ccp0_cf[ms1_row,:]
ccp1_cf_ms1 = ccp1_cf[ms1_row,:]
ccp0_ms1 = ccp0[ms1_row,:]
ccp1_ms1 = ccp1[ms1_row,:]

In [48]:
# Build the table with the actual ages as its index, then style it. The DataFrame keeps its
# own name, so `ccps` is only ever the styled table that is displayed.
ccps_table = pd.DataFrame({'Estimated':ccp1_ms1, 'Counterfactual':ccp1_cf_ms1}, index = ages)
ccps_table.index.name = 'Age'
ccps = ccps_table.style.set_caption('Conditional probabilities of replacing the machine when market size is 1')

In [49]:
# Display the estimated and counterfactual replacement probabilities by age.
ccps

Unnamed: 0_level_0,Estimated,Counterfactual
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.03957,0.106886
1,0.102411,0.214597
2,0.20805,0.351786
3,0.344282,0.493995
4,0.486808,0.621394
5,0.615294,0.725019
6,0.720228,0.804257
7,0.800673,0.862518
8,0.859908,0.904294
9,0.902338,0.933741


As we can see, the reduction of the cost of replacement increases the probability of replacement of the machine, as expected.