This file useses the purchases of the four FAs that we could find (2239-5-LR21, 2239-8-LR23, 2239-4-LR17 and ID 2239-20-LP13 ) and estimates a demand model. 

    'IDProductoCM' -> j 
    To create product characteristics: 'Tipo de Producto', 'Marca', 'Nombre Producto ONU'
    'Precio Unitario'-> net not including taxes, just changes the scale of the price parameter 
    'Rut Unidad de Compra' -> i 
    To create the groups (k):  'Región Unidad de Compra', 'Sector'
     'year' -> t 

     'Modelo'-> to do the match with the product characteristics. 
        
Variables not directly paired but which could be useful: 
    'Nro Licitación Pública', 'Id Convenio Marco', 'Convenio Marco', 'CodigoOC', 'Fecha Envío OC', 'Cantidad', 'Rut Proveedor', 
    'Nombre Proveedor Sucursal', 'Orgcode_Comprador', 'Entcode_Comprador', 


In [456]:
%reset -f

In [457]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn as sns
from pandasgui import show
import socket
import numpy as np
from unidecode import unidecode
import unicodedata
import re
from difflib import SequenceMatcher
import pyblp


In [458]:
def calculate_market_shares_with_outside_good(df_transac, share_adjustment = True):
    """
    Calculate market shares using market-specific N_i from the DataFrame
    """
    # Group by product (j) and market (t) to count purchases
    product_counts = df_transac.groupby(['product_ids', 'market_ids']).size().reset_index(name='purchases')
    
    # Get N_i for each market (taking first occurrence since it's constant within market)
    market_populations = df_transac.groupby('market_ids')['N_i'].first().reset_index()
    
    # Merge N_i with product counts
    product_counts = product_counts.merge(market_populations, on='market_ids')
    
    # Calculate market shares
    if share_adjustment: 
        product_counts['shares'] = np.minimum(5*product_counts['purchases'] / product_counts['N_i'], 0.5)
    else: 
        product_counts['shares'] = product_counts['purchases'] / product_counts['N_i']
    
    product_counts['shares_real'] =product_counts['purchases'] / product_counts['N_i']
                                               
    x_vars = [col for col in df_transac.columns if col.startswith('x')] #list of variables starting with x 
    
    # Get product characteristics
    product_chars = df_transac.groupby(['product_ids', 'market_ids']).agg(
        {var: 'first' for var in x_vars} | {'prices': 'first'}
    ).reset_index()
    
    # Merge market shares with product characteristics
    final_df = product_chars.merge(product_counts[['product_ids', 'market_ids', 'shares', 'shares_real']], on=['product_ids', 'market_ids'])
    
    # Calculate outside good share for each market
    market_shares = final_df.groupby('market_ids')['shares'].sum()
    outside_shares = 1 - market_shares
    market_shares_real = final_df.groupby('market_ids')['shares_real'].sum()
    outside_shares_real = 1 - market_shares_real
    
    
    # Create outside good rows
    markets = df_transac['market_ids'].unique()
    outside_goods = pd.DataFrame()
    outside_goods['market_ids'] = markets
    outside_goods['product_ids'] = 0

    for var in x_vars:
        if pd.api.types.is_numeric_dtype(df_transac[var]):
            outside_goods[var] = 0  # Set numeric variables to 0
        else:
            outside_goods[var] = '-'  # Set categorical variables to '-'
    
    outside_goods['prices'] = 0
    outside_goods['shares'] = [outside_shares[t] for t in markets]
    outside_goods['shares_real'] = [outside_shares_real[t] for t in markets]

    # Combine outside goods with other products
    final_df = pd.concat([final_df, outside_goods])
    
    # Sort by market and product ID
    final_df = final_df.sort_values(['market_ids', 'product_ids'])
    
    return final_df

def normalize_shares(group):
    """
    Function to normalize shares for each market
    """
    # Fix shares that are not negative
    total_shares = group[group['shares'] > 0.01]['shares'].sum()
    scaling_factor = 0.99 - group[group['shares'] == 0.01]['shares'].sum()
    if total_shares > 0:  # Avoid division by zero
        group.loc[group['shares'] > 0.01, 'shares'] *= scaling_factor / total_shares
    return group

In [459]:
dest_path = os.path.abspath(os.path.join('..', 'interm_data', 'yearly_data', 'Transacciones', 'joined_cleaned_transac.csv'))
print(f"Absolute path of dest_path: {dest_path}")
transac_df = pd.read_csv(dest_path) 
#show(transac_df)


rename_dict = {
    'Rut Unidad de Compra': 'i',
    'IDProductoCM': 'product_ids',
    'Tipo de Producto': 'x1',
    'Marca' : 'x2',
    'Nombre Proveedor Sucursal' : 'x3',
    'Precio Unitario': 'prices',
}

transac_df.rename(columns=rename_dict, inplace=True)

Absolute path of dest_path: c:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\interm_data\yearly_data\Transacciones\joined_cleaned_transac.csv


In [460]:
# create the k groups based on 'Region Unidad de Compra' and 'Sector' 
transac_df['k'] = transac_df.groupby(['Sector', 'Región Unidad de Compra']).ngroup()

#define the market 
transac_df['month'] = pd.to_datetime(transac_df['Fecha Envío OC']).dt.month
transac_df['trimester'] = ((transac_df['month'] - 1) // 3) + 1
transac_df['semester'] = ((transac_df['month'] - 1) // 6) + 1


transac_df['market_ids3'] = transac_df.groupby(['year', 'month', 'Región Unidad de Compra']).ngroup()
transac_df['market_ids2'] = transac_df.groupby(['year', 'trimester', 'Región Unidad de Compra']).ngroup()
transac_df['market_ids'] = transac_df.groupby(['year', 'semester', 'Región Unidad de Compra']).ngroup()
#transac_df = transac_df.drop('Fecha Envío OC', axis=1)


#create a number of consumers at the market level 
region_counts = transac_df.groupby('Región Unidad de Compra')['i'].nunique()
transac_df['N_i'] = transac_df['Región Unidad de Compra'].map(region_counts)

#drop obs with missing values 
transac_df = transac_df.dropna(subset=['x1', 'x2', 'x3'])


transac_df
#

Unnamed: 0,product_ids,Modelo,x1,x2,Nombre Producto ONU,prices,i,Región Unidad de Compra,Sector,year,...,Puntaje Precio_y,exact_merge_var,k,month,trimester,semester,market_ids3,market_ids2,market_ids,N_i
0,1536899,H-1 MB 2.5 CRDI 6M/T GLS 10S AC 2AB ABS,MINIBUS,HYUNDAI,Minibuses,17098600.0,69.071.700-K,Metropolitana,Municipalidades,2018,...,,,48,2,1,1,6,11,12,216
1,1536914,SANTA FE DM WGN 2.4 6A/T 4WD GLS FULL PE,SUV,HYUNDAI,Automóviles,19740000.0,69.073.500-8,Valparaíso,Municipalidades,2018,...,,,50,2,1,1,8,13,14,99
2,1536916,SANTA FE DM WGN 2.4 6A/T GLS PE,SUV,HYUNDAI,Automóviles,14921513.0,65.154.016-k,Metropolitana,"Gob. Central, Universidades",2018,...,,,24,2,1,1,6,11,12,216
3,1536916,SANTA FE DM WGN 2.4 6A/T GLS PE,SUV,HYUNDAI,Automóviles,14921513.0,65.154.021-6,Metropolitana,"Gob. Central, Universidades",2018,...,,,24,2,1,1,6,11,12,216
4,1536979,NEW MAHINDRA SCORPIO SUV 4X4 ABS 2AB,SUV,MAHINDRA,Automóviles,10497983.0,61.955.100-1,Araucanía,Salud,2018,...,,,86,2,1,1,0,1,1,62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4277,2049176,V700 RAPID 1.4L 2024,FURGÓN,RAM,Mini-furgonetas o furgonetas,12410800.0,65.154.398-3,Tarapacá,"Gob. Central, Universidades",2024,...,,,25,8,3,2,900,388,215,26
4278,2054456,D60 ELITE 7DCT 4X2 AT 2024,SUV,MAXUS,Automóviles,16403950.0,69.130.600-3,Maule,Municipalidades,2024,...,,,47,8,3,2,898,386,213,56
4279,2048705,T8 CE COMFORT 2.0T 4X2 MT 2024,CAMIONETA,JAC,Automóviles,15541506.0,69.071.800-6,Metropolitana,Municipalidades,2024,...,,,48,8,3,2,899,387,214,216
4280,2048790,T8 PRO LUXURY 4X4 MT 2024,CAMIONETA,JAC,Automóviles,18481640.0,69.071.800-6,Metropolitana,Municipalidades,2024,...,,,48,8,3,2,899,387,214,216


In [461]:
# Count unique combinations of x1, x2, x3
unique_combinations = transac_df.groupby(['x1', 'x2', 'x3']).size().reset_index().shape[0]

# Count individual unique values
x1_unique = transac_df['x1'].nunique()
x2_unique = transac_df['x2'].nunique()
x3_unique = transac_df['x3'].nunique()

print(f"Number of unique combinations (x1, x2, x3): {unique_combinations}")
print(f"Number of unique x1 values: {x1_unique}")
print(f"Number of unique x2 values: {x2_unique}")
print(f"Number of unique x3 values: {x3_unique}")

Number of unique combinations (x1, x2, x3): 381
Number of unique x1 values: 10
Number of unique x2 values: 41
Number of unique x3 values: 65


In [462]:
for col in ['x1', 'x2', 'x3']:
    # Calculate the value counts for the current column
    col_counts = transac_df[col].value_counts()
    print( col_counts)
    # Identify values that occur in less than 10% of the observations
    values_to_replace = col_counts[col_counts < len(transac_df) * 0.01].index
    
    # Replace these values with 'Other'
    transac_df[col] = transac_df[col].replace(values_to_replace, 'Other')


    # Count the number of unique values for 'x1', 'x2', and 'x3'
    x1_unique_values = transac_df['x1'].nunique()
    x2_unique_values = transac_df['x2'].nunique()
    x3_unique_values = transac_df['x3'].nunique()

    print(f"Number of unique values in 'x1,x2,x3': {x1_unique_values, x2_unique_values, x3_unique_values}")

x1
CAMIONETA               2189
SUV                      908
MINIBUS                  380
SEDÁN                    254
AMBULANCIAS              184
VEHÍCULOS POLICIALES     119
CAMIÓN LIVIANO           103
FURGÓN                    95
HATCHBACK                 41
CARGO                      8
Name: count, dtype: int64
Number of unique values in 'x1,x2,x3': (9, 41, 65)
x2
HYUNDAI          676
TOYOTA           651
NISSAN           648
MAXUS            339
-                303
CHEVROLET        261
MITSUBISHI       208
GREAT WALL       141
JMC              139
JAC               91
MAZDA             83
KIA               73
FORD              71
VOLKSWAGEN        67
SUZUKI            66
PEUGEOT           49
SSANGYONG         45
RAM               44
RENAULT           40
KIA MOTORS        39
CITROEN           37
CHANGAN           37
MAHINDRA          27
HONDA             26
GREATWALL         25
CHERY             22
FIAT              18
FOTON             13
SUBARU             7
MG                

In [463]:
# Count unique combinations of x1, x2, x3
unique_combinations = transac_df.groupby(['x1', 'x2', 'x3']).size().reset_index().shape[0]

# Count individual unique values
x1_unique = transac_df['x1'].nunique()
x2_unique = transac_df['x2'].nunique()
x3_unique = transac_df['x3'].nunique()

print(f"Number of unique combinations (x1, x2, x3): {unique_combinations}")
print(f"Number of unique x1 values: {x1_unique}")
print(f"Number of unique x2 values: {x2_unique}")
print(f"Number of unique x3 values: {x3_unique}")

Number of unique combinations (x1, x2, x3): 284
Number of unique x1 values: 9
Number of unique x2 values: 19
Number of unique x3 values: 23


In [494]:
show(transac_df)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x14fcb5bd000>

## market level demand
This is without product char

### Transform to product level data

I multiplied the market shares by 5 and then renormalize in cases there were any negative shares or they summed up to more than 1.

In [464]:
print(transac_df['market_ids3'].nunique())
print(transac_df['market_ids2'].nunique())
print(transac_df['market_ids'].nunique())

903
391
218


In [465]:
product_data = calculate_market_shares_with_outside_good(transac_df)
show(product_data)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x14e8eb7f640>

In [466]:
negative_shares_percentage = (product_data['shares'] < 0).mean() * 100
print(f"Percentage of negative shares: {negative_shares_percentage:.2f}%")

Percentage of negative shares: 4.34%


In [467]:
product_data['shares'] = product_data['shares'].apply(lambda x: 0.01 if x < 0 else x)

# Apply normalization by market_ids
product_data = product_data.groupby('market_ids').apply(normalize_shares)

# Reset index if needed
product_data.reset_index(drop=True, inplace=True)

negative_shares_percentage = (product_data['shares'] < 0).mean() * 100
print(f"Percentage of negative shares: {negative_shares_percentage:.2f}%")


Percentage of negative shares: 0.00%


In [468]:
outside_good_shares = product_data[product_data['product_ids'] == 0]['shares'].describe()
print(outside_good_shares)

count    218.000000
mean       0.179570
std        0.269579
min        0.010000
25%        0.010000
50%        0.010000
75%        0.347337
max        0.877500
Name: shares, dtype: float64


In [469]:
sum_market_shares = product_data.groupby('market_ids')['shares'].sum()
markets_with_high_shares = sum_market_shares[sum_market_shares > 1]
print(markets_with_high_shares)

product_data.loc[product_data['product_ids'] == 0, 'shares'] -= 0.001


Series([], Name: shares, dtype: float64)


In [470]:
X1_formulation = pyblp.Formulation('1 + x1 + x2 + x3 + prices') # non random coefficient formulation
X2_formulation = pyblp.Formulation('0 + prices') # variables with random coefficients. 
product_formulations = (X1_formulation, X2_formulation)
product_formulations

(1 + x1 + x2 + x3 + prices, prices)

In [471]:

mc_integration = pyblp.Integration('monte_carlo', size=50, specification_options={'seed': 0})
mc_integration
pr_integration = pyblp.Integration('product', size=5)
pr_integration

mc_problem = pyblp.Problem(product_formulations, product_data, integration=mc_integration)
mc_problem
pr_problem = pyblp.Problem(product_formulations, product_data, integration=pr_integration)
pr_problem

bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-4})


Initializing the problem ...
Initialized the problem after 00:00:00.

Dimensions:
 T    N      I     K1    K2    MD 
---  ----  -----  ----  ----  ----
218  3202  10900   52    1     51 

Formulations:
       Column Indices:           0             1                 2                  3                 4              5             6            7           8                  9                     10             11             12               13           14         15         16          17           18              19              20           21             22           23            24              25            26              27                      28                           29                               30                                31                                 32                                    33                                        34                                             35                               36                  37              38        

Detected collinearity issues with [x2['VOLKSWAGEN'], x3['Salinas y Fabres S.A.']] and at least one other column in X1. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x2['VOLKSWAGEN'], x3['Salinas y Fabres S.A.']] and at least one other column in ZD. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x2['VOLKSWAGEN'], x3['Salinas y Fabres S.A.']] and at least one other column in X1. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x2['VOLKSWAGEN'], x3['Salinas y Fabres S.A.']] and at least one other column in ZD. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.


In [472]:
results1 = mc_problem.solve(sigma=np.ones((1, 1)), optimization=bfgs)

#results2 = pr_problem.solve(sigma=np.ones((1, 1)), optimization=bfgs)
#results2


The model may be under-identified. The total number of unfixed parameters is 53, which is more than the total number of moments, 51. Consider checking whether instruments were properly specified when initializing the problem, and whether parameters were properly configured when solving the problem.
Detected that the 2SLS weighting matrix is nearly singular with condition number +4.124298E+17. To disable singularity checks, set options.singular_tol = numpy.inf.


Solving the problem ...

Nonlinear Coefficient Initial Values:
Sigma:     prices    
------  -------------
prices  +1.000000E+00
Starting optimization ...


At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below:
The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Gradient                  
Step     Time

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +6.413006E+16. To disable singularity checks, set options.singular_tol = numpy.inf.



The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

Computed results after 00:00:08.

Problem Results Summary:
GMM     Objective      Gradient                    Clipped  Weighting Matrix
Step      Value          Norm          Hessian     Shares   Condition Number
----  -------------  -------------  -------------  -------  ----------------
 1    +4.129242E+42  +5.765426E+24  -8.400279E+30   2943     +1.332403E+18  

Starting optimization ...


At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results,

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +2.092498E+17. To disable singularity checks, set options.singular_tol = numpy.inf.
Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +4.082675E+18. To disable singularity checks, set options.singular_tol = numpy.inf.


In [473]:
results3 = mc_problem.solve(sigma=np.eye(1), optimization=bfgs)
results3

Solving the problem ...

Nonlinear Coefficient Initial Values:
Sigma:     prices    
------  -------------
prices  +1.000000E+00
Starting optimization ...



The model may be under-identified. The total number of unfixed parameters is 53, which is more than the total number of moments, 51. Consider checking whether instruments were properly specified when initializing the problem, and whether parameters were properly configured when solving the problem.
Detected that the 2SLS weighting matrix is nearly singular with condition number +4.124298E+17. To disable singularity checks, set options.singular_tol = numpy.inf.



At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below:
The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Gradient                  
Step     Time       Iterations   Evaluations  Iterations   Evaluations  Shares       Value       Improvement       Norm           Theta    
----  -----------  ----------

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +6.413006E+16. To disable singularity checks, set options.singular_tol = numpy.inf.



The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

Computed results after 00:00:10.

Problem Results Summary:
GMM     Objective      Gradient                    Clipped  Weighting Matrix
Step      Value          Norm          Hessian     Shares   Condition Number
----  -------------  -------------  -------------  -------  ----------------
 1    +4.129242E+42  +5.765426E+24  -8.400279E+30   2943     +1.332403E+18  

Starting optimization ...


At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results,

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +2.092498E+17. To disable singularity checks, set options.singular_tol = numpy.inf.
Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +4.082675E+18. To disable singularity checks, set options.singular_tol = numpy.inf.


Problem Results Summary:
GMM     Objective      Gradient                    Clipped  Weighting Matrix  Covariance Matrix
Step      Value          Norm          Hessian     Shares   Condition Number  Condition Number 
----  -------------  -------------  -------------  -------  ----------------  -----------------
 2    +6.623659E+02  +6.758802E-17  +0.000000E+00   2943     +8.406958E+18      +1.334244E+29  

Cumulative Statistics:
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:00:20       No           0             4          33242       100180   

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
Sigma:      prices     
------  ---------------
prices   +1.000000E+00 
        (+3.182645E+15)

Beta Estimates (Robust SEs in Parentheses):
       1         x1['AMBULANCIAS']  x1['CAMIONETA']  x1['CAM

# Market level demand (with prod char)

In [525]:
car_path = os.path.join('..', 'car_data', 'final_matched_data.csv')
print(os.path.abspath(car_path))
car_df_original = pd.read_csv(car_path, encoding = 'latin1')
car_df = car_df_original.copy()

c:\Users\lucas\OneDrive - Yale University\Documents\GitHub\2nd-year-paper\car_data\final_matched_data.csv


### data cleaning

1. select vars that could be relevant, 2. to reduce fixed effects if a variable is categorical and has more than 5 values we remove it. 

In [526]:
char_list = [ 'Motor - Cilindrada', 'Medidas y capacidades - Ancho sin espejos', 'Medidas y capacidades - Largo', 
        'Medidas y capacidades - Alto', 'Performance - Rendimiento en ciudad', 'TransmisiÃ³n y chasis - Motor - tracciÃ³n', 
        'TransmisiÃ³n y chasis - SuspensiÃ³n trasera', 'Confort - Aire acondicionado', 'Confort - Tapizados',
        'Confort - Cierre de puertas', 'Confort - Vidrios (del. - tras.)', 'Confort - Espejos exteriores',
        'Confort - Faros delanteros', 'Confort - Faros antiniebla', 'Confort - Computadora de a bordo', 
        'Confort - DirecciÃ³n asistida', 'Confort - Llantas', 'Seguridad - Airbags', 
        'Seguridad - Alarma e inmovilizador de motor', 'Confort - Sensores de estacionamiento', 'Tipo de Producto']   
            
model_vars = ['Cargos Adicionales OC', 'Monto Total OC', 'Rut Unidad de Compra', 
        'RegiÃÂ³n Unidad de Compra', 'Sector', 'CodigoOC' , 'Marca', 'Precio Unitario',
        'IDProductoCM',  'Modelo']

extra_vars = ['Nro LicitaciÃÂ³n PÃÂºblica', 'Fecha EnvÃÂ­o OC', 'Cantidad', 'Rut Proveedor', 'Nombre Proveedor Sucursal']

car_df = car_df[model_vars + extra_vars + char_list]

# Rename prod char as x1, x2, x3, ...
for n, col in enumerate(char_list, start=1):  # Start numbering from 1
    car_df.rename(columns={col: f'x{n}'}, inplace=True)


car_df = car_df.rename(columns={
    'Fecha EnvÃÂ­o OC': 'Fecha',
    'RegiÃÂ³n Unidad de Compra': 'Region', 
})

#######################
for n, col in enumerate(char_list, start=1):  # Start numbering from 1
    print(f"number of unique values in {f'x{n}'}: {car_df[f'x{n}'].nunique()}")


drop_list = ['x7', 'x12', 'x13', 'x18']
car_df = car_df.drop(drop_list, axis=1)

number of unique values in x1: 25
number of unique values in x2: 24
number of unique values in x3: 29
number of unique values in x4: 28
number of unique values in x5: 18
number of unique values in x6: 3
number of unique values in x7: 18
number of unique values in x8: 5
number of unique values in x9: 5
number of unique values in x10: 3
number of unique values in x11: 4
number of unique values in x12: 10
number of unique values in x13: 19
number of unique values in x14: 4
number of unique values in x15: 1
number of unique values in x16: 3
number of unique values in x17: 3
number of unique values in x18: 8
number of unique values in x19: 5
number of unique values in x20: 3
number of unique values in x21: 2


create markets and number of consumers

In [527]:
car_df['k'] = car_df.groupby(['Sector', 'Region']).ngroup()

# create month-year 
car_df['Fecha'] = pd.to_datetime(car_df['Fecha'], errors='coerce')
car_df['month'] = car_df['Fecha'].dt.month
car_df['year'] = car_df['Fecha'].dt.year
car_df.drop('Fecha', axis=1, inplace=True)

#define the market 
car_df['semester'] = ((car_df['month'] - 1) // 6) + 1
car_df['market_ids'] = car_df.groupby(['year', 'semester', 'Region']).ngroup()
print(car_df['market_ids'].nunique())


#create a number of consumers at the market level 
region_counts = car_df.groupby('Region')['market_ids'].nunique()
car_df['N_i'] = car_df['Region'].map(region_counts)
 

74


In [528]:
print(car_df['Region'].unique())
Norte = ['Arica y Parinacota', 'TarapacÃ\x83Â\x83Ã\x82Â¡', 'Antofagasta', 'Atacama', 'Coquimbo']
Centro = ["Lib. Gral. Bdo. O'Higgins", 'Maule', 'Metropolitana', 'ValparaÃ\x83Â\x83Ã\x82Â\xadso' ]
Sur = ['BÃ\x83Â\x83Ã\x82Â\xado-BÃ\x83Â\x83Ã\x82Â\xado', 'Los RÃ\x83Â\x83Ã\x82Â\xados', 'Ã\x83Â\x83Ã\x82Â\x91uble',
    'Los Lagos', 'AraucanÃ\x83Â\x83Ã\x82Â\xada', 'Magallanes y AntÃ\x83Â\x83Ã\x82Â¡rtica', 'AysÃ\x83Â\x83Ã\x82Â©n']


def get_zone(region):
    if region in Norte:
        return 'Norte'
    elif region in Centro:
        return 'Centro'
    elif region in Sur:
        return 'Sur'
    else:
        return 'Other'  # For any regions not in the lists

# Add the Zona column
car_df['Zona'] = car_df['Region'].apply(get_zone)

car_df['k'] = car_df.groupby(['Sector', 'Zona']).ngroup()


["Lib. Gral. Bdo. O'Higgins"
 'BÃ\x83Â\x83Ã\x82Â\xado-BÃ\x83Â\x83Ã\x82Â\xado'
 'Los RÃ\x83Â\x83Ã\x82Â\xados' 'Ã\x83Â\x83Ã\x82Â\x91uble' 'Maule'
 'Los Lagos' 'Atacama' 'ValparaÃ\x83Â\x83Ã\x82Â\xadso' 'Metropolitana'
 'AraucanÃ\x83Â\x83Ã\x82Â\xada' 'Antofagasta'
 'Magallanes y AntÃ\x83Â\x83Ã\x82Â¡rtica' 'AysÃ\x83Â\x83Ã\x82Â©n'
 'Coquimbo' 'TarapacÃ\x83Â\x83Ã\x82Â¡' 'Arica y Parinacota']


1. convert to numeric values and 2. drop chars with NaNs 

In [529]:
# Function to extract numbers from string and handle missing values
def extract_number(value):
    if pd.isna(value) or value == 'N/D':
        return np.nan
    # Extract numbers using string methods
    if isinstance(value, str):
        # Find first sequence of numbers (including decimals)
        numbers = ''.join(c for c in value if c.isdigit() or c == '.')
        return float(numbers) if numbers else value
    return value


for column in car_df.columns:
    try:
        car_df[column] = car_df[column].apply(extract_number)
    except:
        # If conversion fails, keep the original values
        continue

In [530]:
# Identify columns that start with 'x' and have missing values
x_columns = [col for col in car_df.columns if col.startswith('x')]
x_columns_with_missing = [col for col in x_columns if car_df[col].isna().any()]

non_x_columns = [col for col in car_df.columns if not col.startswith('x')] # Get non-x columns 


car_df = car_df.drop(columns=x_columns_with_missing) # Drop x-columns with missing values

# Get remaining x-columns after dropping
remaining_x_columns = [col for col in car_df.columns if col.startswith('x')]

# Create new names for x-columns
new_x_names = [f'x{i+1}' for i in range(len(remaining_x_columns))]

# Create rename dictionary only for x-columns
rename_dict = dict(zip(remaining_x_columns, new_x_names))


car_df = car_df.rename(columns=rename_dict) # Rename only the x-columns

In [531]:
rename_dict = {
    'IDProductoCM': 'product_ids',
    }

car_df.rename(columns=rename_dict, inplace=True)

In [532]:
# List of columns to convert to numeric
car_df['Monto Total OC'] = car_df['Monto Total OC'].replace(r',.*', '', regex=True)

columns_to_convert = ['Monto Total OC'] 

# Convert the specified columns to numeric
car_df[columns_to_convert] = car_df[columns_to_convert].apply(pd.to_numeric, errors='coerce')
car_df['prices'] = car_df['Monto Total OC'] / car_df['Cantidad']

In [533]:
car_df2 = calculate_market_shares_with_outside_good(car_df, share_adjustment=False)

check that shares 1. are positive and 2. sum upp to less than 1. 

In [534]:
###1. Check for negative shares
# percentage of neg shares. (if share is negative replace by 2%)
negative_shares_percentage = (car_df2['shares'] < 0).mean() * 100
print(f"Percentage of negative shares: {negative_shares_percentage:.2f}%")
car_df2['shares'] = car_df2['shares'].apply(lambda x: 0.02 if x < 0.02 else x)

#shares have to sum up to 1%, normalize  by market_ids
car_df2['shares_sum'] = car_df2.groupby('market_ids')['shares'].transform('sum')
car_df2['shares'] = car_df2['shares'] / (car_df2['shares_sum']+0.05) # add 0.05 since shares have to sum up to less than 1. 

# 3. check that outside shares are not too high and that there are no negative shares
outside_good_shares = car_df2[car_df2['product_ids'] == 0]['shares'].describe()
print(outside_good_shares)
negative_shares_percentage = (car_df2['shares'] < 0).mean() * 100
print(f"Percentage of negative shares: {negative_shares_percentage:.2f}%")

# 4. check that no share sum up to more than 1 
sum_market_shares = car_df2.groupby('market_ids')['shares'].sum()
markets_with_high_shares = sum_market_shares[(sum_market_shares > 1) ]
print(markets_with_high_shares)



Percentage of negative shares: 6.53%
count    74.000000
mean      0.293486
std       0.316287
min       0.000875
25%       0.007099
50%       0.088711
75%       0.634921
max       0.793651
Name: shares, dtype: float64
Percentage of negative shares: 0.00%
Series([], Name: shares, dtype: float64)


### PyBLP without micro-moments

In [586]:
formulation_string = '1 + ' + ' + '.join(new_x_names) + ' + prices'
X1_formulation = pyblp.Formulation(formulation_string) # non random coefficient formulation
X2_formulation = pyblp.Formulation('0 + prices') # variables with random coefficients. 
product_formulations = (X1_formulation, X2_formulation)
product_formulations


(1 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + prices,
 prices)

In [587]:

mc_integration = pyblp.Integration('monte_carlo', size=50, specification_options={'seed': 0})
mc_integration

mc_problem = pyblp.Problem(product_formulations, car_df2, integration=mc_integration)
mc_problem



pr_integration = pyblp.Integration('product', size=5)
pr_integration

pr_problem = pyblp.Problem(product_formulations, car_df2, integration=pr_integration)
pr_problem

bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-4})

Initializing the problem ...


Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in X1. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in ZD. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.


Initialized the problem after 00:00:00.

Dimensions:
 T    N    I     K1    K2    MD 
---  ---  ----  ----  ----  ----
74   521  3700   37    1     36 

Formulations:
       Column Indices:           0      1    2    3                4                           5                           6                      7                       8                                     9                                     10                   11           12                13                       14                             15                      16              17                              18                           19                          20                                     21                                 22                             23                     24                     25                     26              27                  28                                    29                                     30                  31                 32                     3

Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in X1. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in ZD. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.


In [588]:
results1 = mc_problem.solve(sigma=np.ones((1, 1)), optimization=bfgs)


The model may be under-identified. The total number of unfixed parameters is 38, which is more than the total number of moments, 36. Consider checking whether instruments were properly specified when initializing the problem, and whether parameters were properly configured when solving the problem.
Detected that the 2SLS weighting matrix is nearly singular with condition number +2.168993E+26. To disable singularity checks, set options.singular_tol = numpy.inf.


Solving the problem ...

Nonlinear Coefficient Initial Values:
Sigma:     prices    
------  -------------
prices  +1.000000E+00
Starting optimization ...


At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below:
The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Gradient                  
Step     Time

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +2.750369E+26. To disable singularity checks, set options.singular_tol = numpy.inf.



The fixed point computation of delta failed to converge. This problem can sometimes be mitigated by increasing the maximum number of fixed point iterations, increasing the fixed point tolerance, choosing more reasonable initial parameter values, setting more conservative parameter or share bounds, or using different iteration or optimization configurations.

Computed results after 00:00:11.

Problem Results Summary:
GMM     Objective      Gradient                    Clipped  Weighting Matrix
Step      Value          Norm          Hessian     Shares   Condition Number
----  -------------  -------------  -------------  -------  ----------------
 1    -2.925237E+42  +0.000000E+00  +0.000000E+00    440     +2.324039E+20  

Starting optimization ...


At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results,

Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +5.235640E+26. To disable singularity checks, set options.singular_tol = numpy.inf.
Detected that the estimated covariance matrix of aggregate GMM moments is nearly singular with condition number +1.432643E+27. To disable singularity checks, set options.singular_tol = numpy.inf.


## Py-BLP with micro-moments. 

car_df2 is the data with the prices/shares/prod char,  in the documentation called product data
agent_data is the data with the demographics, in our case it has the distribution of demographics. 

\begin{align*}
    &E[\text{i purchases new vehicle} \mid \{\bar{y}_i < \bar{y}_1\}], \\
    &E[\text{i purchases new vehicle} \mid \{\bar{y}_1 \leq \bar{y}_i < \bar{y}_2\}], \\
    &E[\text{i purchases new vehicle} \mid \{\bar{y}_i \geq \bar{y}_2\}];
\end{align*}

\begin{align*}
    &E[f_{si} \mid \{\text{i purchases a minivan}\}], \\
    &E[f_{si} \mid \{\text{i purchases a station wagon}\}], \\
    &E[f_{si} \mid \{\text{i purchases a sport-utility}\}], \\
    &E[f_{si} \mid \{\text{i purchases a full-size van}\}].
\end{align*}


For each market create one obs. for each local agency in the market. 

### Loading data

In [664]:
product_data = car_df2.copy()
product_data['x13'] = 1* (product_data['x12'] == 'SUV')
product_data['x12'] =  1*(product_data['x12'] == 'CAMIONETA')
 
product_data['clustering_ids'] = product_data['market_ids'] #cluster at the market level

In [652]:
# Step 1: Map each market_id to its corresponding Region
market_region_df = car_df[['market_ids', 'Region']].drop_duplicates()

# Step 2: Count the number of observations for each region-sector pair
region_sector_counts = car_df.groupby(['Region', 'Sector', 'k']).size().reset_index(name='count')

# Step 3: Merge the counts into a combined dataset for each market_id and its region
agent_data = (
    market_region_df
    .merge(region_sector_counts, on='Region', how='left')
)

agent_data = agent_data.loc[agent_data.index.repeat(agent_data['count'])]
agent_data = agent_data.reset_index(drop=True)


unique_k = sorted(agent_data['k'].unique()) #list of different k values

dummy_vars = []
for i, k_value in enumerate(unique_k, 1):
    
    var_name = f'a{i}'
    agent_data[var_name] = (agent_data['k'] == k_value).astype(int)
    dummy_vars.append(var_name)
    
# Create the formula string
agent_formulation_string =' + '.join(dummy_vars)
agent_data.drop(['Region', 'Sector'], axis=1, inplace=True)

In [653]:
agent_data['N'] = agent_data['market_ids'].value_counts()[agent_data['market_ids']].values
agent_data = agent_data.drop('count', axis=1)
agent_data['weights'] = 1 / agent_data['N']
m = 5 
for i in range(0, m ): #important: nodes have to start from 0
    agent_data[f'nodes{i}'] = np.random.chisquare(df=3, size=len(agent_data))

### setting up the problem

In [654]:
product_formulations = (
    pyblp.Formulation(formulation_string),
    pyblp.Formulation('1 + prices'),
)
product_formulations

(1 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + prices,
 1 + prices)

In [655]:
agent_formulation = pyblp.Formulation('1 +' + agent_formulation_string)
agent_formulation


1 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + a11 + a12 + a13 + a14 + a15 + a16 + a17 + a18 + a19

In [656]:
problem = pyblp.Problem(product_formulations, car_df2, agent_formulation, agent_data, costs_type='log')
problem


Initializing the problem ...
Initialized the problem after 00:00:00.

Dimensions:
 T    N    I     K1    K2    D    MD 
---  ---  ----  ----  ----  ---  ----
74   521  3885   37    2    20    36 

Formulations:
       Column Indices:          0     1      2    3                4                           5                           6                      7                       8                                     9                                     10                   11           12                13                       14                             15                      16              17                              18                           19                          20                                     21                                 22                             23                     24                     25                     26              27                  28                                    29                                     30                 

Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in X1. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.
Detected collinearity issues with [x5['manual'], x6['tela'], x7['manual'], x8['elÃ\x83Â©ctricos - no tiene'], x8['manuales - manuales'], x9['traseros'], x10['elÃ\x83Â©ctrica, hidrÃ\x83Â¡ulica progresiva'], x10['hidrÃ\x83Â¡ulica'], x11['acero'], x11['aleaciÃ\x83Â³n'], x12['SUV']] and at least one other column in ZD. To disable collinearity checks, set options.collinear_atol = options.collinear_rtol = 0.


Dimensions:
 T    N    I     K1    K2    D    MD 
---  ---  ----  ----  ----  ---  ----
74   521  3885   37    2    20    36 

Formulations:
       Column Indices:          0     1      2    3                4                           5                           6                      7                       8                                     9                                     10                   11           12                13                       14                             15                      16              17                              18                           19                          20                                     21                                 22                             23                     24                     25                     26              27                  28                                    29                                     30                  31                 32                     33                   34    

### Setting up micro moments

In [657]:
micro_dataset = pyblp.MicroDataset(
    name="transactions",
    observations= len(agent_data),
    compute_weights=lambda t, p, a: np.ones((a.size, 1 + p.size)), #define a function that computes the weights and will be used later 
)
micro_dataset

transactions: 3885 Observations in All Markets

In [658]:
avg_outside_share = car_df2[car_df2['product_ids'] == 0]['shares'].mean() #average share of outside good

micro_statistics = car_df.groupby(['k', 'x12']).size().reset_index(name='count') #count of each k, x12 combination

# Add N column which is the count of k values
micro_statistics['N'] = car_df['k'].value_counts()[micro_statistics['k']].values
micro_statistics['share'] = micro_statistics['count'] / micro_statistics['N']
micro_statistics['share'] = micro_statistics['share']/(1 + avg_outside_share) #normalize by the outside share

for i, k_value in enumerate(unique_k, 1):    
    micro_statistics[f'a{i}'] = (micro_statistics['k'] == k_value).astype(int)
    


let's start with only one momnet the probability of suv given a1 = 1

In [669]:
#in our case both x12 and k are categorical variables. hence we should follow the same procedure used to form the 
# moments that are in the form of E(new/income) where income is binned into categories, hence is similar to our setting. 

micro_parts_dict = {}

for _, row in micro_statistics.iterrows():
    part_name = f'E[{row["x12"]}|k={row["k"]}'  # Creates names like 'age_mi_part', 'age_sw_part'
    
    micro_parts_dict[part_name] = pyblp.MicroPart(
        name= part_name,
        dataset=micro_dataset,
        compute_values=lambda t, p, a: np.outer(a.demographics[:, row['k'] + 1], np.r_[0, p.X1[:, 7]])  # Adjust indices as needed
    )
    



In [670]:
inside_suv_a1_part = pyblp.MicroPart( #copying inside_mid_part 
    name="E[{x12 =1}  * {a1=1}]",
    dataset=micro_dataset,
    compute_values=lambda t, p, a: np.outer(a.demographics[:, 2], np.r_[0, p.X1[:, 13]]), #the 0 before X2 is because the outside option has value 0 
)

suv_a1_part= pyblp.MicroPart( #copying mid_part
    name="{a1=1}]",
    dataset=micro_dataset,
    compute_values=lambda t, p, a: np.outer(a.demographics[:, 3], np.r_[1, p.X1[:, 13]]), #note the 1 in X2 
)

In [671]:
compute_ratio = lambda v: v[0] / v[1]
compute_ratio_gradient = lambda v: [1 / v[1], -v[0] / v[1]**2]

In [672]:
micro_moments = [
    pyblp.MicroMoment(
        name="E[age_i | mi_j]",
        value=0.783, 
        parts=[inside_suv_a1_part, suv_a1_part], 
        compute_value=compute_ratio, 
        compute_gradient=compute_ratio_gradient,
    ),]

In [673]:
initial_sigma = np.diag([1,1]) # square matrix with the dimension of X2, in our case a constant and prices. 
initial_pi = np.array(np.ones((2, 1+len(unique_k)))) # dim(x2) x dim(agent formulation)

initial_pi

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1.]])

In [674]:
results = problem.solve(
    sigma=initial_sigma,
    pi=initial_pi,
    optimization=pyblp.Optimization('bfgs', {'gtol': 1e-4}),
    iteration=pyblp.Iteration('squarem', {'atol': 1e-13}),
    #se_type='clustered',
    #W_type='clustered',
    micro_moments=micro_moments,
)
results

Solving the problem ...

Micro Moments:
  Observed         Moment               Part             Dataset     Observations  Markets
-------------  ---------------  ---------------------  ------------  ------------  -------
+7.830000E-01  E[age_i | mi_j]  E[{x12 =1}  * {a1=1}]  transactions      3885        All  
                                       {a1=1}]         transactions      3885        All  

Nonlinear Coefficient Initial Values:
Sigma:        1           prices      |   Pi:          1             a1             a2             a3             a4             a5             a6             a7             a8             a9             a10            a11            a12            a13            a14            a15            a16            a17            a18            a19     
------  -------------  -------------  |  ------  -------------  -------------  -------------  -------------  -------------  -------------  -------------  -------------  -------------  -------------  ----------

The model may be under-identified. The total number of unfixed parameters is 79, which is more than the total number of moments, 37. Consider checking whether instruments were properly specified when initializing the problem, and whether parameters were properly configured when solving the problem.
Detected that the 2SLS weighting matrix is nearly singular with condition number +2.168993E+26. To disable singularity checks, set options.singular_tol = numpy.inf.
compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro mo


Reverted problematic micro moments. Number of reverted elements: 1 out of 1.
Reverted problematic elements in the Jacobian of micro moments with respect to theta. Number of reverted elements: 42 out of 42.
Failed to invert an estimated covariance matrix of GMM moments. One or more data matrices may be highly collinear. Condition number: NAN. The inverse was replaced with null values.
Failed to compute a weighting matrix because of invalid estimated covariances of GMM moments.

Computed results after 00:00:03.


Failed to invert an estimated covariance matrix of linear parameters. One or more data matrices may be highly collinear. Condition number: NAN. The inverse was replaced with null values.

Starting optimization ...



compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN for part '{a1=1}] on transactions: 3885 Observations in All Markets'.



At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below:
Reverted problematic micro moments. Number of reverted elements: 1 out of 1.
Reverted problematic elements in the Jacobian of micro moments with respect to theta. Number of reverted elements: 42 out of 42.
Reverted a problematic GMM objective value.
Reverted problematic elements in the GMM objective gradient. Number of reverted elements: 42 out of 42.

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Gradient                                                                                                                                                                                                  

compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN for part '{a1=1}] on transactions: 3885 Observations in All Markets'.
Failed to compute the condition number of the estimated covariance matrix of micro moments while checking for singularity. To disable singularity checks, set options.singular_tol = numpy.inf.
compute_value of mi


Reverted problematic micro moments. Number of reverted elements: 1 out of 1.
Reverted problematic elements in the Jacobian of micro moments with respect to theta. Number of reverted elements: 42 out of 42.
Reverted a problematic GMM objective value.
Reverted problematic elements in the GMM objective gradient. Number of reverted elements: 42 out of 42.
Failed to invert an estimated covariance matrix of GMM moments. One or more data matrices may be highly collinear. Condition number: NAN. The inverse was replaced with null values.
Failed to compute a weighting matrix because of invalid estimated covariances of GMM moments.

Computed results after 00:01:30.

Problem Results Summary:
GMM     Objective      Gradient         Hessian         Hessian     Clipped  Weighting Matrix
Step      Value          Norm       Min Eigenvalue  Max Eigenvalue  Shares   Condition Number
----  -------------  -------------  --------------  --------------  -------  ----------------
 1    +1.000000E+10  +0.0000

compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN for part '{a1=1}] on transactions: 3885 Observations in All Markets'.



At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below:
Reverted problematic micro moments. Number of reverted elements: 1 out of 1.
Reverted problematic elements in the Jacobian of micro moments with respect to theta. Number of reverted elements: 42 out of 42.
Reverted a problematic GMM objective value.
Reverted problematic elements in the GMM objective gradient. Number of reverted elements: 42 out of 42.

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped    Objective      Objective      Gradient                                                                                                                                                                                                  

compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN for part '{a1=1}] on transactions: 3885 Observations in All Markets'.
Failed to compute the condition number of the estimated covariance matrix of micro moments while checking for singularity. To disable singularity checks, set options.singular_tol = numpy.inf.
compute_value of mi


Reverted problematic micro moments. Number of reverted elements: 1 out of 1.
Reverted problematic elements in the Jacobian of micro moments with respect to theta. Number of reverted elements: 42 out of 42.
Reverted a problematic GMM objective value.
Reverted problematic elements in the GMM objective gradient. Number of reverted elements: 42 out of 42.
Failed to invert an estimated covariance matrix of GMM moments. One or more data matrices may be highly collinear. Condition number: NAN. The inverse was replaced with null values.
Failed to compute a weighting matrix because of invalid estimated covariances of GMM moments.
Failed to invert an estimated covariance matrix of GMM parameters. One or more data matrices may be highly collinear. Condition number: NAN. The inverse was replaced with null values.
Failed to compute standard errors because of invalid estimated covariances of GMM parameters.

Computed results after 00:01:48.

Problem Results Summary:
GMM     Objective      Gradient 

compute_value of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned +INF for part 'E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets'.
compute_gradient of micro moment 'E[age_i | mi_j]: +7.830000E-01 (E[{x12 =1}  * {a1=1}] on transactions: 3885 Observations in All Markets; {a1=1}] on transactions: 3885 Observations in All Markets)' returned NAN for part '{a1=1}] on transactions: 3885 Observations in All Markets'.
Failed to compute the condition number of the estimated covariance matrix of aggregate GMM moments while checking for singularity. To disable singularity checks, set options.singular_tol = numpy.inf.
Failed to c

Problem Results Summary:
GMM     Objective      Gradient         Hessian         Hessian     Clipped  Weighting Matrix
Step      Value          Norm       Min Eigenvalue  Max Eigenvalue  Shares   Condition Number
----  -------------  -------------  --------------  --------------  -------  ----------------
 2    +1.000000E+10  +0.000000E+00  +0.000000E+00   +0.000000E+00     442          NAN       

Cumulative Statistics:
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:03:24       Yes          0             5          2146         6734    

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
Sigma:         1             prices       |   Pi:           1               a1               a2               a3               a4               a5               a6               a7               a8            

In [632]:
show(micro_dataset)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x14fbc1b37f0>

In [666]:
show(product_data)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x15098bbe320>

In [634]:
show(micro_statistics)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x15093c95990>

# display

In [None]:
print(car_df.columns)
show(car_df2)

In [575]:
show(micro_statistics)

PandasGUI INFO — pandasgui.gui — Opening PandasGUI


<pandasgui.gui.PandasGui at 0x150410476d0>

In [None]:
show(car_df[['Rut Unidad de Compra','market_ids', 'product_ids', 'k'] ])