# Estimating AKM, Bias-Corrected AKM, and CRE TWFE models using PyTwoWay

In [1]:
# Make the parent folder importable so the project modules below resolve.
# sys.stdout is stashed before the path change and restored right after it;
# the original author notes this workaround is needed for cell output to show.
import os
import sys

stdout = sys.stdout
parent_dir = os.path.join(sys.path[0], '..')
sys.path.insert(1, parent_dir)
sys.stdout = stdout

# stwn: simulates data; twn: the two-way network class used in the rest of the notebook
from sim_twfe_network import sim_twfe_network as stwn
from twfe_network import twfe_network as twn

## First, get your data

In [2]:
# This example runs on simulated data rather than a real dataset:
# build a simulator with its default settings, then draw one network from it.
simulator = stwn()
sim_data = simulator.sim_network()
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,5,-0.430727,0.114185,1,1,81,False,-1.210375
1,1,2,8,-0.430727,0.908458,2,3,167,True,-0.068180
2,1,3,8,-0.430727,0.908458,2,3,167,False,-1.281269
3,1,4,8,-0.430727,0.908458,2,3,167,False,0.296934
4,1,5,6,-0.430727,0.348756,3,1,112,True,-0.934264
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,2,0.967422,-0.604585,1,1,42,False,0.827880
49996,10000,2,6,0.967422,0.348756,2,1,118,True,1.648853
49997,10000,3,2,0.967422,-0.604585,3,3,36,True,0.207598
49998,10000,4,2,0.967422,-0.604585,3,3,36,False,0.306681


## Second, create a twfe_network object using your data

In [3]:
# The column dictionary tells twfe_network which columns of the data hold the
# firm id, worker id, year and compensation. The simulated data already uses
# these names, so each entry maps a name to itself. (Alternatively, rename the
# columns of your data manually.)
col_dict = {name: name for name in ('fid', 'wid', 'year', 'comp')}

# tw_net is short for "two-way network"
tw_net = twn(data=sim_data, col_dict=col_dict)

## Third, refactor your data into event study format

In [4]:
# Refactor the data held by tw_net into event study format.
# NOTE(review): the return value is not captured, so this presumably updates
# tw_net in place — confirm against twfe_network.
tw_net.refactor_es()

## Now we can run AKM

In [5]:
# Optional parameters for the AKM estimator, one option per line.
akm_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_akm.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}

# Run AKM with these options and keep the returned results for inspection below
akm_res = tw_net.run_akm_corrected(user_akm=akm_params)

100%|██████████| 5/5 [00:00<00:00, 166.86it/s]


## Fourth, cluster your data for CRE estimates

In [6]:
# Optional parameters
# KMeans_params are specifically for the KMeans algorithm; they are nested
# inside cluster_params under the 'user_KMeans' key.
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}

# Actually hand the options to cluster(); previously cluster_params was built
# but never passed, so edits to the dictionaries above had no effect.
# NOTE(review): the keyword name follows the user_akm / user_KMeans convention
# used in this notebook — confirm against twfe_network.cluster's signature.
tw_net.cluster(user_cluster=cluster_params)

## Now we can run CRE

In [7]:
# Optional parameters for the CRE estimator
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}

# Actually hand the options to run_cre(); previously cre_params was built but
# never passed, so edits to the dictionary above had no effect.
# NOTE(review): the keyword name follows the user_akm convention used in this
# notebook — confirm against twfe_network.run_cre's signature.
cre_res = tw_net.run_cre(user_cre=cre_params)

## Finally, we can investigate the results

In [8]:
# Show the AKM results first, then the CRE results, in a single display call
display(akm_res, cre_res)

{'cores': 1,
 'ndp': 50,
 'ndt': 5,
 'nm': 19956,
 'ns': 624,
 'n_firms': 194,
 'n_workers': 10000,
 'n_movers': 9376,
 'mover_quantiles': [162.0,
  184.8992,
  194.0,
  198.0,
  204.0,
  208.0,
  212.0,
  218.0,
  220.0,
  228.3111111111111,
  244.0],
 'size_quantiles': [165.0,
  188.0,
  196.0,
  201.3389578163772,
  207.0,
  211.56501182033097,
  215.0,
  219.2719817767654,
  224.0,
  230.0,
  246.0],
 'between_firm_var': 0.9758955264933821,
 'var_y': 1.946529498385522,
 'solver_time': 0.0027690319999997826,
 'tot_var': 1.9527435878105073,
 'eps_var_ho': 0.7811215966580184,
 'eps_var_fe': 0.38082953707204503,
 'tr_var_ho': 0.01128917195800054,
 'tr_cov_ho': -0.00763834332022516,
 'var_fe': 0.5797880757968057,
 'cov_fe': 0.16648635064649953,
 'var_ho': 0.5709698597720254,
 'cov_ho': 0.17245282557661593,
 'total_time': 0.09407472610473633}

{'ndt': 5,
 'nm': 19956,
 'ns': 624,
 'n_firms': 194,
 'n_workers': 10000,
 'var_y': 1.946529498385522,
 'y1s_y1s': 0.009916170980551468,
 'y1s_y1s_count': 598,
 'y1s_var': 0.3760228778510924,
 'y1s_var_count': 624,
 'y1m_var': 0.9710715256031053,
 'y1m_var_count': 19956,
 'y2m_var': 0.9723065890310795,
 'y2m_var_count': 19956,
 'y1s_y1m1': 7.088794046653793e-05,
 'y1s_y1m1_count': 624,
 'y1s_y2m1': 0.0006368145145786483,
 'y1s_y2m1_count': 624,
 'y1m1_y1m1': -0.0019351948235884174,
 'y1m1_y1m1_count': 19956,
 'y2m1_y1m1': 0.0002620169987173995,
 'y2m1_y1m1_count': 19956,
 'y2m1_y2m1': 0.000926728233062396,
 'y2m1_y2m1_count': 19956,
 'y1s_y1m2': -0.0035889570098504727,
 'y1s_y1m2_count': 624,
 'y1s_y2m2': -0.00024307946298898653,
 'y1s_y2m2_count': 624,
 'y1m2_y1m2': 0.00033636845775197286,
 'y1m2_y1m2_count': 19956,
 'y2m2_y1m2': -9.321199635048633e-05,
 'y2m2_y1m2_count': 19956,
 'y2m2_y2m2': -0.0019517739080585675,
 'y2m2_y2m2_count': 19956,
 'dym_dym': 1.5660872322387438,
 'dym_dy