# Estimating AKM, Bias-Corrected AKM, and CRE TWFE models using PyTwoWay

In [1]:
# Put the parent folder on the import path so the local PyTwoWay modules resolve.
# sys.stdout is captured first and restored afterwards; per the original
# author's note, this is needed for cell output to show.
import os
import sys

stdout = sys.stdout
parent_dir = os.path.join(sys.path[0], '..')
sys.path.insert(1, parent_dir)
sys.stdout = stdout

# stwn simulates a two-way fixed effects network; twn wraps data for estimation
from sim_twfe_network import sim_twfe_network as stwn
from twfe_network import twfe_network as twn

## First, get your data

In [2]:
# No real dataset is used in this example: build a simulator with its default
# settings and draw one simulated network from it
simulator = stwn()
sim_data = simulator.sim_network()
# Show the simulated data
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,1,0.967422,-0.908458,1,1,22,False,1.337664
1,1,2,1,0.967422,-0.908458,2,1,19,True,0.073877
2,1,3,0,0.967422,-1.335178,3,1,10,True,-0.801005
3,1,4,2,0.967422,-0.604585,4,2,31,True,1.732804
4,1,5,2,0.967422,-0.604585,4,2,31,False,1.329272
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,8,0.967422,0.908458,1,2,152,False,2.799519
49996,10000,2,8,0.967422,0.908458,1,2,152,False,2.214767
49997,10000,3,8,0.967422,0.908458,2,1,166,True,0.861693
49998,10000,4,9,0.967422,1.335178,3,2,170,True,4.571471


## Second, create a twfe_network object using your data

In [3]:
# The column dictionary tells twfe_network which columns of the data hold the
# firm id, worker id, year and compensation. The simulated data already uses
# these names, so each entry maps to itself. (Renaming the columns by hand
# beforehand is an alternative.)
col_dict = {col: col for col in ('fid', 'wid', 'year', 'comp')}
# tw_net stands for "two-way network"
tw_net = twn(data=sim_data, col_dict=col_dict)

## Third, clean your data

In [4]:
# Run the network object's data-cleaning step; every later cell in this
# notebook operates on tw_net after this call
tw_net.clean_data()

## Fourth, refactor your data into event study format

In [5]:
# Convert the cleaned data held by tw_net into event study format, which is
# done here before the AKM and CRE estimators are run below
tw_net.refactor_es()

## Now we can run AKM

In [6]:
# Optional settings for the AKM estimator, listed one per line for readability
akm_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_fe.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}
# Run the bias-corrected AKM estimator and keep its results for inspection
akm_res = tw_net.run_akm_corrected(user_akm=akm_params)

100%|██████████| 5/5 [00:00<00:00, 268.25it/s]


## Fifth, cluster your data for CRE estimates

In [7]:
# Optional parameters
# Settings for the KMeans algorithm itself.
# NOTE(review): 'precompute_distances' and 'n_jobs' carry the placeholder value
# 'deprecated'; confirm they are still accepted by the installed scikit-learn.
# NOTE(review): 'random_state' is None, so cluster assignments may differ
# between runs; set an integer seed for reproducible results.
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}
# Settings for the clustering step; the KMeans settings are nested inside
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}
# Pass the settings to cluster(); previously the dictionaries above were built
# but never used, so editing them had no effect on the clustering.
tw_net.cluster(user_cluster=cluster_params)

## Now we can run CRE

In [8]:
# Optional parameters
cre_params = {'ncore': 1, 'ndraw_tr': 5, 'ndp': 50, 'out': 'res_cre.json', 'posterior': False, 'wobtw': False}
cre_res = tw_net.run_cre()

## Finally, we can investigate the results

In [9]:
# Show the AKM results first, then the CRE results
display(akm_res, cre_res)

{'cores': 1,
 'ndp': 50,
 'ndt': 5,
 'nm': 19929,
 'ns': 649,
 'n_firms': 193,
 'n_workers': 10000,
 'n_movers': 9351,
 'mover_quantiles': [151.0,
  187.0,
  195.0,
  199.0,
  203.0,
  208.0,
  210.0,
  215.0,
  221.0,
  228.0,
  263.0],
 'size_quantiles': [152.0,
  190.6020997375328,
  197.0,
  202.0,
  206.0,
  210.0,
  214.0,
  219.0,
  226.0,
  231.75507559395248,
  267.0],
 'between_firm_var': 0.9996932637232635,
 'var_y': 1.989633814110547,
 'solver_time': 0.00248721300000021,
 'tot_var': 1.9896222190630752,
 'eps_var_ho': 0.8008688297454852,
 'eps_var_fe': 0.39022263047588424,
 'tr_var_ho': 0.006941293363416308,
 'tr_cov_ho': -0.0016763412072605406,
 'var_fe': 0.5677799699376128,
 'cov_fe': 0.18184197752540562,
 'var_ho': 0.5622209044447335,
 'cov_ho': 0.1831845069463185,
 'total_time': 0.06609511375427246}

{'ndt': 5,
 'nm': 19929,
 'ns': 649,
 'n_firms': 193,
 'n_workers': 10000,
 'var_y': 1.989633814110547,
 'y1s_y1s': -0.004505914998568813,
 'y1s_y1s_count': 626,
 'y1s_var': 0.36350140334608744,
 'y1s_var_count': 649,
 'y1m_var': 0.9846433032976732,
 'y1m_var_count': 19929,
 'y2m_var': 0.9815634465005953,
 'y2m_var_count': 19929,
 'y1s_y1m1': 0.0017068521843943058,
 'y1s_y1m1_count': 649,
 'y1s_y2m1': 0.003833739212637132,
 'y1s_y2m1_count': 649,
 'y1m1_y1m1': -0.0016717694686182853,
 'y1m1_y1m1_count': 19929,
 'y2m1_y1m1': 0.0010317130610684375,
 'y2m1_y1m1_count': 19929,
 'y2m1_y2m1': -0.00042437540713662975,
 'y2m1_y2m1_count': 19929,
 'y1s_y1m2': 9.57438709336424e-05,
 'y1s_y1m2_count': 649,
 'y1s_y2m2': 0.0016894214447837847,
 'y1s_y2m2_count': 649,
 'y1m2_y1m2': -0.0022923430477213617,
 'y1m2_y1m2_count': 19929,
 'y2m2_y1m2': -0.00086393219224511,
 'y2m2_y1m2_count': 19929,
 'y2m2_y2m2': -0.0021828358079121614,
 'y2m2_y2m2_count': 19929,
 'dym_dym': 1.6034176578645682,
 'dym_dym_