# pytwoway example

In [1]:
import os
import sys

# pytwoway is imported from the repository checkout rather than an installed
# package: step up three directory levels before importing it
os.chdir('../../..')
from pytwoway import sim_twfe_network, twfe_network
sn = sim_twfe_network.sim_twfe_network  # short alias, called below to build the simulator
tn = twfe_network.twfe_network  # short alias, called below to build the network object

# Return to the notebook folder so relative paths used later resolve from here
os.chdir('docs-src/source/notebooks')
# Workaround kept from the original notebook so that cell output shows.
# NOTE(review): sys.path.insert does not modify sys.stdout, so the save/restore
# around it looks like a no-op -- confirm before removing.
stdout = sys.stdout
sys.path.insert(1, os.path.join(sys.path[0], '..'))
sys.stdout = stdout

## First, get your data

In [2]:
# No real dataset is needed for this walkthrough: build a simulator with its
# default settings, then draw one simulated dataset from it.
# NOTE(review): no random seed is set here, so the displayed values will
# presumably differ between runs -- confirm how the simulator is seeded.
simulator = sn()
sim_data = simulator.sim_network()
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,4,0.430727,-0.114185,1,2,73,False,0.771214
1,1,2,4,0.430727,-0.114185,1,2,73,False,0.035170
2,1,3,9,0.430727,1.335178,2,3,173,True,1.890424
3,1,4,9,0.430727,1.335178,2,3,173,False,2.293282
4,1,5,9,0.430727,1.335178,2,3,173,False,1.031803
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,7,0.967422,0.604585,1,2,128,False,2.375694
49996,10000,2,7,0.967422,0.604585,1,2,128,False,1.844047
49997,10000,3,5,0.967422,0.114185,2,2,96,True,1.167899
49998,10000,4,5,0.967422,0.114185,2,2,96,False,0.088703


## Second, create a twfe_network object using your data

In [3]:
# The column dictionary tells the network object which columns of the data to
# use. Here every entry maps a name to itself; alternatively, rename the
# columns of your data by hand.
col_dict = {
    'fid': 'fid',
    'wid': 'wid',
    'year': 'year',
    'comp': 'comp',
}
# tw_net is short for "two-way network"
tw_net = tn(data=sim_data, col_dict=col_dict)

## Third, clean your data

In [4]:
# Run the cleaning step on the network object. The return value is not
# captured, so this presumably updates tw_net in place -- confirm against the
# pytwoway documentation.
tw_net.clean_data()

## Fourth, refactor your data into event study format

In [5]:
# Refactor the cleaned data into event study format. As with the cleaning
# step, the return value is not captured, so tw_net is presumably updated in
# place -- confirm against the pytwoway documentation.
tw_net.refactor_es()

## Now we can run AKM

In [6]:
# Optional parameters for the AKM estimator, one setting per line so they are
# easy to scan and tweak
akm_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_akm.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}
# Keep the returned results so they can be inspected at the end of the notebook
akm_res = tw_net.run_akm_corrected(user_akm=akm_params)

100%|██████████| 5/5 [00:00<00:00, 157.13it/s]


## Fifth, cluster your data for CRE estimates

In [7]:
# Optional parameters
# Settings specifically for the KMeans algorithm; they are nested inside the
# clustering options below under 'user_KMeans'
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}
# Pass the options to the clustering step. Previously cluster() was called
# with no arguments, so the dictionaries above were built but never used and
# editing them had no effect. The dictionary is passed positionally, in the
# same way cre_params is passed to run_cre later in this notebook.
tw_net.cluster(cluster_params)

## Now we can run CRE

In [8]:
# Optional parameters for the CRE estimator, one setting per line
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}
# Keep the returned results so they can be inspected at the end of the notebook
cre_res = tw_net.run_cre(cre_params)

## Finally, we can investigate the results

In [9]:
# Show the AKM results first, then the CRE results, using a single display call
display(akm_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'nm': '19891',
 'ns': '628',
 'n_firms': '195',
 'n_workers': '10000',
 'n_movers': '9372',
 'mover_quantiles': '[142.0, 183.2860962566845, 192.0, 196.0, 202.0, 206.0, 209.0, 216.0, 220.0, 229.0, 248.0]',
 'size_quantiles': '[146.0, 187.0, 194.0, 200.02992518703243, 204.96332518337408, 209.0, 212.0, 218.0, 224.0, 231.56803455723542, 248.0]',
 'between_firm_var': '0.9883837155569579',
 'var_y': '1.9784975745432956',
 'solver_time': '0.00286392599999985',
 'tot_var': '1.99383740928026',
 'eps_var_ho': '0.7993938788120438',
 'eps_var_fe': '0.3896476424390207',
 'tr_var_ho': '0.010469772537502465',
 'tr_cov_ho': '-0.005917196869515334',
 'var_fe': '0.5709000088563824',
 'cov_fe': '0.17504710641965776',
 'var_ho': '0.5625305367773484',
 'cov_ho': '0.1797772773768741',
 'total_time': '0.12003612518310547'}

{'ndt': '5',
 'nm': '19891',
 'ns': '628',
 'n_firms': '195',
 'n_workers': '10000',
 'var_y': '1.9784975745432956',
 'y1s_y1s': '0.035297464535563886',
 'y1s_y1s_count': '610',
 'y1s_var': '0.3868875328015895',
 'y1s_var_count': '628',
 'y1m_var': '0.9974726215360628',
 'y1m_var_count': '19891',
 'y2m_var': '1.000243303764853',
 'y2m_var_count': '19891',
 'y1s_y1m1': '-0.00017031517812941057',
 'y1s_y1m1_count': '628',
 'y1s_y2m1': '-0.0005330669520971693',
 'y1s_y2m1_count': '628',
 'y1m1_y1m1': '-0.0015467229192112247',
 'y1m1_y1m1_count': '19891',
 'y2m1_y1m1': '-0.0009852650046617345',
 'y2m1_y1m1_count': '19891',
 'y2m1_y2m1': '-0.00028585089793406107',
 'y2m1_y2m1_count': '19891',
 'y1s_y1m2': '-0.001155578904117844',
 'y1s_y1m2_count': '628',
 'y1s_y2m2': '-0.00042701030842446773',
 'y1s_y2m2_count': '628',
 'y1m2_y1m2': '0.00146974744668433',
 'y1m2_y1m2_count': '19891',
 'y2m2_y1m2': '0.0004047538102877139',
 'y2m2_y1m2_count': '19891',
 'y2m2_y2m2': '-0.0008278402395265007',