# pytwoway example

In [1]:
import os, sys
# Move the working directory three levels up before importing pytwoway
# (presumably so the in-repository package is importable -- confirm against the repo layout)
os.chdir('../../..')
from pytwoway import sim_twfe_network
sn = sim_twfe_network.sim_twfe_network  # short alias; called later as sn() to simulate data
from pytwoway import twfe_network
tn = twfe_network.twfe_network  # short alias; called later as tn(data=..., col_dict=...)

# Switch back into the notebooks folder, then apply a workaround needed for output to show:
# remember sys.stdout, extend sys.path, and re-assign the remembered sys.stdout.
# NOTE(review): the statement order matters here -- do not reorder without re-testing the notebook.
os.chdir('docs/source/notebooks')
stdout = sys.stdout
sys.path.insert(1, os.path.join(sys.path[0], '..'))
sys.stdout = stdout

## First, get your data

In [2]:
# This example runs on simulated data rather than a real dataset:
# build a simulator with its default settings, then draw one network from it.
simulator = sn()
sim_data = simulator.sim_network()
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,0,-0.430727,-1.335178,1,3,12,False,-3.365809
1,1,2,0,-0.430727,-1.335178,1,3,12,False,-1.454216
2,1,3,0,-0.430727,-1.335178,1,3,12,False,-2.237995
3,1,4,3,-0.430727,-0.348756,2,2,52,True,-0.321821
4,1,5,3,-0.430727,-0.348756,2,2,52,False,-2.804593
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,4,-0.430727,-0.114185,1,5,78,False,-1.264178
49996,10000,2,4,-0.430727,-0.114185,1,5,78,False,-0.294214
49997,10000,3,4,-0.430727,-0.114185,1,5,78,False,-0.178720
49998,10000,4,4,-0.430727,-0.114185,1,5,78,False,1.191634


## Second, create a twfe_network object using your data

In [3]:
# The column dictionary tells twfe_network which of our columns plays which role,
# so that columns end up named correctly. (Renaming the columns by hand is an alternative.)
# In the simulated data every column already carries the expected name,
# so each key simply maps to itself.
col_dict = {column: column for column in ('fid', 'wid', 'year', 'comp')}
# tw_net is short for "two-way network"
tw_net = tn(data=sim_data, col_dict=col_dict)

## Third, clean your data

In [4]:
# Run the cleaning step on the network object; the return value is not captured,
# and the following cells keep working with tw_net.
tw_net.clean_data()

## Fourth, refactor your data into event study format

In [5]:
# Convert the data held by tw_net into event study format; the return value is not captured,
# and the estimators below are called on the same tw_net object.
tw_net.refactor_es()

## Now we can run AKM

In [6]:
# Optional parameters for the AKM run, listed one per line so they are easy to tweak
akm_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_akm.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}
# Keep the returned results so they can be inspected at the end of the notebook
akm_res = tw_net.run_akm_corrected(user_akm=akm_params)

100%|██████████| 5/5 [00:00<00:00, 23.89it/s]


## Fifth, cluster your data for CRE estimates

In [7]:
# Optional parameters
# These parameters are specifically for the KMeans algorithm.
# NOTE(review): 'precompute_distances' and 'n_jobs' are set to 'deprecated' -- confirm that
# the installed scikit-learn version still accepts these two arguments.
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}
# Clustering options; the KMeans settings above are nested under 'user_KMeans'
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}
# Hand the options to cluster(); without this argument the dictionaries above have no effect.
# The keyword mirrors the AKM cell's user_akm=... convention.
tw_net.cluster(user_cluster=cluster_params)

## Now we can run CRE

In [8]:
# Optional parameters for the CRE run
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}
# Pass the options to run_cre(); without this argument cre_params has no effect.
# The keyword mirrors the AKM cell's user_akm=... convention.
# The returned results are kept for inspection in the final cell.
cre_res = tw_net.run_cre(user_cre=cre_params)

## Finally, we can investigate the results

In [9]:
# Show the AKM results first, then the CRE results (display renders each argument in order)
display(akm_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'nm': '19876',
 'ns': '641',
 'n_firms': '196',
 'n_workers': '10000',
 'n_movers': '9359',
 'mover_quantiles': '[153.0, 183.1142091152815, 191.0, 195.0, 199.0278728606357, 204.0, 207.0, 213.0, 218.0, 227.44847161572054, 242.0]',
 'size_quantiles': '[157.0, 187.0, 194.0, 198.0, 203.03783783783783, 207.0, 210.0, 217.0, 221.0, 228.89694323144104, 246.0]',
 'between_firm_var': '0.9901993152122719',
 'var_y': '1.9748098517073747',
 'solver_time': '0.03642719999999988',
 'tot_var': '1.9815619279618752',
 'eps_var_ho': '0.7791412507632073',
 'eps_var_fe': '0.37962713728296194',
 'tr_var_ho': '0.01359156599917644',
 'tr_cov_ho': '-0.00855030428975486',
 'var_fe': '0.565381796754361',
 'cov_fe': '0.17785766674093728',
 'var_ho': '0.554792047021932',
 'cov_ho': '0.1845195615196629',
 'total_time': '0.3574190139770508'}

{'ndt': '5',
 'nm': '19876',
 'ns': '641',
 'n_firms': '196',
 'n_workers': '10000',
 'var_y': '1.9748098517073747',
 'y1s_y1s': '-0.018778670136099976',
 'y1s_y1s_count': '623',
 'y1s_var': '0.43099499394218876',
 'y1s_var_count': '641',
 'y1m_var': '0.9818785506383447',
 'y1m_var_count': '19876',
 'y2m_var': '0.9846774666587136',
 'y2m_var_count': '19876',
 'y1s_y1m1': '0.0010900910450677528',
 'y1s_y1m1_count': '641',
 'y1s_y2m1': '0.0011375691227155879',
 'y1s_y2m1_count': '641',
 'y1m1_y1m1': '-0.003670002633836549',
 'y1m1_y1m1_count': '19876',
 'y2m1_y1m1': '-0.0013277022473827153',
 'y2m1_y1m1_count': '19876',
 'y2m1_y2m1': '-0.00132049117105973',
 'y2m1_y2m1_count': '19876',
 'y1s_y1m2': '3.332738550275785e-06',
 'y1s_y1m2_count': '641',
 'y1s_y2m2': '0.0019052486391465664',
 'y1s_y2m2_count': '641',
 'y1m2_y1m2': '0.0003902146143839548',
 'y1m2_y1m2_count': '19876',
 'y2m2_y1m2': '-0.00023627415237117391',
 'y2m2_y1m2_count': '19876',
 'y2m2_y2m2': '-0.0011953236874498625',
 