# pytwoway example

In [1]:
import os, sys

# Navigate three folders up so that `pytwoway` can be imported, then return to
# the folder the notebook was started from. The starting folder is remembered
# and restored in `finally`, so a failed import (or running the notebook from a
# differently named folder) does not leave the kernel in the wrong directory.
_notebook_dir = os.getcwd()
os.chdir('../../..')
try:
    from pytwoway import SimTwoWay as sn
    from pytwoway import TwoWay as tw
finally:
    os.chdir(_notebook_dir)

# Weird code needed for output to show
# NOTE(review): sys.stdout is saved and re-assigned without being changed in
# between, so only the sys.path insertion has a visible effect here — kept as-is.
stdout = sys.stdout
sys.path.insert(1, os.path.join(sys.path[0], '..'))
sys.stdout = stdout

## First, get your data

In [2]:
# For the example, we simulate data instead of loading a real dataset:
# `sn()` builds the simulator with its default settings, `sim_network()` returns
# the simulated data, and `display` renders it below.
stw_net = sn()
sim_data = stw_net.sim_network()
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,6,0.967422,0.348756,1,1,122,False,2.346339
1,1,2,6,0.967422,0.348756,2,2,113,True,1.322358
2,1,3,6,0.967422,0.348756,2,2,113,False,2.588530
3,1,4,7,0.967422,0.604585,3,2,146,True,1.300185
4,1,5,7,0.967422,0.604585,3,2,146,False,0.822884
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,9,0.967422,1.335178,1,1,194,False,1.159669
49996,10000,2,9,0.967422,1.335178,2,1,177,True,1.944489
49997,10000,3,9,0.967422,1.335178,3,3,191,True,1.268290
49998,10000,4,9,0.967422,1.335178,3,3,191,False,2.655199


## Second, create a TwoWay object using your data

In [3]:
# A column dictionary is passed so that the columns are named correctly.
# The simulated data already uses these names, so each entry maps a name to
# itself. Alternatively, you can rename the columns of your data manually.
col_dict = {
    'fid': 'fid',
    'wid': 'wid',
    'year': 'year',
    'comp': 'comp',
}
tw_net = tw(
    data=sim_data,
    formatting='long',
    col_dict=col_dict,
)

## Now we can run the FE estimator

In [4]:
# Optional settings for the FE estimator, listed one per line for readability
fe_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_akm.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}
fe_res = tw_net.fit_fe(user_fe=fe_params)

100%|██████████| 5/5 [00:00<00:00, 240.84it/s]


## We can also run the CRE estimator

In [5]:
# Optional settings for the CRE estimator
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}

# These parameters are specifically for the KMeans algorithm
# NOTE(review): the 'deprecated' values for 'precompute_distances' and 'n_jobs'
# (and 'algorithm': 'auto') look tied to an older scikit-learn release — confirm
# they are still accepted by the installed version.
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}

# Clustering settings; the KMeans settings above are nested under 'user_KMeans'
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}

cre_res = tw_net.fit_cre(
    user_cre=cre_params,
    user_cluster=cluster_params,
)

## Finally, we can investigate the results

In [6]:
# Show the FE results first, then the CRE results
display(fe_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'n_firms': '196',
 'n_workers': '10000',
 'n_movers': '9350',
 'n_stayers': '650',
 'mover_quantiles': '[152.0, 180.0, 187.05284974093266, 192.0, 197.00445544554455, 202.0, 208.0, 212.79953703703706, 220.0, 230.39094827586206, 250.0]',
 'size_quantiles': '[156.0, 183.69809264305178, 192.0, 196.0, 201.0, 206.0, 211.0, 215.82459396751742, 223.14451901565997, 233.0, 253.0]',
 'between_firm_var': '0.9983405927085395',
 'var_y': '1.9681988699051927',
 'solver_time': '0.002823090000000139',
 'tot_var': '1.9764342180333196',
 'eps_var_ho': '0.6933437002423121',
 'eps_var_fe': '0.5171387190883991',
 'tr_var_ho': '0.007764336651059382',
 'tr_cov_ho': '-0.0028072062760941307',
 'var_fe': '0.5831321245228246',
 'cov_fe': '0.17753376275854013',
 'var_ho': '0.5777487706192521',
 'cov_ho': '0.17948012154535067',
 'total_time': '0.07208514213562012'}

{'cores': '1',
 'ndt': '5',
 'n_firms': '196',
 'n_workers': '10000',
 'n_movers': '9350',
 'n_stayers': '650',
 'y1s_y1s': '-0.021386483485042164',
 'y1s_y1s_count': '623',
 'y1s_var': '0.4267006684695281',
 'y1s_var_count': '650',
 'y1m_var': '0.9670388867052682',
 'y1m_var_count': '19733',
 'y2m_var': '0.9791804969423946',
 'y2m_var_count': '19733',
 'y1s_y1m1': '0.0023195045757251394',
 'y1s_y1m1_count': '650',
 'y1s_y2m1': '-0.0040995046188736384',
 'y1s_y2m1_count': '650',
 'y1m1_y1m1': '0.0003777128399447458',
 'y1m1_y1m1_count': '19733',
 'y2m1_y1m1': '0.0010694965229037844',
 'y2m1_y1m1_count': '19733',
 'y2m1_y2m1': '0.001101788224190964',
 'y2m1_y2m1_count': '19733',
 'y1s_y1m2': '0.002851346485028494',
 'y1s_y1m2_count': '650',
 'y1s_y2m2': '0.000846075703050608',
 'y1s_y2m2_count': '650',
 'y1m2_y1m2': '-8.014982973032403e-05',
 'y1m2_y1m2_count': '19733',
 'y2m2_y1m2': '0.0013816618786702177',
 'y2m2_y1m2_count': '19733',
 'y2m2_y2m2': '-0.00047697472543990397',
 'y2m2_y2