# pytwoway example

In [1]:
import os, sys
# Navigate to parent folder for import: move three directories up so that
# `pytwoway` is importable from there (presumably the repository root,
# relying on the working directory being on the import path — TODO confirm).
os.chdir('../../..')
from pytwoway import SimTwoWay as stw  # simulator class, aliased as `stw`
from pytwoway import TwoWay as tw  # estimator wrapper class, aliased as `tw`

# Weird code needed for output to show
# Change the working directory to docs-src/source/notebooks, relative to the
# folder entered above.
os.chdir('docs-src/source/notebooks')
# Keep a reference to whatever stream sys.stdout currently points at.
stdout = sys.stdout
# Add the parent of sys.path[0] to the module search path at position 1.
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# Re-assign the saved stream to sys.stdout.
# NOTE(review): nothing between the save and this restore reassigns
# sys.stdout, so the pair looks like a no-op — confirm before removing.
sys.stdout = stdout

## First, get your data

In [2]:
# For the example, we simulate data instead of loading a real dataset.
# `stw` is pytwoway.SimTwoWay; it is constructed here with no arguments.
# NOTE(review): no random seed is set in this notebook, so presumably the
# simulated values differ between runs — confirm whether a seed can be passed.
stw_net = stw()
# sim_network() produces the simulated data that is rendered below.
sim_data = stw_net.sim_network()
# Render the simulated data (columns include wid, year, fid and comp,
# which are the ones referenced by the column dictionary in the next step).
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,8,0.967422,0.908458,1,5,156,False,2.831875
1,1,2,8,0.967422,0.908458,1,5,156,False,2.819833
2,1,3,8,0.967422,0.908458,1,5,156,False,3.077293
3,1,4,8,0.967422,0.908458,1,5,156,False,1.176364
4,1,5,8,0.967422,0.908458,1,5,156,False,4.010982
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,1,-0.430727,-0.908458,1,1,26,False,-0.478747
49996,10000,2,7,-0.430727,0.604585,2,1,130,True,1.440287
49997,10000,3,9,-0.430727,1.335178,3,1,182,True,2.392118
49998,10000,4,7,-0.430727,0.604585,4,2,129,True,0.105051


## Second, create a TwoWay object using your data

In [3]:
# TwoWay takes a column dictionary so that columns are named correctly
# (alternatively, you can rename the columns of your data yourself).
# The simulated data already uses these column names, so every entry
# simply maps a name to itself.
col_dict = {column: column for column in ('fid', 'wid', 'year', 'comp')}
tw_net = tw(
    data=sim_data,
    formatting='long',
    col_dict=col_dict,
)

## Now we can run the FE estimator

In [4]:
# Optional parameters for the FE estimator, passed to fit_fe via `user_fe`.
fe_params = dict(
    ncore=1,
    batch=1,
    ndraw_pii=50,
    ndraw_tr=5,
    check=False,
    hetero=False,
    out='res_akm.json',
    con=False,
    logfile='',
    levfile='',
    statsonly=False,
)
# Run the FE estimator; the returned results are displayed at the end of the notebook.
fe_res = tw_net.fit_fe(user_fe=fe_params)

100%|██████████| 5/5 [00:00<00:00, 228.85it/s]


## We can also run the CRE estimator

In [5]:
# Optional parameters for the CRE estimator, passed to fit_cre via `user_cre`.
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}

# These parameters are specifically for the KMeans algorithm.
# NOTE(review): 'precompute_distances' and 'n_jobs' carry the 'deprecated'
# sentinel; scikit-learn removed both KMeans arguments in later releases —
# confirm these keys are still accepted by the installed versions.
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}

# Clustering options; the KMeans settings above are nested under 'user_KMeans'.
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}

# Run the CRE estimator with both the estimator and the clustering options.
cre_res = tw_net.fit_cre(
    user_cre=cre_params,
    user_cluster=cluster_params,
)

## Finally, we can investigate the results

In [6]:
# Show the FE results first, then the CRE results, as two separate outputs.
display(fe_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'n_firms': '196',
 'n_workers': '10000',
 'n_movers': '9330',
 'n_stayers': '670',
 'mover_quantiles': '[149.0, 179.0, 189.0, 196.0, 200.0, 205.0, 208.0, 212.1451612903226, 217.55656108597285, 225.48026315789474, 259.0]',
 'size_quantiles': '[152.0, 182.0, 192.0, 199.0, 204.0, 208.0, 211.0, 216.0, 221.0, 229.0, 266.0]',
 'between_firm_var': '1.010646493110679',
 'var_y': '1.992814162213511',
 'solver_time': '0.003215100999999887',
 'tot_var': '2.0068546056406333',
 'eps_var_ho': '0.7091606084421883',
 'eps_var_fe': '0.5294913141313505',
 'tr_var_ho': '0.008108132690252146',
 'tr_cov_ho': '-0.003507309253231166',
 'var_fe': '0.6075922483892001',
 'cov_fe': '0.17593931335749416',
 'var_ho': '0.6018422800772509',
 'cov_ho': '0.1784265589215105',
 'total_time': '0.07361197471618652'}

{'cores': '1',
 'ndt': '5',
 'n_firms': '196',
 'n_workers': '10000',
 'n_movers': '9330',
 'n_stayers': '670',
 'y1s_y1s': '0.005928449596607331',
 'y1s_y1s_count': '645',
 'y1s_var': '0.4191017286891941',
 'y1s_var_count': '670',
 'y1m_var': '0.9921428346679829',
 'y1m_var_count': '19785',
 'y2m_var': '0.9973358077541287',
 'y2m_var_count': '19785',
 'y1s_y1m1': '0.0017440606581509675',
 'y1s_y1m1_count': '670',
 'y1s_y2m1': '0.00162860793596798',
 'y1s_y2m1_count': '670',
 'y1m1_y1m1': '5.0624844320937406e-05',
 'y1m1_y1m1_count': '19785',
 'y2m1_y1m1': '0.0009916287649128293',
 'y2m1_y1m1_count': '19785',
 'y2m1_y2m1': '-6.978557701015131e-05',
 'y2m1_y2m1_count': '19785',
 'y1s_y1m2': '-0.0019139080801833252',
 'y1s_y1m2_count': '670',
 'y1s_y2m2': '-0.0022417024606969527',
 'y1s_y2m2_count': '670',
 'y1m2_y1m2': '-0.0003277557365144826',
 'y1m2_y1m2_count': '19785',
 'y2m2_y1m2': '0.0016106602234900386',
 'y2m2_y1m2_count': '19785',
 'y2m2_y2m2': '0.0014889081393325346',
 'y2m2_y