# pytwoway example

In [1]:
import os, sys
# Navigate to parent folder for import: move the working directory three
# levels up before importing pytwoway
# (assumes the notebook is started from docs-src/source/notebooks - TODO confirm)
os.chdir('../../..')
from pytwoway import sim_twfe_network as sn
from pytwoway import twfe_network as tn

# Weird code needed for output to show
# Step back down into the notebooks folder (the path is relative to the folder entered above)
os.chdir('docs-src/source/notebooks')
# Keep a reference to the current stdout stream so it can be reassigned below
stdout = sys.stdout
# Insert the parent of the first module search path entry at position 1 of the search path
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# Point sys.stdout back at the stream saved above
# NOTE(review): nothing visible in this cell replaces sys.stdout between the save and
# the restore, so this looks like a no-op - confirm before removing
sys.stdout = stdout

## First, get your data

In [2]:
# For the example, we simulate data
# Build the simulator without passing any arguments
stw_net = sn.SimTwoWay()
# Generate the simulated data; it is displayed below and passed to tn.TwoWay in a later cell
# NOTE(review): no random seed is set in this notebook, so the simulated values
# presumably differ between runs - confirm whether SimTwoWay accepts a seed
sim_data = stw_net.sim_network()
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,7,0.430727,0.604585,1,2,138,False,-0.316670
1,1,2,7,0.430727,0.604585,1,2,138,False,2.537300
2,1,3,7,0.430727,0.604585,2,1,146,True,2.583865
3,1,4,7,0.430727,0.604585,3,1,133,True,2.452087
4,1,5,5,0.430727,0.114185,4,1,91,True,1.314499
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,8,0.967422,0.908458,1,1,165,False,1.599970
49996,10000,2,8,0.967422,0.908458,2,2,155,True,1.630563
49997,10000,3,8,0.967422,0.908458,2,2,155,False,0.334739
49998,10000,4,9,0.967422,1.335178,3,1,179,True,2.419748


## Second, create a twfe_network object using your data

In [3]:
# We need to specify a column dictionary to make sure columns are named correctly. You can also manually update column names yourself
# NOTE(review): presumably each key is the name pytwoway expects and each value is the
# matching column name in your data - confirm against the TwoWay documentation
col_dict = {
    'fid': 'fid',
    'wid': 'wid',
    'year': 'year',
    'comp': 'comp',
}
# tw_net is short for two-way network
tw_net = tn.TwoWay(data=sim_data, col_dict=col_dict)

## Third, prepare your data for running the FE estimator

In [4]:
# Prepare the data held by tw_net for the FE estimator
# (no return value is captured, so the prepared state presumably lives on tw_net - confirm)
tw_net.prep_fe()

## Now we can run the FE estimator

In [5]:
# Optional parameters for the FE estimator, listed one per line for readability
# NOTE(review): 'out' presumably names the file the results are written to - confirm
fe_params = {
    'ncore': 1,
    'batch': 1,
    'ndraw_pii': 50,
    'ndraw_tr': 5,
    'check': False,
    'hetero': False,
    'out': 'res_akm.json',
    'con': False,
    'logfile': '',
    'levfile': '',
    'statsonly': False,
}
# Run the FE estimator and keep the returned results for inspection later
fe_res = tw_net.fit_fe(user_fe=fe_params)

100%|██████████| 5/5 [00:00<00:00, 31.16it/s]


## Fourth, prepare your data for running the CRE estimator

In [6]:
# Optional parameters
# These parameters are specifically for the KMeans algorithm
# NOTE(review): 'precompute_distances' and 'n_jobs' are set to the string 'deprecated';
# presumably this mirrors the KMeans defaults of an older scikit-learn release - confirm
# that the installed version still accepts them
# NOTE(review): 'random_state' is None, so the clustering is presumably not reproducible
# between runs - confirm
KMeans_params = {
    'n_clusters': 10,
    'init': 'k-means++',
    'n_init': 500,
    'max_iter': 300,
    'tol': 0.0001,
    'precompute_distances': 'deprecated',
    'verbose': 0,
    'random_state': None,
    'copy_x': True,
    'n_jobs': 'deprecated',
    'algorithm': 'auto',
}
# Clustering options; the KMeans settings above are nested under 'user_KMeans'
cluster_params = {
    'cdf_resolution': 10,
    'grouping': 'quantile_all',
    'year': None,
    'user_KMeans': KMeans_params,
}
tw_net.prep_cre(user_cluster=cluster_params)

## Now we can run the CRE estimator

In [7]:
# Optional parameters for the CRE estimator, listed one per line for readability
cre_params = {
    'ncore': 1,
    'ndraw_tr': 5,
    'ndp': 50,
    'out': 'res_cre.json',
    'posterior': False,
    'wobtw': False,
}
# Run the CRE estimator (parameters are passed positionally) and keep the returned results
cre_res = tw_net.fit_cre(cre_params)

## Finally, we can investigate the results

In [8]:
# Show the FE results first, then the CRE results, each as its own output
display(fe_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'nm': '19797.0',
 'ns': '657',
 'n_firms': '196',
 'n_workers': '20454',
 'n_movers': '19797',
 'mover_quantiles': '[161.0, 179.0, 188.0, 194.0, 200.0, 205.0, 210.0, 212.0, 217.0, 225.86945054945056, 246.0]',
 'size_quantiles': '[163.0, 183.39019073569483, 192.78025974025974, 197.0, 203.52039312039312, 208.1031175059952, 213.0, 216.0, 220.0, 229.0, 248.0]',
 'between_firm_var': '0.9791948500221341',
 'var_y': '1.9534475375358358',
 'solver_time': '0.028075437000000036',
 'tot_var': '1.9584771001512542',
 'eps_var_ho': '0.7915537561311395',
 'eps_var_fe': '0.38548201852581543',
 'tr_var_ho': '0.015167787940336514',
 'tr_cov_ho': '-0.008444626700231293',
 'var_fe': '0.5921559314545817',
 'cov_fe': '0.16352784960412886',
 'var_ho': '0.5801498119382077',
 'cov_ho': '0.17021222558782226',
 'total_time': '0.2863938808441162'}

{'ndt': '5',
 'nm': '19797',
 'ns': '657',
 'n_firms': '196',
 'n_workers': '10000',
 'var_y': '1.9534475375358358',
 'y1s_y1s': '-0.01189963890613305',
 'y1s_y1s_count': '639',
 'y1s_var': '0.4300736238962055',
 'y1s_var_count': '657',
 'y1m_var': '0.9740000192841033',
 'y1m_var_count': '19797',
 'y2m_var': '0.9773478869611208',
 'y2m_var_count': '19797',
 'y1s_y1m1': '-0.0038492794840281654',
 'y1s_y1m1_count': '657',
 'y1s_y2m1': '-0.0019106341485113634',
 'y1s_y2m1_count': '657',
 'y1m1_y1m1': '-0.0010446330663667635',
 'y1m1_y1m1_count': '19797',
 'y2m1_y1m1': '-0.00015778611082538767',
 'y2m1_y1m1_count': '19797',
 'y2m1_y2m1': '0.0010682926690437032',
 'y2m1_y2m1_count': '19797',
 'y1s_y1m2': '-5.1502378280613325e-05',
 'y1s_y1m2_count': '657',
 'y1s_y2m2': '-0.0038982376478934985',
 'y1s_y2m2_count': '657',
 'y1m2_y1m2': '0.0005614296192659173',
 'y1m2_y1m2_count': '19797',
 'y2m2_y1m2': '-0.0003095746753201728',
 'y2m2_y1m2_count': '19797',
 'y2m2_y2m2': '-0.000175110414497993