# pytwoway example

In [2]:
#  we import the pytwoway package 
# (make sure you have installed it using pip install pytwoway)
import pytwoway as tw

## Simulate some data

The package contains functions to simulate data. We use them here to keep things simple. If you have your own data, you can use it instead: load it as a pandas DataFrame and pass it as the input.

As you can see below, our data must contain the following required columns:

 - `wid`: the worker identifier
 - `fid`: the firm identifier
 - `year`: the time
 - `comp`: the outcome variable, in our case the compensation

In [3]:
# Generate a synthetic dataset with the package's simulator (default settings)
# instead of loading real data.
# NOTE(review): no random seed is set in this notebook, so the simulated values
# presumably differ between runs -- confirm how pytwoway draws its randomness.
sim_data = (
    tw.SimTwoWay()
    .sim_network()
)
display(sim_data)

Unnamed: 0,wid,year,k,alpha,psi,spell,freq,fid,move,comp
0,1,1,5,0.967422,0.114185,1,1,84,False,1.644341
1,1,2,8,0.967422,0.908458,2,3,152,True,-0.109389
2,1,3,8,0.967422,0.908458,2,3,152,False,1.955849
3,1,4,8,0.967422,0.908458,2,3,152,False,1.034616
4,1,5,9,0.967422,1.335178,3,1,183,True,4.729998
...,...,...,...,...,...,...,...,...,...,...
49995,10000,1,1,-0.430727,-0.908458,1,1,22,False,-1.220417
49996,10000,2,4,-0.430727,-0.114185,2,1,67,True,0.341023
49997,10000,3,7,-0.430727,0.604585,3,3,136,True,-0.310735
49998,10000,4,7,-0.430727,0.604585,3,3,136,False,0.101317


## Create a TwoWay object using your data

In [4]:
# Column dictionary: each key is a column role the TwoWay object is told about,
# each value is the name of the matching column in our dataframe. The simulated
# data already uses these names, so every entry maps a name to itself.
# Alternatively, you can rename the columns of your dataframe yourself.
col_name_dict = dict(
    fid='fid',    # column name for the firm identifier
    wid='wid',    # column name for the worker identifier
    year='year',  # column name for the year
    comp='comp',  # column name for the outcome variable
)

# Build the TwoWay object from the simulated data; the estimators in the
# following cells are all called on this object.
tw_net = tw.TwoWay(
    data=sim_data,
    formatting='long',
    col_dict=col_name_dict,
)

## Now we can run the FE estimator

In [4]:
# Optional settings for the FE estimator, collected in one dictionary.
# NOTE(review): the meaning of each key is defined by pytwoway; judging by the
# names, 'out' looks like the results file and 'ncore' the number of cores --
# confirm against the package documentation.
fe_params = dict(
    ncore=1,
    batch=1,
    ndraw_pii=50,
    ndraw_tr=5,
    check=False,
    hetero=False,
    out='res_fe.json',
    con=False,
    logfile='',
    levfile='',
    statsonly=False,
)


# Run the fixed effect decomposition with the settings chosen above and keep
# the returned results for inspection later on.
fe_res = tw_net.fit_fe(
    user_fe=fe_params,
)

100%|██████████| 5/5 [00:00<00:00, 223.77it/s]


## We can also run the CRE estimator

In [5]:
# Optional settings for the CRE estimator
cre_params = dict(
    ncore=1,
    ndraw_tr=5,
    ndp=50,
    out='res_cre.json',
    posterior=False,
    wobtw=False,
)

# These parameters are specifically for the KMeans algorithm.
# NOTE(review): 'precompute_distances' and 'n_jobs' are set to the placeholder
# string 'deprecated' -- presumably copied from the defaults of an older
# scikit-learn KMeans; verify the installed versions still accept these keys.
KMeans_params = dict(
    n_clusters=10,
    init='k-means++',
    n_init=500,
    max_iter=300,
    tol=0.0001,
    precompute_distances='deprecated',
    verbose=0,
    random_state=None,
    copy_x=True,
    n_jobs='deprecated',
    algorithm='auto',
)

# Clustering settings; the KMeans settings defined just above are nested
# under the 'user_KMeans' key.
cluster_params = dict(
    cdf_resolution=10,
    grouping='quantile_all',
    year=None,
    user_KMeans=KMeans_params,
)
# Run the CRE estimator with the estimator and clustering settings from above
cre_res = tw_net.fit_cre(
    user_cre=cre_params,
    user_cluster=cluster_params,
)

## Finally, we can investigate the results

In [6]:
# Show the FE results first, then the CRE results
display(fe_res, cre_res)

{'cores': '1',
 'ndp': '50',
 'ndt': '5',
 'n_firms': '194',
 'n_workers': '10000',
 'n_movers': '9342',
 'n_stayers': '658',
 'mover_quantiles': '[169.0, 182.9018970189702, 191.0, 197.0, 201.4356968215159, 206.0, 210.0, 216.0, 221.0, 228.6442060085837, 252.0]',
 'size_quantiles': '[169.0, 185.0, 195.0, 201.0, 205.0, 210.0, 213.0, 220.0, 225.91288888888892, 231.0, 255.0]',
 'between_firm_var': '0.9952771137426026',
 'var_y': '1.9674165885604504',
 'solver_time': '0.003141746999999917',
 'tot_var': '1.9749358615740908',
 'eps_var_ho': '0.6944922089616817',
 'eps_var_fe': '0.5190707120605959',
 'tr_var_ho': '0.0063540842392182705',
 'tr_cov_ho': '-0.0016462682768020403',
 'var_fe': '0.5888298918123765',
 'cov_fe': '0.17313564231257597',
 'var_ho': '0.5844170298131532',
 'cov_ho': '0.17427896280467575',
 'total_time': '0.0749521255493164'}

{'cores': '1',
 'ndt': '5',
 'n_firms': '194',
 'n_workers': '10000',
 'n_movers': '9342',
 'n_stayers': '658',
 'y1s_y1s': '-0.022118075710381307',
 'y1s_y1s_count': '634',
 'y1s_var': '0.40822821774678264',
 'y1s_var_count': '658',
 'y1m_var': '0.9795200163981378',
 'y1m_var_count': '19848',
 'y2m_var': '0.9732388508738447',
 'y2m_var_count': '19848',
 'y1s_y1m1': '-0.0025762301226147227',
 'y1s_y1m1_count': '658',
 'y1s_y2m1': '-0.0021464529568242575',
 'y1s_y2m1_count': '658',
 'y1m1_y1m1': '5.685936069007484e-05',
 'y1m1_y1m1_count': '19848',
 'y2m1_y1m1': '0.00030242568545481286',
 'y2m1_y1m1_count': '19848',
 'y2m1_y2m1': '0.00040250824642047164',
 'y2m1_y2m1_count': '19848',
 'y1s_y1m2': '-0.0024265973436971783',
 'y1s_y1m2_count': '658',
 'y1s_y2m2': '-0.0037280420030730973',
 'y1s_y2m2_count': '658',
 'y1m2_y1m2': '-0.001117554552676051',
 'y1m2_y1m2_count': '19848',
 'y2m2_y1m2': '-0.0004995491492006349',
 'y2m2_y1m2_count': '19848',
 'y2m2_y2m2': '-0.0005076355338281374',
 