Set-up

In [1]:
import sys
from contextlib import redirect_stdout

import bipartitepandas as bpd
import pandas as pd
import pytwoway as tw

Method parameters

In [2]:
# Settings for the fixed-effects estimator with controls, reused by every
# estimation cell below.
# The three categorical control columns are written once and copied for each
# use so that no two settings share a list object.
fe_control_cols = ['occ', 'age_bin', 'exp_cs_bin']

fe_settings = {
    'he': True,
    'ncore': 1,
    'attach_fe_estimates': True,
    'categorical_controls': [*fe_control_cols],
}

# Variance terms: psi, alpha, and the joint set of controls.
fe_settings['Q_var'] = [
    tw.Q.VarCovariate(term)
    for term in ('psi', 'alpha', [*fe_control_cols])
]

# Covariance terms: psi/alpha, alpha/controls, psi/controls.
fe_settings['Q_cov'] = [
    tw.Q.CovCovariate(left, right)
    for left, right in (
        ('psi', 'alpha'),
        ('alpha', [*fe_control_cols]),
        ('psi', [*fe_control_cols]),
    )
]

fecontrol_params = tw.fecontrol_params(fe_settings)

# Cleaning options shared by every bdf.clean(...) call in this notebook.
# NOTE(review): 'copy' is False, so cleaning presumably works on the frame it is
# given rather than on a copy — confirm against the bipartitepandas docs.
clean_params = bpd.clean_params(
    dict(
        connectedness='leave_out_spell',
        collapse_at_connectedness_measure=True,
        drop_single_stayers=True,
        drop_returns='returners',
        copy=False,
    )
)

Load 2010-2014 data

In [3]:
# Read the 2010-2014 panel into `dat`; every 2010-2014 cell below slices it.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
panel_1014_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/data_akm_python_1014_joint.csv'
dat = pd.read_csv(panel_1014_csv)

Full AKM estimation - With Controls - Siren

In [4]:
# Build the bipartite frame for the full 2010-2014 sample.
# i/j/y/t are the BipartiteDataFrame argument names — presumably worker id,
# firm id, outcome and period; confirm against the bipartitepandas docs.
# The three extra columns are registered as categorical and take their first
# value when observations are collapsed.
categorical_cols = ('occ', 'exp_cs_bin', 'age_bin')

bdf = bpd.BipartiteDataFrame(
    i=dat['ident_all'],
    j=dat['siren'],
    y=dat['l_hwr_agg_w2'],
    t=dat['year'],
    occ=dat['cs_clean'],
    exp_cs_bin=dat['exp_cs_bin'],
    age_bin=dat['age_bin'],
    custom_categorical_dict=dict.fromkeys(categorical_cols, True),
    custom_dtype_dict=dict.fromkeys(categorical_cols, 'categorical'),
    custom_how_collapse_dict=dict.fromkeys(categorical_cols, 'first'),
    track_id_changes=True,
)

In [5]:
# Save the pre-cleaning summary of the full 2010-2014 sample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
# NOTE(review): this path is under 'New Results/' while the FTC/OEC cells later
# in the notebook write under 'Results/' — confirm which directory is intended.
with open('C:/Users/Public/Documents/Wages and FTC/New Results/AKM approach/akm_1014_full_withc_siren_summary_pre_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [6]:
# Clean the full-sample frame with the shared clean_params and rebind bdf to the result.
# NOTE(review): clean_params sets 'copy': False, so the pre-clean frame is
# presumably not preserved after this cell — confirm.
bdf = bdf.clean(clean_params)

  frame.loc[:, cols] = factorized[0].reshape((n_rows, n_cols))
  frame.loc[:, cols] = factorized[0].reshape((n_rows, n_cols))


In [7]:
# Save the post-cleaning summary of the full 2010-2014 sample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
# NOTE(review): this path is under 'New Results/' while the FTC/OEC cells later
# in the notebook write under 'Results/' — confirm which directory is intended.
with open('C:/Users/Public/Documents/Wages and FTC/New Results/AKM approach/akm_1014_full_withc_siren_summary_post_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [8]:
# Build the estimator from the cleaned full-sample frame and the shared fecontrol_params.
fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)

# Run the estimation; the results are read from fe_estimator.summary in the next cell.
# NOTE(review): no random generator/seed is passed — if the estimator uses
# random draws, results may vary between runs; confirm.
fe_estimator.fit()

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

In [9]:
fe_out = fe_estimator.summary

# Dump the estimator summary as one 'name; value' line per entry.
# NOTE(review): this path is under 'New Results/' and the file name has no
# '1014' tag and contains 'ftc' although this is the full sample — confirm the
# intended name (the later cells use 'Results/.../akm_<years>_<sample>_...').
fe_summary_path = 'C:/Users/Public/Documents/Wages and FTC/New Results/AKM approach/akm_full_ftc_withc_siren_fe_summary.txt'
with open(fe_summary_path, 'w') as f:
    f.writelines(f'{key}; {value}\n' for key, value in fe_out.items())

In [10]:
# Save the estimated full-sample frame as CSV.
# original_ids() presumably maps ids back to their pre-cleaning values (the
# frame was built with track_id_changes=True) — confirm.
output_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/akm_1014_full_withc_siren.csv'
df = bdf.original_ids()
df.to_csv(output_csv)

  frame.loc[:, id_col] = frame.loc[:, id_col].astype('Int64', copy=False)
  frame.loc[:, id_col] = frame.loc[:, id_col].astype('Int64', copy=False)


FTC AKM estimation - With Controls - Siren

In [None]:
# FTC subsample of the 2010-2014 panel: rows where 'cdi' equals False.
tmp = dat.loc[dat['cdi'] == False]

# Same frame layout as the full-sample cell: three extra columns registered as
# categorical and collapsed by taking their first value.
categorical_cols = ('occ', 'exp_cs_bin', 'age_bin')

bdf = bpd.BipartiteDataFrame(
    i=tmp['ident_all'],
    j=tmp['siren'],
    y=tmp['l_hwr_agg_w2'],
    t=tmp['year'],
    occ=tmp['cs_clean'],
    exp_cs_bin=tmp['exp_cs_bin'],
    age_bin=tmp['age_bin'],
    custom_categorical_dict=dict.fromkeys(categorical_cols, True),
    custom_dtype_dict=dict.fromkeys(categorical_cols, 'categorical'),
    custom_how_collapse_dict=dict.fromkeys(categorical_cols, 'first'),
    track_id_changes=True,
)

In [None]:
# Save the pre-cleaning summary of the 2010-2014 FTC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_ftc_withc_siren_summary_pre_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Clean the 2010-2014 FTC frame with the shared clean_params and rebind bdf to the result.
# NOTE(review): clean_params sets 'copy': False, so the pre-clean frame is
# presumably not preserved after this cell — confirm.
bdf = bdf.clean(clean_params)

In [None]:
# Save the post-cleaning summary of the 2010-2014 FTC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_ftc_withc_siren_summary_post_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Build the estimator from the cleaned 2010-2014 FTC frame and the shared fecontrol_params.
fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)

# Run the estimation; the results are read from fe_estimator.summary in the next cell.
# NOTE(review): no random generator/seed is passed — if the estimator uses
# random draws, results may vary between runs; confirm.
fe_estimator.fit()

In [None]:
fe_out = fe_estimator.summary

# Dump the 2010-2014 FTC estimator summary as one 'name; value' line per entry.
fe_summary_path = 'C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_ftc_withc_siren_fe_summary.txt'
with open(fe_summary_path, 'w') as f:
    f.writelines(f'{key}; {value}\n' for key, value in fe_out.items())

In [None]:
# Save the estimated 2010-2014 FTC frame as CSV.
# original_ids() presumably maps ids back to their pre-cleaning values (the
# frame was built with track_id_changes=True) — confirm.
output_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/akm_1014_ftc_withc_siren.csv'
df = bdf.original_ids()
df.to_csv(output_csv)

OEC AKM estimation - With Controls - Siren

In [None]:
# OEC subsample of the 2010-2014 panel: rows where 'cdi' equals True.
tmp = dat.loc[dat['cdi'] == True]

# Same frame layout as the full-sample cell: three extra columns registered as
# categorical and collapsed by taking their first value.
categorical_cols = ('occ', 'exp_cs_bin', 'age_bin')

bdf = bpd.BipartiteDataFrame(
    i=tmp['ident_all'],
    j=tmp['siren'],
    y=tmp['l_hwr_agg_w2'],
    t=tmp['year'],
    occ=tmp['cs_clean'],
    exp_cs_bin=tmp['exp_cs_bin'],
    age_bin=tmp['age_bin'],
    custom_categorical_dict=dict.fromkeys(categorical_cols, True),
    custom_dtype_dict=dict.fromkeys(categorical_cols, 'categorical'),
    custom_how_collapse_dict=dict.fromkeys(categorical_cols, 'first'),
    track_id_changes=True,
)

In [None]:
# Save the pre-cleaning summary of the 2010-2014 OEC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_oec_withc_siren_summary_pre_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Clean the 2010-2014 OEC frame with the shared clean_params and rebind bdf to the result.
# NOTE(review): clean_params sets 'copy': False, so the pre-clean frame is
# presumably not preserved after this cell — confirm.
bdf = bdf.clean(clean_params)

In [None]:
# Save the post-cleaning summary of the 2010-2014 OEC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_oec_withc_siren_summary_post_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Build the estimator from the cleaned 2010-2014 OEC frame and the shared fecontrol_params.
fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)

# Run the estimation; the results are read from fe_estimator.summary in the next cell.
# NOTE(review): no random generator/seed is passed — if the estimator uses
# random draws, results may vary between runs; confirm.
fe_estimator.fit()

In [None]:
fe_out = fe_estimator.summary

# Dump the 2010-2014 OEC estimator summary as one 'name; value' line per entry.
fe_summary_path = 'C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_1014_oec_withc_siren_fe_summary.txt'
with open(fe_summary_path, 'w') as f:
    f.writelines(f'{key}; {value}\n' for key, value in fe_out.items())

In [None]:
# Save the estimated 2010-2014 OEC frame as CSV.
# original_ids() presumably maps ids back to their pre-cleaning values (the
# frame was built with track_id_changes=True) — confirm.
output_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/akm_1014_oec_withc_siren.csv'
df = bdf.original_ids()
df.to_csv(output_csv)

Load 2005-2009 data

In [None]:
# Replace `dat` with the 2005-2009 panel; every cell below now works on this period.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
panel_0509_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/data_akm_python_0509_joint.csv'
dat = pd.read_csv(panel_0509_csv)

FTC AKM estimation - With Controls - Siren

In [None]:
# FTC subsample of the 2005-2009 panel: rows where 'cdi' equals False.
tmp = dat.loc[dat['cdi'] == False]

# Same frame layout as the 2010-2014 cells: three extra columns registered as
# categorical and collapsed by taking their first value.
categorical_cols = ('occ', 'exp_cs_bin', 'age_bin')

bdf = bpd.BipartiteDataFrame(
    i=tmp['ident_all'],
    j=tmp['siren'],
    y=tmp['l_hwr_agg_w2'],
    t=tmp['year'],
    occ=tmp['cs_clean'],
    exp_cs_bin=tmp['exp_cs_bin'],
    age_bin=tmp['age_bin'],
    custom_categorical_dict=dict.fromkeys(categorical_cols, True),
    custom_dtype_dict=dict.fromkeys(categorical_cols, 'categorical'),
    custom_how_collapse_dict=dict.fromkeys(categorical_cols, 'first'),
    track_id_changes=True,
)

In [None]:
# Save the pre-cleaning summary of the 2005-2009 FTC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_ftc_withc_siren_summary_pre_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Clean the 2005-2009 FTC frame with the shared clean_params and rebind bdf to the result.
# NOTE(review): clean_params sets 'copy': False, so the pre-clean frame is
# presumably not preserved after this cell — confirm.
bdf = bdf.clean(clean_params)

In [None]:
# Save the post-cleaning summary of the 2005-2009 FTC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_ftc_withc_siren_summary_post_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Build the estimator from the cleaned 2005-2009 FTC frame and the shared fecontrol_params.
fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)

# Run the estimation; the results are read from fe_estimator.summary in the next cell.
# NOTE(review): no random generator/seed is passed — if the estimator uses
# random draws, results may vary between runs; confirm.
fe_estimator.fit()

In [None]:
fe_out = fe_estimator.summary

# Dump the 2005-2009 FTC estimator summary as one 'name; value' line per entry.
fe_summary_path = 'C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_ftc_withc_siren_fe_summary.txt'
with open(fe_summary_path, 'w') as f:
    f.writelines(f'{key}; {value}\n' for key, value in fe_out.items())

In [None]:
# Save the estimated 2005-2009 FTC frame as CSV.
# original_ids() presumably maps ids back to their pre-cleaning values (the
# frame was built with track_id_changes=True) — confirm.
output_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/akm_0509_ftc_withc_siren.csv'
df = bdf.original_ids()
df.to_csv(output_csv)

OEC AKM estimation - With Controls - Siren

In [None]:
# OEC subsample of the 2005-2009 panel: rows where 'cdi' equals True.
tmp = dat.loc[dat['cdi'] == True]

# Same frame layout as the 2010-2014 cells: three extra columns registered as
# categorical and collapsed by taking their first value.
categorical_cols = ('occ', 'exp_cs_bin', 'age_bin')

bdf = bpd.BipartiteDataFrame(
    i=tmp['ident_all'],
    j=tmp['siren'],
    y=tmp['l_hwr_agg_w2'],
    t=tmp['year'],
    occ=tmp['cs_clean'],
    exp_cs_bin=tmp['exp_cs_bin'],
    age_bin=tmp['age_bin'],
    custom_categorical_dict=dict.fromkeys(categorical_cols, True),
    custom_dtype_dict=dict.fromkeys(categorical_cols, 'categorical'),
    custom_how_collapse_dict=dict.fromkeys(categorical_cols, 'first'),
    track_id_changes=True,
)

In [None]:
# Save the pre-cleaning summary of the 2005-2009 OEC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_oec_withc_siren_summary_pre_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Clean the 2005-2009 OEC frame with the shared clean_params and rebind bdf to the result.
# NOTE(review): clean_params sets 'copy': False, so the pre-clean frame is
# presumably not preserved after this cell — confirm.
bdf = bdf.clean(clean_params)

In [None]:
# Save the post-cleaning summary of the 2005-2009 OEC subsample to a text file.
# redirect_stdout swaps stdout only inside the `with` block and then restores
# the stream that was active before (in a notebook, the cell output), and the
# file is closed even if summary() raises.
with open('C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_oec_withc_siren_summary_post_clean.txt', 'w') as file, redirect_stdout(file):
    bdf.summary()

In [None]:
# Build the estimator from the cleaned 2005-2009 OEC frame and the shared fecontrol_params.
fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)

# Run the estimation; the results are read from fe_estimator.summary in the next cell.
# NOTE(review): no random generator/seed is passed — if the estimator uses
# random draws, results may vary between runs; confirm.
fe_estimator.fit()

In [None]:
fe_out = fe_estimator.summary

# Dump the 2005-2009 OEC estimator summary as one 'name; value' line per entry.
fe_summary_path = 'C:/Users/Public/Documents/Wages and FTC/Results/AKM approach/akm_0509_oec_withc_siren_fe_summary.txt'
with open(fe_summary_path, 'w') as f:
    f.writelines(f'{key}; {value}\n' for key, value in fe_out.items())

In [None]:
# Save the estimated 2005-2009 OEC frame as CSV.
# original_ids() presumably maps ids back to their pre-cleaning values (the
# frame was built with track_id_changes=True) — confirm.
output_csv = 'C:/Users/Public/Documents/Wages and FTC/Data/akm_0509_oec_withc_siren.csv'
df = bdf.original_ids()
df.to_csv(output_csv)