This notebook generates the confidence intervals for the parameters that we put into the paper.



In [23]:
import pickle
import sys
import numpy as np
import pandas as pd
from scipy import stats
from pyprojroot.here import here
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append(str(here("write/src")))
from plot_helpers import find_mode, fit_trunc_laplace

In [1]:
#######################################
# Import data
#######################################

# Empirical trajectories
df_traj_all = pd.read_csv(here("write/input/import/all-trajectories.csv"))

# Bootstrapped fits
with open(here("write/input/fit/bootstrap-all-trajectories.pickle"), "rb") as f:
    bootstrap_results = pickle.load(f)

alpha_q0_bootstrap = bootstrap_results['alpha_q0_bootstrap']

with open(here("write/input/simulate/simulated-all.pickle"), "rb") as f:
    simulated_results_all = pickle.load(f)
    sim_trajs_all = simulated_results_all['trajs']

In [17]:
df_traj_all = df_traj_all.dropna(subset=['q_adj_delta'])
yrs_b = df_traj_all[(df_traj_all.CareerAgeZero > 0) & (df_traj_all.CareerAgeZero <= 4)]
yrs_c = df_traj_all[(df_traj_all.CareerAgeZero > 4) & (df_traj_all.CareerAgeZero <= 7)]
yrs_d = df_traj_all[(df_traj_all.CareerAgeZero > 7)]

In [58]:
def block_bootstrap(df, block_column, target_column, n_iterations=500):
    unique_values = df[block_column].unique()
    bootstrapped_targets = []
    
    for _ in range(n_iterations):
        sampled_values = np.random.choice(unique_values, size=len(unique_values), replace=True)
        sampled_target = df.loc[df[block_column].isin(sampled_values), target_column].copy()
        bootstrapped_targets.append(sampled_target)
    
    return bootstrapped_targets

yrs_b_boot = block_bootstrap(yrs_b, 'dblp', 'q_adj_delta', n_iterations=10000)
yrs_b_modes = [find_mode(x) for x in yrs_b_boot]
yrs_b_alphas = [fit_trunc_laplace(x, mode) for (x, mode) in zip(yrs_b_boot, modes)]

In [59]:
yrs_c_boot = block_bootstrap(yrs_c, 'dblp', 'q_adj_delta', n_iterations=10000)
yrs_c_modes = [find_mode(x) for x in yrs_c_boot]
yrs_c_alphas = [fit_trunc_laplace(x, mode) for (x, mode) in zip(yrs_c_boot, modes)]

In [64]:
yrs_d_boot = block_bootstrap(yrs_d, 'dblp', 'q_adj_delta', n_iterations=10000)
yrs_d_modes = [find_mode(x) for x in yrs_d_boot]
yrs_d_alphas = [fit_trunc_laplace(x, mode) for (x, mode) in zip(yrs_d_boot, modes)]

In [66]:
[
    np.mean(yrs_b_alphas), np.percentile(yrs_b_alphas, [2.5, 97.5]),
    np.mean(yrs_b_modes), np.percentile(yrs_b_modes, [2.5, 97.5])
]

[3.8729633406900676,
 array([3.78120744, 3.97198334]),
 -0.1275923319844635,
 array([-0.47720549,  0.35216393])]

In [67]:
[
    np.mean(yrs_c_alphas), np.percentile(yrs_c_alphas, [2.5, 97.5]),
    np.mean(yrs_c_modes), np.percentile(yrs_c_modes, [2.5, 97.5])
]

[3.6411461469899478,
 array([3.53924834, 3.7726815 ]),
 -0.12591832441685538,
 array([-1.76687527,  0.3208474 ])]

In [68]:
[
    np.mean(yrs_d_alphas), np.percentile(yrs_d_alphas, [2.5, 97.5]),
    np.mean(yrs_d_modes), np.percentile(yrs_d_modes, [2.5, 97.5]),
]

[3.2825849429751375,
 array([3.18797392, 3.38665787]),
 -0.40143228949189347,
 array([-0.6398291 ,  0.09291297])]

In [121]:
def print_CIs(bootstrapped):
        print(f"""mean: {np.mean(bootstrapped)} 95% CI: ({np.percentile(bootstrapped, [2.5, 97.5])})""")

In [123]:
print_CIs([x[(4,7,13)][0]['alpha'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][1]['alpha'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][2]['alpha'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][3]['alpha'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])

mean: 4.450988611240126 95% CI: ([4.32466998 4.57403457])
mean: 4.255954464410283 95% CI: ([4.11776171 4.39164867])
mean: 3.7793223852742703 95% CI: ([3.66062061 3.90375828])
mean: 3.5195883525245195 95% CI: ([3.37573705 3.67729561])


In [125]:
print_CIs([x[(4,7,13)][0]['mode_beta'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][1]['mode_beta'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][2]['mode_beta'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][3]['mode_beta'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])

mean: 0.8088373695947516 95% CI: ([0.79223565 0.82950146])
mean: 0.7250510957099011 95% CI: ([0.71458285 0.74736465])
mean: 0.7283315130593058 95% CI: ([0.72021494 0.75086847])
mean: 0.7547762480437192 95% CI: ([0.72621601 0.77583279])


In [126]:
print_CIs([x[(4,7,13)][0]['mode_mu'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][1]['mode_mu'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][2]['mode_mu'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])
print_CIs([x[(4,7,13)][3]['mode_mu'] for x in bootstrap_results['regression_for_cutoffs_bootstrap']])

mean: -0.10887631873310692 95% CI: ([-0.54453426  0.35216393])
mean: -0.06161134889126299 95% CI: ([-0.6088507   0.17078201])
mean: -0.41147426570170326 95% CI: ([-0.62500489  0.1484991 ])
mean: 0.14052272576045358 95% CI: ([-0.50795679  1.49597916])
