### Import necessary packages

In [None]:
# Core dependencies for the notebook: path handling, data frames, numerics, and regression formulas.
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import warnings
# NOTE(review): this suppresses every warning for the rest of the session, including
# deprecation and numerical warnings from pandas/statsmodels — consider narrowing it
# to specific categories or modules.
warnings.filterwarnings('ignore')
# The synthetic-panel package is installed from TestPyPI; run this once beforehand:
# pip install --index-url https://test.pypi.org/simple/ synthetic-panel
from synthetic_panel import estimate_transitions

In [13]:
# Working directory holding the survey .dta inputs; it also becomes the current
# directory, so relative output file names are written here.
# Set the SYNTHETIC_PANEL_DIR environment variable to run the notebook on another
# machine; when it is unset, the original hard-coded location is used.
path = os.environ.get("SYNTHETIC_PANEL_DIR", r"D:\OneDrive\Documents\synthetic_panel")
os.chdir(path)

### Preparing data

In [14]:
# Load the first survey round from its Stata file in the working directory.
df2010 = pd.read_stata(
    filepath_or_buffer=os.path.join(path, "nlss3_.dta"),
)
# Leave the column index as the cell's displayed value.
df2010.columns

Index(['pline_7', 'education_hh', 'rcons', 'gender', 'age1', 'age2', 'age3',
       'age4', 'dwelling'],
      dtype='object')

In [15]:
# Load the second survey round from its Stata file in the working directory.
df2021 = pd.read_stata(
    filepath_or_buffer=os.path.join(path, "nlss4_.dta"),
)
# Leave the column index as the cell's displayed value.
df2021.columns

Index(['education_hh', 'pline', 'gender', 'age1', 'age2', 'age3', 'age4',
       'dwelling', 'rcons'],
      dtype='object')

In [16]:
# Print both rounds' column names as plain lists, one round per line, so that
# naming differences between the two frames are easy to spot.
print(df2010.columns.tolist(), df2021.columns.tolist(), sep="\n")

['pline_7', 'education_hh', 'rcons', 'gender', 'age1', 'age2', 'age3', 'age4', 'dwelling']
['education_hh', 'pline', 'gender', 'age1', 'age2', 'age3', 'age4', 'dwelling', 'rcons']


### Transition dynamics

In [17]:
# Estimate poverty transitions between the two survey rounds with a bootstrap.
bootstrap_results = estimate_transitions(
    # Input frames: round 1 and round 2 of the survey.
    df_round1=df2010,
    df_round2=df2021,
    # Dependent variable in each round, log-transformed before estimation.
    dep_var_round1='rcons',
    dep_var_round2='rcons',
    log_transform=True,
    # Poverty-line column in each frame ('pline_7' in df2010, 'pline' in df2021).
    pline_round1_name='pline_7',
    pline_round2_name='pline',
    # Explanatory variables; the same variables are combined to build the
    # cohort identifier, which is stored under the name 'cohort'.
    x_cols=['gender', 'age1', 'age2', 'age3', 'age4', 'dwelling'],
    cohort_cols=['gender', 'age1', 'age2', 'age3', 'age4', 'dwelling'],
    cohort_col='cohort',
    auto_create_cohort=True,
    # Bootstrap settings: replication count, parallel execution, fixed seed
    # for reproducibility.
    n_bootstrap=5,
    use_multiprocessing=True,
    seed=42,
    # Excel file written to the current working directory.
    output_excel_filename="test.xlsx",
)

# Preview the first rows of the returned results table as plain text.
print(bootstrap_results.head())

✅ Column 'gender' is already integer.
✅ Column 'age1' is already integer.
✅ Column 'age2' is already integer.
✅ Column 'age3' is already integer.
✅ Column 'age4' is already integer.
✅ Column 'dwelling' is already integer.

🔧 Auto-generating cohort from cohort_cols: ['gender', 'age1', 'age2', 'age3', 'age4', 'dwelling']


cohort
0_0_0_0_1_0    89
Name: count, dtype: int64

Consider collapsing categories or using fewer cohort_cols.

⏳ Running bootstrap...


Bootstrap (MP): 100%|██████████| 5/5 [01:11<00:00, 14.29s/iter]


✅ Bootstrap completed.
⏱️  Total time: 1.2 min

=== Bootstrap Poverty Transition Shares ===
Stayed Poor (P11): 13.7%  (SE: 0.82%)
Escaped Poverty (P10): 5.0%  (SE: 0.85%)
Fell into Poverty (P01): 5.1%  (SE: 0.83%)
Stayed Non-poor (P00): 76.3%  (SE: 0.86%)

💾 Saved results to D:\OneDrive\Documents\synthetic_panel\test.xlsx
      rho_c  rho_partial  Stayed Poor (P11)  Escaped Poverty (P10)  \
0  0.912788     0.914475           0.142673               0.043604   
1  0.820061     0.816757           0.122782               0.063832   
2  0.906278     0.909886           0.141180               0.044654   
3  0.900472     0.909539           0.140961               0.044724   
4  0.881059     0.883099           0.135489               0.050949   

   Fell into Poverty (P01)  Stayed Non-poor (P00)  
0                 0.045294               0.768430  
1                 0.065189               0.748197  
2                 0.046345               0.767821  
3                 0.046399               0.767


