### Import necessary packages

In [18]:
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import warnings
warnings.filterwarnings('ignore')
# pip install --index-url https://test.pypi.org/simple/ synthetic-panel
from synthetic_panel import estimate_transitions

In [24]:
# Root URL of the example survey files (main branch of the synthetic_panel repo).
path = (
    "https://github.com/rohanbjkr/synthetic_panel"
    "/raw/refs/heads/main"
)

### Preparing data

In [25]:
# Load the round 1 survey file. `path` is a URL, so join with an explicit "/":
# os.path.join uses the OS separator and would yield "...main\round1.dta" on Windows.
df2010 = pd.read_stata(f"{path}/round1.dta")
df2010.columns

Index(['pline_7', 'education_hh', 'rcons', 'gender', 'age1', 'age2', 'age3',
       'age4', 'dwelling'],
      dtype='object')

In [26]:
# Load the round 2 survey file. `path` is a URL, so join with an explicit "/":
# os.path.join uses the OS separator and would yield "...main\round2.dta" on Windows.
df2021 = pd.read_stata(f"{path}/round2.dta")
df2021.columns

Index(['education_hh', 'pline', 'gender', 'age1', 'age2', 'age3', 'age4',
       'dwelling', 'rcons'],
      dtype='object')

### Transition dynamics

In [None]:
# Run the bootstrap estimation of poverty transition shares between the two rounds.
bootstrap_results = estimate_transitions(
    df_round1=df2010,
    df_round2=df2021,
    x_cols=['gender', 'age1', 'age2', 'age3', 'age4'],
    cohort_cols=['gender', 'age1', 'age2', 'age3', 'age4'],
    dep_var_round1='rcons',                # dependent var in round1
    dep_var_round2='rcons',                # dependent var in round2
    pline_round1_name='pline_7',           # poverty line col in df2010
    pline_round2_name='pline',             # poverty line col in df2021
    cohort_col='cohort',                   # cohort ID column name
    auto_create_cohort=True,               # create cohort from cohort_cols
    log_transform=True,                    # log-transform dependent vars
    n_bootstrap=5,                         # number of bootstrap reps (small, for a quick demo)
    use_multiprocessing=True,              # use parallel processing
    output_excel_filename="test.xlsx",     # save Excel file in current dir
    seed=42,                               # random seed for reproducibility
)

# End the cell on the expression (not print) so Jupyter renders a rich table.
bootstrap_results.head()

✅ Column 'gender' is already integer.
✅ Column 'age1' is already integer.
✅ Column 'age2' is already integer.
✅ Column 'age3' is already integer.
✅ Column 'age4' is already integer.
✅ Column 'dwelling' is already integer.

🔧 Auto-generating cohort from cohort_cols: ['gender', 'age1', 'age2', 'age3', 'age4', 'dwelling']


cohort
1_0_0_1_0_1    29
1_0_0_0_1_1    26
0_1_0_0_0_1    23
1_0_1_0_0_1    20
0_0_1_0_0_1    12
0_0_0_1_0_1    11
0_0_0_0_1_1    11
0_1_0_0_0_0    11
1_1_0_0_0_1    10
0_0_0_1_0_0     9
1_1_0_0_0_0     9
1_0_0_0_1_0     7
1_0_1_0_0_0     6
1_0_0_1_0_0     4
0_0_1_0_0_0     4
Name: count, dtype: int64

Consider collapsing categories or using fewer cohort_cols.

⏳ Running bootstrap...


Bootstrap (MP): 100%|██████████| 5/5 [00:05<00:00,  1.14s/iter]


✅ Bootstrap completed.
⏱️  Total time: 5.7 sec

=== Bootstrap Poverty Transition Shares ===
Stayed Poor (P11): 2.4%  (SE: 1.40%)
Escaped Poverty (P10): 17.4%  (SE: 1.26%)
Fell into Poverty (P01): 13.2%  (SE: 1.15%)
Stayed Non-poor (P00): 67.0%  (SE: 0.97%)

💾 Saved results to D:\OneDrive\Documents\synthetic_panel\test.xlsx
      rho_c  rho_partial  Stayed Poor (P11)  Escaped Poverty (P10)  \
0  0.159851     0.145094           0.042141               0.158236   
1 -0.217859    -0.260548           0.016829               0.183952   
2  0.038360     0.020227           0.033386               0.164176   
3 -0.133290    -0.160593           0.021740               0.174267   
4 -0.473152    -0.514343           0.006468               0.187699   

   Fell into Poverty (P01)  Stayed Non-poor (P00)  
0                 0.116897               0.682727  
1                 0.135601               0.663622  
2                 0.126394               0.676044  
3                 0.135923               0.66


