In [None]:
import pandas as pd
from pathlib import Path
from datetime import datetime


In [18]:
data_path = Path('/data/eop/country_data')

rates = dict()

for dir in data_path.iterdir():
    try:
        if (dir / 'cleaned').exists():
            cleaned_dir = dir / 'cleaned'
        elif (dir / 'clean').exists():
            print(f'found "clean" instead of "cleaned": {dir.name}')
            cleaned_dir = dir / 'clean'
        else:
            print(f'No cleaned data directory found for {dir.name}')

        train = pd.read_parquet(cleaned_dir / 'train.parquet')
        test = pd.read_parquet(cleaned_dir / 'test.parquet')

        data = pd.concat((train, test), ignore_index=True)

        count_poor = (
            data[data.consumption_per_capita_per_day < 2.15].headcount_adjusted_hh_wgt
        ).sum()

        total = (
            data.headcount_adjusted_hh_wgt
        ).sum()
        rate = count_poor / total

        rates[dir.name] = rate
    
    except Exception as e:
        print(f'Error encountered with {dir.name}; skipping')
        print(e)

Error encountered with benin; skipping
[Errno 2] No such file or directory: '/data/eop/country_data/benin/cleaned/train.parquet'
No cleaned data directory found for colombia
Error encountered with ghana_henry; skipping
[Errno 2] No such file or directory: '/data/eop/country_data/ghana_henry/cleaned/train.parquet'
No cleaned data directory found for guatemala
Error encountered with south_sudan; skipping
[Errno 2] No such file or directory: '/data/eop/country_data/south_sudan/cleaned/train.parquet'
found "clean" instead of "cleaned": ethiopia
found "clean" instead of "cleaned": togo
found "clean" instead of "cleaned": nigeria


In [22]:
rates_df = pd.DataFrame(list(rates.items()), columns=['country', 'poverty_rate'])
unfinished_countries = ['colombia', 'guatemala',]
rates_df = rates_df[~rates_df['country'].isin(unfinished_countries)]
display(rates_df)

Unnamed: 0,country,poverty_rate
0,burkina_faso,0.266885
2,cote_divoire,0.087716
3,ghana_nolan,0.198419
5,guinea-bissau,0.182665
6,kenya,0.363924
7,madagascar,0.759238
8,malawi,0.710823
9,mali,0.092979
10,niger,0.475415
11,somalia,0.099855


In [23]:
today_str = datetime.today().strftime('%Y%m%d')
rates_df.to_csv(f'/data/eop/poverty_rates_from_survey_{today_str}.csv')

In [24]:
rates_df

Unnamed: 0,country,poverty_rate
0,burkina_faso,0.266885
2,cote_divoire,0.087716
3,ghana_nolan,0.198419
5,guinea-bissau,0.182665
6,kenya,0.363924
7,madagascar,0.759238
8,malawi,0.710823
9,mali,0.092979
10,niger,0.475415
11,somalia,0.099855
