In [7]:
import polars as pl
import altair as alt
#alt.data_transformers.enable("vegafusion")

In [8]:
# Columns that process_data() selects from the raw CSV.
rel_vars = [
    # identifier / grouping columns
    'countrynewwb', 'codewb', 'year', 'pop_adult',
    'regionwb24_hi', 'incomegroupwb24', 'group', 'group2',
    # indicator columns (the same names are listed in int_cols)
    'account_t_d', 'fiaccount_t_d', 'mobileaccount_t_d',
    'fin11a', 'fin11b', 'fin11c', 'fin11f', 'fin11d', 'fin11e',
    'fin14a', 'fin14b', 'fin14c', 'fin14d',
    'fin26a', 'fin26b', 'fin27a',
    'g20_made',
    'fin17f', 'fin17a_17a1_d', 'fin17a', 'fin17b', 'fin17c',
    'fin24aSD_ND',
]

In [9]:
# Indicator columns that process_data() casts to Float64 and multiplies by 100.
# (Despite the name, these end up as floats, not integers.)
int_cols = [
    'account_t_d', 'fiaccount_t_d', 'mobileaccount_t_d',
    'fin11a', 'fin11b', 'fin11c', 'fin11f', 'fin11d', 'fin11e',
    'fin14a', 'fin14b', 'fin14c', 'fin14d',
    'fin26a', 'fin26b', 'fin27a',
    'g20_made',
    'fin17f', 'fin17a_17a1_d', 'fin17a', 'fin17b', 'fin17c',
    'fin24aSD_ND',
]

In [10]:
# `countrynewwb` values used for the South Asia charts; the list order is also
# used as the facet / axis sort order. The first entry is the regional aggregate.
south_asia_countries = [
    'South Asia',
    'India',
    'Bangladesh',
    'Pakistan',
]

In [11]:
# Display order of the cleaned region names in the regional charts.
region_order = [
    'South Asia',
    'Sub-Saharan Africa',
    'Middle East & North Africa',
    'Latin America & Caribbean',
    'Europe & Central Asia',
    'East Asia & Pacific',
]

In [12]:
def process_data(path='../data/GlobalFindexDatabase2025.csv'):
    """Load the Global Findex CSV and return the cleaned analysis frame.

    Parameters
    ----------
    path : str
        Location of the CSV file. Defaults to the path the notebook has
        always used, so existing ``process_data()`` calls are unaffected.

    Returns
    -------
    polars.DataFrame
        Only the columns listed in ``rel_vars``. Every column in ``int_cols``
        has the literal string 'NA' replaced by null, is cast to Float64 and
        is multiplied by 100.
    """
    raw = pl.read_csv(path, infer_schema_length=10000)

    # 'NA' -> null, then numeric, then scaled by 100 in a single pass.
    # NOTE(review): the x100 presumably turns 0-1 shares into percentages
    # (chart axes are labelled '%') -- confirm against the data dictionary.
    as_percent = [
        pl.col(name).replace('NA', None).cast(pl.Float64) * 100
        for name in int_cols
    ]
    return raw.select(rel_vars).with_columns(as_percent)
    

In [13]:
# Regional comparison: helper that selects the regional aggregate rows and adds short region names for the charts below
def filter_regions(df):
    """Keep the regional aggregate rows and add a short region label.

    Rows are kept when `countrynewwb` is one of the six region names below and
    `group2` equals 'all'. A new column `clean_region_name` holds the region
    name with the ' (excluding high income)' suffix removed; 'South Asia' has
    no suffix and is copied unchanged.
    """
    suffix = ' (excluding high income)'
    short_names = [
        'East Asia & Pacific',
        'Europe & Central Asia',
        'Middle East & North Africa',
        'Latin America & Caribbean',
        'Sub-Saharan Africa',
    ]
    # Full name in the data -> short label used on the charts.
    label_for = {short + suffix: short for short in short_names}
    wanted = list(label_for) + ['South Asia']

    is_region = pl.col('countrynewwb').is_in(wanted)
    is_total = pl.col('group2') == 'all'

    # Values missing from the mapping are passed through as-is.
    short_label = pl.col('countrynewwb').replace(label_for).alias('clean_region_name')
    return df.filter(is_region & is_total).with_columns(short_label)


In [65]:
def region_any_account(df):
    """Return a year-by-region heatmap coloured by the `account_t_d` column.

    `df` needs the columns `year`, `clean_region_name` and `account_t_d`
    (the output of filter_regions). Regions are ordered by `region_order`.
    """
    heading = alt.TitleParams(
        text='Account Ownership (2011-2024)',
        subtitle=[
            'The percentage of respondents who report having an account at a bank',
            'or similar financial institution or report personally using a mobile money service in the past year.',
        ],
        subtitleColor='gray',
        subtitleFontSize=12,
        align='left',
    )
    heatmap = alt.Chart(df).mark_rect().encode(
        x=alt.X('year:N'),
        y=alt.Y('clean_region_name', title='Region', sort=region_order),
        color=alt.Color(
            'account_t_d',
            title='% Adults (age 15+)',
            scale=alt.Scale(scheme='lightmulti'),
        ),
    )
    return heatmap.properties(title=heading).configure_title(anchor='start')

# Build the heatmap once and export it as SVG, then PNG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
any_account_fig = region_any_account(filter_regions(process_data()))
for image_ext in ('svg', 'png'):
    any_account_fig.save(
        f'../milestones/static-draft-images/region_any_account.{image_ext}',
        ppi=200,
    )

In [6]:
def region_account_ownership(df):
    """Return a grouped bar chart of `fiaccount_t_d` per year and region.

    `df` needs the columns `year`, `clean_region_name` and `fiaccount_t_d`.
    Bars within each year are offset and coloured by region, following
    `region_order`.
    """
    heading = alt.TitleParams(
        text='Account Ownership (2011-2024)',
        subtitle='The percentage of respondents who report having an account at a bank or another similar financial institution.',
        subtitleColor='gray',
        subtitleFontSize=12,
        align='left',
    )
    grouped_bars = alt.Chart(df).mark_bar().encode(
        x=alt.X('year:N'),
        y=alt.Y('fiaccount_t_d:Q', title='% Adults (age 15+)'),
        xOffset=alt.XOffset('clean_region_name:N', sort=region_order),
        color=alt.Color(
            'clean_region_name',
            title='Region',
            sort=region_order,
            scale=alt.Scale(scheme='set2'),
        ),
    )
    return grouped_bars.properties(title=heading).configure_title(anchor='start')

# Export the regional bank-account bar chart as SVG.
(
    region_account_ownership(filter_regions(process_data()))
    .save(
        '../milestones/static-draft-images/region_account_ownership_new.svg',
        ppi=200,
    )
)

NameError: name 'region_order' is not defined

In [68]:
def region_mobile_account(df):
    """Return a year-by-region heatmap coloured by `mobileaccount_t_d`.

    `df` needs the columns `year`, `clean_region_name` and
    `mobileaccount_t_d`. Rows for 2011 are removed so the chart matches its
    '(2014-2024)' title. Regions are ordered by `region_order`, as in the
    other regional heatmap.
    """
    # Fix: the filtered frame used to be computed and then ignored, so the
    # chart was built from the unfiltered `df` and still included 2011.
    mm_flt = df.filter(pl.col('year') != 2011)

    mm_chart = alt.Chart(mm_flt).mark_rect().encode(
        x = 'year:N',
        y = alt.Y('clean_region_name', title = 'Region', sort = region_order),
        color = alt.Color('mobileaccount_t_d', title = '% Adults (age 15+)').scale(scheme = 'lightmulti')
    ).properties(
        title = alt.TitleParams(
            text = 'Mobile Money Account Ownership (2014-2024)',
            subtitle = 'The percentage of respondents who report personally using a mobile money service in the past year.',
            subtitleColor='gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')
    return mm_chart

# Export the regional mobile-money heatmap as PNG.
(
    region_mobile_account(filter_regions(process_data()))
    .save(
        '../milestones/static-draft-images/region_mobile_account_ownership_new.png',
        ppi=200,
    )
)

In [63]:
def region_fin_resiliance(df):
    """Return a bar chart of `fin24aSD_ND` per region for 2024.

    `df` needs the columns `year`, `clean_region_name` and `fin24aSD_ND`.
    Bars are sorted by descending value and coloured by region (legend hidden).
    """
    # Fix: the title says 2024, but the old filter only excluded 2021. Any
    # other year with a non-null value would have been stacked into the same
    # bar. Select 2024 explicitly, as financial_resiliance_all does.
    fin_resiliance_df = df.filter(pl.col('year') == 2024)
    subtitle_lines = [
        'The percentage of respondents who say it is possible and not difficult at all',
        'or somewhat difficult to come up with the funds in 30 days.'
    ]
    chart = alt.Chart(fin_resiliance_df).mark_bar().encode(
        x = alt.X('clean_region_name:N', title = '').sort('-y'),
        y = alt.Y('fin24aSD_ND:Q', title = '% Adults (age 15+)'),
        color = alt.Color('clean_region_name', legend = None, title = 'Region').scale(scheme = 'set2'),
        ).properties(
        title = alt.TitleParams(
            text = 'Financial Resilience (2024)',
            subtitle = subtitle_lines,
            subtitleColor='gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')

    return chart

# Low financial resilience in South Asia.
# Build the chart once and export it as PNG, then SVG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
fin_resilience_fig = region_fin_resiliance(filter_regions(process_data()))
for image_ext in ('png', 'svg'):
    fin_resilience_fig.save(
        f'../milestones/static-draft-images/region_fin_resiliance_new.{image_ext}',
        ppi=200,
    )
    

In [64]:
def region_savings(df):
    """Return a chart comparing `fin17a_17a1_d` per region for 2011 and 2024.

    `df` needs the columns `year`, `clean_region_name` and `fin17a_17a1_d`.
    Each region is one row on the y axis (ordered by `region_order`); the two
    years are drawn as points with different shapes, joined by a line.

    Note: a later cell in this notebook defines another function with the same
    name, so this definition is only in effect until that cell runs.
    """
    savings_df = df.filter((pl.col('year') == 2011) | (pl.col('year') == 2024))
    subtitle_lines = [
        'The percentage of respondents who saved at a bank or similar financial institution',
        'or using a mobile money account in the past year.'
    ]
    # Fix: the x channel was built with alt.Y; use the matching alt.X class.
    # The local is also no longer named like the function itself.
    savings_chart = alt.Chart(savings_df).mark_line(point=True).encode(
        y = alt.Y('clean_region_name:N', title = '', sort = region_order),
        x = alt.X('fin17a_17a1_d:Q', title = '% Adults (age 15+)'),
        color = alt.Color('clean_region_name', legend = None).scale(scheme = 'set2'),
        shape = alt.Shape('year:O', title = 'Year')
    ).properties(
        title = alt.TitleParams(
            text = 'Financial use: formal savings (2011-2024)',
            subtitle = subtitle_lines,
            subtitleColor = 'gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')

    return savings_chart

# Build the chart once and export it as PNG, then SVG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
formal_savings_fig = region_savings(filter_regions(process_data()))
for image_ext in ('png', 'svg'):
    formal_savings_fig.save(
        f'../milestones/static-draft-images/region_savings_new.{image_ext}',
        ppi=200,
    )

In [17]:
def filter_south_asia(df, group = True):
    """Keep rows whose `countrynewwb` is listed in `south_asia_countries`.

    With a truthy `group`, only rows where `group2` differs from 'all' are
    returned; otherwise only the rows where `group2` equals 'all'.
    """
    in_south_asia = pl.col('countrynewwb').is_in(south_asia_countries)
    wanted_rows = (pl.col('group2') != 'all') if group else (pl.col('group2') == 'all')
    return df.filter(in_south_asia).filter(wanted_rows)

In [18]:
def long_acc_ownership_df(df):
    """Reshape the two account columns into long format.

    Returns one row per (`countrynewwb`, `year`, account type) with columns
    `account_type` (a readable label) and `account_per` (the value).
    """
    id_cols = ['countrynewwb', 'year']
    # Source column -> label shown in the chart legend.
    type_labels = {
        'fiaccount_t_d': 'Bank or similar inst.',
        'mobileaccount_t_d': 'Mobile Money',
    }
    value_cols = list(type_labels)

    stacked = df.select(id_cols + value_cols).unpivot(
        index=id_cols,
        on=value_cols,
        variable_name='account_type',
        value_name='account_per',
    )
    # Values missing from the mapping are passed through as-is.
    return stacked.with_columns(pl.col('account_type').replace(type_labels))



In [55]:
def country_account_ownership(df):
    """Return a bar chart of account ownership by type, one panel per country.

    `df` is the long frame from long_acc_ownership_df, with columns
    `countrynewwb`, `year`, `account_type` and `account_per`. Panels follow
    the order of `south_asia_countries`.
    """
    # Fix: the subtitle was copied from the savings chart ("saved at a bank
    # ..."), but this chart plots account ownership. The wording now follows
    # the regional account-ownership charts.
    subtitle_lines = [
        'The percentage of respondents who report having an account at a bank or similar financial institution',
        'and the percentage who report personally using a mobile money service in the past year.']

    country_account_chart = alt.Chart(df).mark_bar().encode(
        x = 'year:N',
        y = alt.Y('account_per:Q', title = '% Adults (age 15+)'),
        color = alt.Color('account_type', title = 'Account Type').scale(scheme = 'dark2'),
        column = alt.Column('countrynewwb', title = '', sort = south_asia_countries)
    ).properties(
        title = alt.TitleParams(
            text = 'Account Ownership By Country (2011-2024)',
            subtitle = subtitle_lines,
            subtitleColor = 'gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')

    return country_account_chart
# Build the chart once and export it as PNG, then SVG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
country_account_fig = country_account_ownership(
    long_acc_ownership_df(filter_south_asia(process_data(), False))
)
for image_ext in ('png', 'svg'):
    country_account_fig.save(
        f'../milestones/static-draft-images/country_account_ownership_new.{image_ext}',
        ppi=200,
    )

In [53]:
def account_owenership_gender_gaps(df):
    """Return a per-country chart of `account_t_d` by gender for 2011 and 2024.

    `df` needs the columns `group`, `group2`, `year`, `countrynewwb` and
    `account_t_d`. Only rows with `group == 'gender'` and year 2011 or 2024
    are used. Each `group2` value is one row on the y axis; the two years are
    points with different shapes joined by a line. Countries are stacked in a
    single column, ordered by `south_asia_countries`.
    """
    gender_df = df.filter(pl.col('group') == 'gender')
    gender_df_filt = gender_df.filter((pl.col('year') == 2011) | (pl.col('year') == 2024))
    new_colors = ['#8624f5', '#1fc3aa']

    # Fix: the subtitle was copied from the savings chart ("saved at a bank
    # ..."), but `account_t_d` is the account-ownership column. The wording now
    # follows the regional account-ownership chart.
    subtitle_lines = [
        'The percentage of male or female respondents who report having an account at a bank',
        'or similar financial institution or report personally using a mobile money service in the past year.'
    ]

    account_gender_gap = alt.Chart(gender_df_filt).mark_line(point = True).encode(
        x = alt.X('account_t_d:Q', title = '% Adults (age 15+)'),
        y = alt.Y('group2:N', title = ''),
        color = alt.Color('group2:N', legend = None, scale = alt.Scale(range = new_colors)),
        shape = alt.Shape('year:N', title = 'Year'),
    ).facet(
        facet = alt.Facet('countrynewwb:N', title = 'Country', sort = south_asia_countries),
        columns = 1).properties(
        title = alt.TitleParams(
            text = 'Account Ownership By Gender (2011-2024)',
            subtitle = subtitle_lines,
            subtitleColor = 'gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')

    return account_gender_gap
# Build the chart once and export it as SVG, then PNG. Previously the CSV was
# re-read and the chart rebuilt three times (one result was simply discarded).
gender_gap_fig = account_owenership_gender_gaps(filter_south_asia(process_data(), True))
for image_ext in ('svg', 'png'):
    gender_gap_fig.save(
        f'../milestones/static-draft-images/any_account_owenership_gender_gaps_new.{image_ext}',
        ppi=200,
    )

In [46]:
def mobile_account_gender_gaps(df):
    """Return a line chart of `mobileaccount_t_d` by gender, one panel per country.

    `df` needs the columns `group`, `group2`, `year`, `countrynewwb` and
    `mobileaccount_t_d`. Only rows with `group == 'gender'` are used.

    Marked "DROP" by the author: this chart is a candidate for removal and is
    not exported.
    """
    gender_df = df.filter(pl.col('group') == 'gender')

    # Fix: the y-axis title described bank accounts although the plotted column
    # is the mobile money one. The unused `new_colors` local was removed.
    mobile_gender_gap = alt.Chart(gender_df).mark_line(point = True).encode(
        x = 'year:N',
        y = alt.Y('mobileaccount_t_d:Q', title = 'Mobile Money Account(%)'),
        xOffset = 'group2',
        color = alt.Color('group2').scale(scheme = 'redyellowgreen').title('Gender'),
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )

    return mobile_gender_gap

# Display only. The export is disabled:
#   .save('../milestones/static-draft-images/mobile_account_gender_gaps.png', ppi=200)
(
    mobile_account_gender_gaps(
        filter_south_asia(process_data(), True)
    )
)

In [66]:
def account_age_gaps(df):
    """Return a year-by-age heatmap of `fiaccount_t_d`, one panel per country.

    `df` needs the columns `group`, `group2`, `year`, `countrynewwb` and
    `fiaccount_t_d`. Only rows with `group == 'age_cat'` are used; panels
    follow the order of `south_asia_countries`.
    """
    by_age = df.filter(pl.col('group') == 'age_cat')
    shade = alt.Color(
        'fiaccount_t_d:Q',
        title='Account at bank or similiar inst.(%)',
        scale=alt.Scale(scheme='lightmulti'),
    )
    per_country = alt.Column('countrynewwb', title='Country', sort=south_asia_countries)
    return alt.Chart(by_age).mark_rect().encode(
        x=alt.X('year:N'),
        y=alt.Y('group2:N', title='Age'),
        color=shade,
        column=per_country,
    )

# Export the age-group heatmap as PNG.
(
    account_age_gaps(filter_south_asia(process_data(), True))
    .save(
        '../milestones/static-draft-images/account_age_gaps.png',
        ppi=200,
    )
)

In [50]:
def account_income_gaps(df):
    """Return a year-by-income-group heatmap of `account_t_d` per country.

    `df` needs the columns `group`, `group2`, `year`, `countrynewwb` and
    `account_t_d`. Only rows with `group == 'income'` are used. Countries are
    stacked in a single column, ordered by `south_asia_countries`.
    """
    # Fix: the subtitle was copied from the savings chart ("who saved at a
    # bank ..."), but `account_t_d` is the account-ownership column. The
    # wording now follows the regional account-ownership chart.
    subtitle_lines = [
        "The percentage of respondents in each income group",
        "who report having an account at a bank or similar financial institution",
        "or report personally using a mobile money service in the past year."
    ]
    income_df = df.filter(pl.col('group') == 'income')
    account_income_gap = alt.Chart(income_df).mark_rect().encode(
        x = 'year:N',
        y = alt.Y('group2:N', title = ''),
        color = alt.Color('account_t_d:Q', title = '% Adults (age 15+)')
            .scale(scheme = 'lightmulti'),
    ).facet(
        facet = alt.Facet('countrynewwb:N', title = 'Country', sort = south_asia_countries),
        columns = 1).properties(
        title = alt.TitleParams(
            text = 'Account Ownership By Income Group (2011-2024)',
            subtitle = subtitle_lines,
            subtitleColor = 'gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')
    return account_income_gap

# Build the chart once and export it as PNG, then SVG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
income_gap_fig = account_income_gaps(filter_south_asia(process_data(), True))
for image_ext in ('png', 'svg'):
    income_gap_fig.save(
        f'../milestones/static-draft-images/any_account_income_gaps.{image_ext}',
        ppi=200,
    )
                          

In [43]:
def mobile_income_gaps(df):
    """Return a chart of `mobileaccount_t_d` by income group, one panel per country.

    `df` needs the columns `group`, `group2`, `countrynewwb` and
    `mobileaccount_t_d`. Only rows with `group == 'income'` are used; panels
    follow the order of `south_asia_countries`.
    """
    income_df = df.filter(pl.col('group') == 'income')
    mobile_income_gap = alt.Chart(income_df).mark_line(point = True).encode(
        # Fix: this axis shows the income breakdown but was titled 'Gender'.
        y = alt.Y('group2:N', title = 'Income'),
        x = alt.X('mobileaccount_t_d:Q', title = 'Mobile Money Account(%)'),
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )
    return mobile_income_gap

# Display the mobile-money-by-income chart (not exported).
(
    mobile_income_gaps(
        filter_south_asia(process_data(), True)
    )
)

In [44]:
# saved formally: fin17a_17a1_d
# saved informally: fin17c
# financial resilience: fin24aSD_ND

In [21]:
# South Asia rows at the 'all' level (group=False keeps group2 == 'all').
df = filter_south_asia(
    process_data(),
    False,
)

In [22]:
def savings_behavior_df(df):
    """Reshape the two savings columns into long format and add short region names.

    `df` needs the columns `countrynewwb`, `year`, `group`, `group2`,
    `fin17a_17a1_d` and `fin17c`. The result has one row per original row and
    savings column, with:

    - `savings_method`: 'Saved Formally' (from `fin17a_17a1_d`) or
      'Saved Informally' (from `fin17c`),
    - `savings_per`: the value,
    - `clean_region_name`: `countrynewwb` with the ' (excluding high income)'
      suffix removed for the five regional aggregates, otherwise unchanged.

    All years are returned. A `year != 2011` filter used to be assigned here
    after the result had already been built and was never returned; that dead
    statement was removed without changing the output.
    NOTE(review): if 2011 should really be excluded, filter at the call site.
    """
    method_labels = {
        'fin17a_17a1_d': 'Saved Formally',
        'fin17c': 'Saved Informally',
    }
    suffix = ' (excluding high income)'
    region_labels = {
        short + suffix: short
        for short in (
            'East Asia & Pacific',
            'Europe & Central Asia',
            'Middle East & North Africa',
            'Latin America & Caribbean',
            'Sub-Saharan Africa',
        )
    }

    long_savings = df.unpivot(
        index = ['countrynewwb', 'year', 'group', 'group2'],
        on = list(method_labels),
        variable_name = 'savings_method',
        value_name = 'savings_per')

    # Values missing from either mapping are passed through as-is.
    return long_savings.with_columns(
        pl.col('savings_method').replace(method_labels),
        pl.col('countrynewwb').replace(region_labels).alias('clean_region_name'),
    )
    

In [71]:
def region_savings(df):
    """Return a line chart of `savings_per` by year, one panel per region.

    `df` is the long frame from savings_behavior_df, with columns `year`,
    `savings_per`, `savings_method` and `clean_region_name`. Lines are
    coloured by savings method.

    Note: this rebinds the name `region_savings`, replacing the function of
    the same name defined in an earlier cell of this notebook.
    """
    per_region = alt.Column(
        'clean_region_name',
        title='Regions',
        header=alt.Header(labelAngle=45),
    )
    by_method = alt.Color('savings_method', scale=alt.Scale(scheme='greenblue'))
    return alt.Chart(df).mark_line(point=True).encode(
        x=alt.X('year:N'),
        y=alt.Y('savings_per:Q', title='Savings(%)'),
        color=by_method,
        column=per_region,
    )

# Display only. The export is disabled:
#   .save('../milestones/static-draft-images/region_savings.png', ppi=200)
(
    region_savings(
        savings_behavior_df(filter_regions(process_data()))
    )
)

In [29]:
def savings_behavior_by_country(df):
    """Return a line chart of `savings_per` by year, one panel per country.

    `df` is the long frame from savings_behavior_df, with columns
    `countrynewwb`, `year`, `savings_per` and `savings_method`. Lines are
    coloured by savings method; panels follow `south_asia_countries`.
    """
    savings_chart = alt.Chart(df).mark_line(point = True).encode(
        x = 'year:N',
        y = alt.Y('savings_per:Q', title = '% Adults (age 15+)'),
        color = alt.Color('savings_method').scale(scheme = 'greenblue'),
        # Fix: without a country encoding every country's points were joined
        # into the same two lines, although the chart is titled "By Country".
        # Facet by country, as country_account_ownership does.
        column = alt.Column('countrynewwb', title = '', sort = south_asia_countries)
    ).properties(
        title = alt.TitleParams(
            text = 'Savings Behavior By Country (2011-2024)',
            subtitle = 'The percentage of respondents who reported saving formally or informally.',
            subtitleColor = 'gray',
            subtitleFontSize=12,
            align = 'left')).configure_title(
            anchor='start')
    return savings_chart

# Build the chart once and export it as SVG, then PNG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
country_savings_fig = savings_behavior_by_country(
    savings_behavior_df(filter_south_asia(process_data(), False))
)
for image_ext in ('svg', 'png'):
    country_savings_fig.save(
        f'../milestones/static-draft-images/savings_behavior_by_country_new.{image_ext}',
        ppi=200,
    )

In [62]:
# too much missing data --> skip this
def savings_behavior_by_gender(df):
    """Return a line chart of `fin17c` by year and income group, per country.

    Despite its name, this function uses the rows with `group == 'income'` and
    colours the lines by income group (`group2`). The name is kept so existing
    calls continue to work. `df` needs the columns `group`, `group2`, `year`,
    `countrynewwb` and `fin17c`.
    """
    income_df = df.filter(pl.col('group') == 'income')
    savings_income_gap = alt.Chart(income_df).mark_line(point=True).encode(
        x = 'year:N',
        # Fix: `fin17c` is labelled 'Saved Informally' by savings_behavior_df,
        # so the old axis title ("Formally") contradicted the plotted column.
        # NOTE(review): if formal savings was intended, plot `fin17a_17a1_d`.
        y = alt.Y('fin17c:Q', title = 'Saved Money Informally(%)'),
        color = alt.Color('group2:N', title = 'Income').scale(scheme = 'lightmulti'),
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )
    return savings_income_gap

# Display the chart (not exported).
(
    savings_behavior_by_gender(
        filter_south_asia(process_data(), True)
    )
)

In [61]:
def financial_resiliance_all(df):
    """Return a bar chart of `fin24aSD_ND` per country for 2024.

    `df` needs the columns `year`, `countrynewwb` and `fin24aSD_ND`. Bars
    follow the order of `south_asia_countries`; the chart is 175 px wide.
    """
    only_2024 = df.filter(pl.col('year') == 2024)
    heading = alt.TitleParams(
        text='Financial Resilience (2024)',
        subtitle=[
            'The percentage of respondents who say it is possible and not difficult at all',
            'or somewhat difficult to come up with the funds in 30 days.',
        ],
        subtitleColor='gray',
        subtitleFontSize=12,
        align='left',
    )
    bars = alt.Chart(only_2024).mark_bar().encode(
        x=alt.X('countrynewwb:N', sort=south_asia_countries, title=''),
        y=alt.Y('fin24aSD_ND:Q', title='% Adults (age 15+)'),
    )
    return bars.properties(width=175, title=heading).configure_title(anchor='start')
# Build the chart once and export it as PNG, then SVG. Previously the CSV was
# re-read and the chart rebuilt for each output format.
country_resilience_fig = financial_resiliance_all(filter_south_asia(process_data(), False))
for image_ext in ('png', 'svg'):
    country_resilience_fig.save(
        f'../milestones/static-draft-images/financial_resiliance_all_new.{image_ext}',
        ppi=200,
    )