In [1]:
import polars as pl
import altair as alt
#alt.data_transformers.enable("vegafusion")

In [3]:
# Columns kept from the raw Findex CSV by process_data().
rel_vars = [
    # columns that are NOT listed in int_cols (left as read from the CSV)
    'countrynewwb', 'codewb', 'year', 'pop_adult',
    'regionwb24_hi', 'incomegroupwb24',
    'group', 'group2',
    # indicator columns: the same names, in the same order, as int_cols
    'account_t_d', 'fiaccount_t_d', 'mobileaccount_t_d',
    'fin11a', 'fin11b', 'fin11c', 'fin11f', 'fin11d', 'fin11e',
    'fin14a', 'fin14b', 'fin14c', 'fin14d',
    'fin26a', 'fin26b', 'fin27a',
    'g20_made', 'fin17f',
    'fin17a_17a1_d', 'fin17a', 'fin17b', 'fin17c',
    'fin24aSD_ND',
]

In [4]:
# Indicator columns that process_data() converts: the literal 'NA' becomes null,
# the column is cast to Float64 (not an integer type, despite the name) and then
# multiplied by 100.
int_cols = [
    'account_t_d', 'fiaccount_t_d', 'mobileaccount_t_d',
    'fin11a', 'fin11b', 'fin11c', 'fin11f', 'fin11d', 'fin11e',
    'fin14a', 'fin14b', 'fin14c', 'fin14d',
    'fin26a', 'fin26b', 'fin27a',
    'g20_made', 'fin17f',
    'fin17a_17a1_d', 'fin17a', 'fin17b', 'fin17c',
    'fin24aSD_ND',
]

In [5]:
# Values of 'countrynewwb' kept by filter_south_asia(); the same list is passed
# as the facet sort order in the per-country charts.
south_asia_countries = [
    'South Asia',
    'India',
    'Bangladesh',
    'Pakistan',
]

In [6]:
def process_data(csv_path='../data/GlobalFindexDatabase2025.csv'):
    """Load the Global Findex CSV and return the analysis columns as percentages.

    Parameters
    ----------
    csv_path : str, optional
        Location of the CSV file. Defaults to the path this notebook has
        always read, so existing ``process_data()`` calls are unaffected.

    Returns
    -------
    polars.DataFrame
        Only the columns named in ``rel_vars``. Every column named in
        ``int_cols`` has the literal string 'NA' replaced by null, is cast to
        Float64 and is multiplied by 100.
    """
    global_findex = pl.read_csv(csv_path, infer_schema_length=10000)

    # One expression per indicator column: 'NA' -> null, cast, then scale.
    # Each expression keeps its source column name, so it overwrites in place.
    to_percent = [
        pl.col(col).replace('NA', None).cast(pl.Float64) * 100
        for col in int_cols
    ]
    return global_findex.select(rel_vars).with_columns(to_percent)
    

In [7]:
# Region-level rows used by the regional comparison charts below.
def filter_regions(df):
    """Keep the region aggregate rows and add a shortened region label.

    Rows are kept when 'countrynewwb' is one of the six region names handled
    here and 'group2' equals 'all'. A new column 'clean_region_name' holds the
    region name without its ' (excluding high income)' suffix; 'South Asia'
    has no such suffix and is copied through unchanged.
    """
    # long aggregate name -> label shown in the charts
    short_names = {
        'East Asia & Pacific (excluding high income)': 'East Asia & Pacific',
        'Europe & Central Asia (excluding high income)': 'Europe & Central Asia',
        'Middle East & North Africa (excluding high income)': 'Middle East & North Africa',
        'Latin America & Caribbean (excluding high income)': 'Latin America & Caribbean',
        'Sub-Saharan Africa (excluding high income)': 'Sub-Saharan Africa',
    }
    wanted = [*short_names, 'South Asia']

    is_region_total = pl.col('countrynewwb').is_in(wanted) & (pl.col('group2') == 'all')
    # replace() leaves values that are not keys of the mapping as they are
    label = pl.col('countrynewwb').replace(short_names).alias('clean_region_name')

    return df.filter(is_region_total).with_columns(label)


In [8]:
def region_account_ownership(df): 
    """Bar chart of 'fiaccount_t_d' by year, one panel per region.

    Expects the columns 'year', 'fiaccount_t_d' and 'clean_region_name'
    (the latter is added by filter_regions). Returns the Altair chart.
    """
    region_account_chart = alt.Chart(df).mark_bar().encode(
        x = 'year:N', 
        # axis title previously misspelled "similiar"
        y = alt.Y('fiaccount_t_d:Q', title = 'Account at a bank or similar financial institution(%)'),
        column= alt.Column('clean_region_name',
                           title='Region',
                           header=alt.Header(labelAngle=45))
    )
    return region_account_chart

region_account_ownership(filter_regions(process_data()))

In [9]:
def region_mobile_account(df):
    """Line chart of 'mobileaccount_t_d' by year, one panel per region.

    Rows with year 2011 are dropped before plotting. Expects the columns
    'year', 'mobileaccount_t_d' and 'clean_region_name'.
    """
    without_2011 = df.filter(pl.col('year') != 2011)

    share_axis = alt.Y('mobileaccount_t_d:Q', title = 'Mobile Money Account(%)')
    region_panels = alt.Column(
        'clean_region_name',
        title='Region',
        header=alt.Header(labelAngle=40),
    )

    return (
        alt.Chart(without_2011)
        .mark_line(point=True)
        .encode(x='year:N', y=share_axis, column=region_panels)
    )

region_mobile_account(filter_regions(process_data()))

In [10]:
def filter_south_asia(df, group = True):
    """Keep rows whose 'countrynewwb' is listed in south_asia_countries.

    group : truthy  -> keep only rows where 'group2' is not 'all'
            falsy   -> keep only rows where 'group2' equals 'all'
    """
    in_scope = pl.col('countrynewwb').is_in(south_asia_countries)
    if group:
        wanted_rows = pl.col('group2') != 'all'
    else:
        wanted_rows = pl.col('group2') == 'all'
    return df.filter(in_scope).filter(wanted_rows)

In [11]:
def filter_south_asia_region(df, group = True):
    """Keep only the rows whose 'countrynewwb' is exactly 'South Asia'.

    group : truthy  -> keep only rows where 'group2' is not 'all'
            falsy   -> keep only rows where 'group2' equals 'all'
    """
    is_all = pl.col('group2') == 'all'
    not_all = pl.col('group2') != 'all'
    region_rows = df.filter(pl.col('countrynewwb') == 'South Asia')
    return region_rows.filter(not_all if group else is_all)

In [12]:
def country_account_ownership(df):
    """Line chart of 'fiaccount_t_d' by year, one coloured line per 'countrynewwb'.

    Expects the columns 'year', 'fiaccount_t_d' and 'countrynewwb'.
    Returns the Altair chart.
    """
    country_account_chart = alt.Chart(df).mark_line(point=True).encode(
        x = 'year:N', 
        # axis title previously misspelled "similiar"
        y = alt.Y('fiaccount_t_d:Q', title = 'Account at a bank or similar financial institution(%)'),
        color= alt.Color('countrynewwb', title  = 'Country')
    )
    return country_account_chart
country_account_ownership(filter_south_asia(process_data(), False))

In [13]:
def country_mobile_account_ownership(df):
    """Bar chart of 'mobileaccount_t_d' by year, one panel per 'countrynewwb'.

    Panels follow the order of south_asia_countries.
    """
    country_panels = alt.Column('countrynewwb', title  = 'Country', sort = south_asia_countries)
    share_axis = alt.Y('mobileaccount_t_d:Q', title = 'Mobile Money Account(%)')
    return alt.Chart(df).mark_bar().encode(
        x='year:N',
        y=share_axis,
        column=country_panels,
    )
country_mobile_account_ownership(filter_south_asia(process_data(), False))

In [14]:
def account_owenership_gender_gaps(df):
    """Line chart of 'fiaccount_t_d' by year, coloured by gender, one panel per country.

    Only rows with group == 'gender' are plotted; 'group2' supplies the colour.
    The function name keeps its original spelling so existing calls still work.
    """
    gender_df = df.filter(pl.col('group') == 'gender')
    account_gender_gap = alt.Chart(gender_df).mark_line(point = True).encode(
        x = 'year:N', 
        # axis title previously misspelled "similiar"
        y = alt.Y('fiaccount_t_d:Q',title = 'Account at a bank or similar financial institution(%)'),
        color = alt.Color('group2').scale(scheme = 'redyellowgreen').title('Gender'), 
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )
    return account_gender_gap
account_owenership_gender_gaps(filter_south_asia(process_data(), True))

In [15]:
def mobile_account_gender_gaps(df):
    """Plot 'mobileaccount_t_d' (x) against gender (y), one panel per country.

    Only rows with group == 'gender' are used.
    NOTE(review): 'year' is not encoded, so every year in df lands in the
    same panel -- confirm that is intended.
    """
    only_gender = df.filter(pl.col('group') == 'gender')

    gender_axis = alt.Y('group2:N', title = 'Gender')
    share_axis = alt.X('mobileaccount_t_d:Q',title = 'Mobile Money Account(%)')
    country_panels = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)

    return (
        alt.Chart(only_gender)
        .mark_line(point = True)
        .encode(y=gender_axis, x=share_axis, column=country_panels)
    )

mobile_account_gender_gaps(filter_south_asia(process_data(), True))


In [16]:
def account_age_gaps(df):
    """Heatmap of 'fiaccount_t_d' by year (x) and age group (y), one panel per country.

    Only rows with group == 'age_cat' are used; 'group2' supplies the age label.
    """
    only_age = df.filter(pl.col('group') == 'age_cat')

    shade = alt.Color('fiaccount_t_d:Q', title = 'Account(%)').scale(scheme = 'lightmulti')
    country_panels = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)

    return alt.Chart(only_age).mark_rect().encode(
        x='year:N',
        y=alt.Y('group2:N', title = 'Age'),
        color=shade,
        column=country_panels,
    )

account_age_gaps(filter_south_asia(process_data(), True))

In [47]:
def account_income_gaps(df):
    """Heatmap of 'fiaccount_t_d' by year (x) and income group (y), one panel per country.

    Only rows with group == 'income' are used; 'group2' supplies the income label.
    """
    only_income = df.filter(pl.col('group') == 'income')

    shade = alt.Color('fiaccount_t_d:Q', title = 'Account(%)').scale(scheme = 'lightmulti')
    country_panels = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)

    return alt.Chart(only_income).mark_rect().encode(
        x='year:N',
        y=alt.Y('group2:N', title = 'Income'),
        color=shade,
        column=country_panels,
    )

account_income_gaps(filter_south_asia(process_data(), True))
                          

In [18]:
def mobile_income_gaps(df):
    """Plot 'mobileaccount_t_d' (x) against income group (y), one panel per country.

    Only rows with group == 'income' are used; 'group2' supplies the income
    label. Returns the Altair chart.
    """
    income_df = df.filter(pl.col('group') == 'income')
    mobile_income_gap = alt.Chart(income_df).mark_line(point = True).encode(
        # The axis shows income groups; it was previously titled 'Gender'.
        y = alt.Y('group2:N', title = 'Income'),
        x = alt.X('mobileaccount_t_d:Q',title = 'Mobile Money Account(%)'),
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )
    return mobile_income_gap

mobile_income_gaps(filter_south_asia(process_data(), True))

In [19]:
# Rows for the south_asia_countries entries where group2 == 'all'.
df = filter_south_asia(process_data(), group=False)

In [None]:
# saved formally: fin17a_17a1_d
# saved informally: fin17c
# financial resilience: fin24aSD_ND

In [31]:
# Rows for the south_asia_countries entries where group2 == 'all'.
df = filter_south_asia(process_data(), group=False)

In [51]:
def savings_behavior_df(df):
    """Reshape the two savings indicators into long form.

    'fin17a_17a1_d' and 'fin17c' are unpivoted into a 'savings_method' /
    'savings_per' pair, keyed by 'countrynewwb', 'year', 'group' and 'group2'.
    The method codes are relabelled 'Saved Formally' / 'Saved Informally',
    and rows with year 2011 are dropped.
    """
    # indicator column -> label shown in charts (dict order sets unpivot order)
    method_labels = {
        'fin17a_17a1_d': 'Saved Formally',
        'fin17c': 'Saved Informally',
    }
    return (
        df.unpivot(
            index=['countrynewwb', 'year', 'group', 'group2'],
            on=list(method_labels),
            variable_name='savings_method',
            value_name='savings_per',
        )
        .with_columns(pl.col('savings_method').replace(method_labels))
        .filter(pl.col('year') != 2011)
    )
    

In [52]:
# Preview the long-form savings table for the breakdown rows (group2 != 'all').
savings_behavior_df(filter_south_asia(process_data(), group=True))

countrynewwb,year,group,group2,savings_method,savings_per
str,i64,str,str,str,f64
"""Bangladesh""",2014,"""gender""","""men""","""Saved Formally""",
"""Bangladesh""",2014,"""gender""","""women""","""Saved Formally""",
"""India""",2014,"""gender""","""men""","""Saved Formally""",18.427435
"""India""",2014,"""gender""","""women""","""Saved Formally""",10.129097
"""Pakistan""",2014,"""gender""","""men""","""Saved Formally""",
…,…,…,…,…,…
"""South Asia""",2017,"""education""","""prim edu or less""","""Saved Informally""",9.5428362
"""South Asia""",2021,"""education""","""secondary edu or more""","""Saved Informally""",
"""South Asia""",2021,"""education""","""prim edu or less""","""Saved Informally""",
"""South Asia""",2024,"""education""","""secondary edu or more""","""Saved Informally""",14.194567


In [66]:
def savings_behavior_by_country(df):
    """Area chart of 'savings_per' by year, coloured by 'savings_method', one panel per country.

    Expects the long-form frame returned by savings_behavior_df.
    """
    method_colour = alt.Color('savings_method').scale(scheme = 'greenblue')
    country_panels = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    return alt.Chart(df).mark_area().encode(
        x='year:N',
        y='savings_per:Q',
        color=method_colour,
        column=country_panels,
    )

savings_behavior_by_country(savings_behavior_df(filter_south_asia(process_data(), False)))

In [62]:
# too much missing data --> skip this
def savings_behavior_by_gender(df):
    """Line chart of 'fin17c' by year, coloured by gender, one panel per country.

    Only rows with group == 'gender' are plotted. The previous version
    filtered on 'income' and titled the legend 'Income', which contradicted
    the function name. The y-axis title now says "Informally" because
    savings_behavior_df labels 'fin17c' as 'Saved Informally'.
    """
    gender_df = df.filter(pl.col('group') == 'gender')
    savings_gender_gap = alt.Chart(gender_df).mark_line(point=True).encode(
        x = 'year:N', 
        y = alt.Y('fin17c:Q', title = 'Saved Money Informally(%)'), 
        color = alt.Color('group2:N', title = 'Gender').scale(scheme = 'lightmulti'), 
        column = alt.Column('countrynewwb', title = 'Country', sort = south_asia_countries)
    )
    return savings_gender_gap

savings_behavior_by_gender(filter_south_asia(process_data(), True))

In [69]:
# Show fin24aSD_ND next to the columns that identify each row.
df.select('countrynewwb', 'year', 'group', 'group2', 'fin24aSD_ND')

countrynewwb,year,group,group2,fin24aSD_ND
str,i64,str,str,f64
"""Bangladesh""",2011,"""all""","""all""",
"""India""",2011,"""all""","""all""",
"""Pakistan""",2011,"""all""","""all""",
"""Bangladesh""",2014,"""all""","""all""",
"""India""",2014,"""all""","""all""",
…,…,…,…,…
"""South Asia""",2011,"""all""","""all""",
"""South Asia""",2014,"""all""","""all""",
"""South Asia""",2017,"""all""","""all""",
"""South Asia""",2021,"""all""","""all""",32.426813


In [73]:
def financial_resiliance_all(df):
    """Bar chart of 'fin24aSD_ND' by year, one panel per country.

    Panels follow the order of south_asia_countries. The y-axis has no
    explicit title.
    """
    country_panels = alt.Column('countrynewwb', title  = 'Country', sort = south_asia_countries)
    return alt.Chart(df).mark_bar().encode(
        x='year:N',
        y=alt.Y('fin24aSD_ND:Q'),
        column=country_panels,
    )

In [74]:
# Render the chart for the rows where group2 == 'all'.
financial_resiliance_all(filter_south_asia(process_data(), group=False))