In [2]:
import pandas as pd
pd.set_option('mode.chained_assignment', None) # suppress unnecessary warnings
import numpy as np
from os import listdir
import re

import sqlalchemy as sa
import cx_Oracle

from pandas.tseries.offsets import Day, MonthEnd
from dateutil.relativedelta import relativedelta
import datetime
import os

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from chart_studio.plotly import plot, iplot
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings('ignore')

In [3]:
# List the working directory to check which data files are available.
os.listdir()

['.ipynb_checkpoints',
 'financial_inclusion.png',
 'findex.ipynb',
 'FINDEXCountry-Series.csv',
 'FINDEXCountry.csv',
 'FINDEXData.csv',
 'FINDEXFootNote.csv',
 'FINDEXSeries.csv',
 'wld_lmc_mmr.xlsx']

In [4]:
# Country-level metadata; columns that contain no values at all are discarded.
country = pd.read_csv('FINDEXCountry.csv')
country = country.dropna(axis='columns', how='all')

In [5]:
# Load the main indicator file.
df = pd.read_csv('FINDEXData.csv')

# Drop the indicator code and every column that holds no values at all.
df = df.drop(columns=['Indicator Code']).dropna(how='all', axis='columns')

# Drop rows whose value columns are all empty. The identifying columns are
# moved into the index first so that only the remaining columns are tested.
df = (
    df.set_index(['Country Name', 'Country Code', 'Indicator Name'])
    .dropna(how='all', axis='rows')
    .reset_index()
)

# Keep only the 2017 values (the country name is re-attached from `country`).
df = df.drop(columns=['Country Name', '2011', '2014'])

# Attach country metadata. This is an inner join, so rows whose country code
# is absent from `country` are dropped.
df = df.merge(
    country[['Country Code', 'Short Name', 'Region', 'Income Group']],
    on='Country Code',
)

# Normalise column names to lower snake_case.
df.columns = df.columns.str.lower().str.replace(' ', '_', regex=False)

# Split "<main indicator> (<sub indicator>)" at the LAST "(" only.
# n=1 guarantees exactly two result columns even when an indicator name
# contains more than one "(", which would otherwise break the assignment.
# The main indicator keeps any trailing whitespace from the raw name; later
# cells match on those exact strings, so it is intentionally not stripped.
df[['main_indicator', 'sub_indicator']] = (
    df['indicator_name'].str.rsplit('(', n=1, expand=True)
)

# Remove the closing parenthesis. regex=False makes ")" a literal character
# regardless of the pandas version's default for `regex`.
df['sub_indicator'] = df['sub_indicator'].str.replace(')', '', regex=False)

# Keep only the headline population group.
df = df.loc[df['sub_indicator'] == '% age 15+']

# Drop indicators whose main part contains a colon or a comma.
df = df[~df['main_indicator'].str.contains(':', regex=False)]
df = df[~df['main_indicator'].str.contains(',', regex=False)]

# The sub indicator is now constant; drop it together with any column that
# has become entirely empty after filtering.
df = df.drop(columns=['sub_indicator']).dropna(how='all', axis='columns')

In [6]:
# Economies to analyse.
# NOTE: despite its name, this list holds `short_name` values, not country codes.
country_code_list = ['Myanmar']

# Keep the chosen economies, discard the columns not needed downstream and
# drop indicators that have no 2017 value.
selected = (
    df.loc[df['short_name'].isin(country_code_list)]
    .drop(columns=['indicator_name', 'region', 'income_group'])
    .dropna(subset=['2017'])
)

In [7]:
# Preview the first rows of the selected indicators.
selected.head()

Unnamed: 0,country_code,2017,short_name,main_indicator
81229,MMR,25.992298,Myanmar,Account
81241,MMR,43.872032,Myanmar,Borrowed any money in the past year
81265,MMR,7.060641,Myanmar,Borrowed for health or medical purposes
81277,MMR,19.091125,Myanmar,Borrowed from a financial institution
81278,MMR,19.091125,Myanmar,Borrowed from a financial institution or used ...


# Digital 

In [146]:
# Digital-usage indicators, spelled exactly as they appear in the data
# (including the trailing space where the raw name has one).
digital = [
    'Used a mobile phone or the internet to access a financial institution account in the past year ',
    'Used the internet to buy something online in the past year',
    'Used the internet to pay bills in the past year ',
]

# Myanmar rows for those indicators, with the shared label prefixes removed
# and the 2017 value rounded to a whole percentage.
digital_mm = (
    selected
    .loc[lambda d: d['main_indicator'].isin(digital) & (d['country_code'] == 'MMR')]
    .assign(
        main_indicator=lambda d: (
            d['main_indicator']
            .str.replace('Used the internet ', '')
            .str.replace('Used a mobile phone or the internet ', '')
        ),
        **{'2017': lambda d: np.round(d['2017'], decimals=0)},
    )
)

digital_mm[['main_indicator', '2017']].sort_values(by=['2017'], ascending=False)

Unnamed: 0,main_indicator,2017
81898,to buy something online in the past year,3.0
81882,to access a financial institution account in t...,1.0
81910,to pay bills in the past year,1.0


In [147]:
# Untruncated indicator labels, ordered by descending 2017 value.
digital_mm.sort_values(by='2017', ascending=False)[['main_indicator']].values

array([['to buy something online in the past year'],
       ['to access a financial institution account in the past year '],
       ['to pay bills in the past year ']], dtype=object)

# Savings 

In [148]:
# Myanmar rows for every indicator mentioning 'Saved', with the 2017 value
# rounded to a whole percentage.
saved_mm = (
    selected
    .loc[lambda d: d['main_indicator'].str.contains('Saved') & (d['country_code'] == 'MMR')]
    .assign(**{'2017': lambda d: np.round(d['2017'], decimals=0)})
)

saved_mm[['main_indicator', '2017']].sort_values(by=['2017'], ascending=False)

Unnamed: 0,main_indicator,2017
81774,Saved any money in the past year,36.0
81810,Saved for old age,13.0
81786,Saved at a financial institution,8.0
81834,Saved using a savings club or a person outside...,8.0
81798,Saved for education or school fees,


In [149]:
# Untruncated indicator labels, ordered by descending 2017 value.
saved_mm.sort_values(by='2017', ascending=False)[['main_indicator']].values

array([['Saved any money in the past year '],
       ['Saved for old age '],
       ['Saved at a financial institution '],
       ['Saved using a savings club or a person outside the family '],
       ['Saved for education or school fees ']], dtype=object)

# Borrowing 

In [137]:
# Myanmar rows for every indicator mentioning 'Borrowed'. The word itself is
# removed from the label and the 2017 value is rounded to a whole percentage.
borrowed_mm = (
    selected
    .loc[lambda d: d['main_indicator'].str.contains('Borrowed') & (d['country_code'] == 'MMR')]
    .assign(
        main_indicator=lambda d: d['main_indicator'].str.replace('Borrowed', ''),
        **{'2017': lambda d: np.round(d['2017'], decimals=0)},
    )
)

borrowed_mm[['main_indicator', '2017']].sort_values(by=['2017'], ascending=False)

Unnamed: 0,main_indicator,2017
81241,any money in the past year,44.0
81325,from family or friends,22.0
81277,from a financial institution,19.0
81278,from a financial institution or used a credit...,19.0
81265,for health or medical purposes,7.0
81301,from a savings club,1.0
81253,for education or school fees,
81313,from a store by buying on credit,


In [131]:
# Myanmar rows for the 'No account ...' reasons. The longer prefix is removed
# first so that the shorter one only catches the labels without "of".
no_account_mm = (
    selected
    .loc[lambda d: d['main_indicator'].str.contains('No account') & (d['country_code'] == 'MMR')]
    .assign(
        main_indicator=lambda d: (
            d['main_indicator']
            .str.replace('No account because of', '')
            .str.replace('No account because', '')
        ),
        **{'2017': lambda d: np.round(d['2017'], decimals=0)},
    )
)

no_account_mm[['main_indicator', '2017']].sort_values(by=['2017'], ascending=False)

Unnamed: 0,main_indicator,2017
81525,insufficient funds,57.0
81527,lack of necessary documentation,24.0
81521,financial institutions are too far away,17.0
81523,financial services are too expensive,7.0
81535,someone in the family has an account,7.0
81531,no need for financial services ONLY,5.0
81533,religious reasons,2.0
81529,lack of trust in financial institutions,1.0


# Appendix

In [60]:
# `shape` is a property, not a method, so calling it raises TypeError; the
# original line also referenced `a`, which is never defined in this notebook.
# NOTE(review): assumed the intent was to inspect the cleaned frame - confirm.
df.shape

In [178]:
# Account / card ownership indicators, spelled as in the data (trailing space included).
ownership = ['Credit card ownership ', 'Debit card ownership ',
             'Mobile money account ', 'Financial institution account ']

# Lower-middle-income economies of East Asia & Pacific, restricted to the
# ownership indicators that have a 2017 value.
sunburst = (
    df
    .loc[lambda d: (d['region'] == 'East Asia & Pacific')
                   & (d['income_group'] == 'Lower middle income')
                   & d['main_indicator'].isin(ownership)]
    .drop(columns=['indicator_name'])
    .dropna(subset=['2017'])
)
sunburst.head()

Unnamed: 0,country_code,2017,short_name,region,income_group,main_indicator
30327,KHM,0.554112,Cambodia,East Asia & Pacific,Lower middle income,Credit card ownership
30339,KHM,7.194252,Cambodia,East Asia & Pacific,Lower middle income,Debit card ownership
30352,KHM,17.802168,Cambodia,East Asia & Pacific,Lower middle income,Financial institution account
30463,KHM,5.659509,Cambodia,East Asia & Pacific,Lower middle income,Mobile money account
55895,IDN,2.439473,Indonesia,East Asia & Pacific,Lower middle income,Credit card ownership


In [180]:
# Sunburst nested as region -> income group -> economy -> indicator, with
# wedge sizes taken from the 2017 value.
fig = px.sunburst(
    data_frame=sunburst,
    path=['region', 'income_group', 'short_name', 'main_indicator'],
    values='2017',
)
fig.show()

In [116]:
#df.loc[df['income_group'].isnull()][['country_code','short_name']].drop_duplicates()
#df.loc[df['income_group']=='Lower middle income'].short_name.unique()
#df.loc[df['income_group']=='Low income'].short_name.unique()

In [162]:
# Compare account / card ownership in Myanmar with the world and
# lower-middle-income aggregates.
#
# The table is built from `df` rather than `selected`: `selected` is
# restricted to Myanmar, so on a fresh run it cannot supply the 'World' and
# 'Lower middle income' rows this comparison is meant to show.
# The indicator list gets its own name so that `ownership` only ever refers
# to the resulting DataFrame.
ownership_indicators = ['Credit card ownership ', 'Debit card ownership ',
                        'Mobile money account ', 'Financial institution account ']
comparison_groups = ['Myanmar', 'World', 'Lower middle income']

ownership = (
    df
    .loc[df['short_name'].isin(comparison_groups)
         & df['main_indicator'].isin(ownership_indicators)]
    # Same column set and missing-value handling as `selected`.
    .drop(columns=['indicator_name', 'region', 'income_group'])
    .dropna(subset=['2017'])
    # Whole percentages are enough for the comparison table.
    .assign(**{'2017': lambda d: np.round(d['2017'], decimals=0)})
)

ownership[['main_indicator', 'short_name', '2017']].sort_values(by=['2017'], ascending=False)

Unnamed: 0,main_indicator,short_name,2017
14122,Financial institution account,World,67.0
7435,Financial institution account,Lower middle income,56.0
14108,Debit card ownership,World,48.0
7421,Debit card ownership,Lower middle income,27.0
81398,Financial institution account,Myanmar,26.0
14095,Credit card ownership,World,18.0
7546,Mobile money account,Lower middle income,5.0
81385,Debit card ownership,Myanmar,5.0
14231,Mobile money account,World,4.0
7409,Credit card ownership,Lower middle income,3.0
