# NOTEBOOK

## Data sources:
- DHIS2 health indicators: 2011 and 2016
- DGFP health indicators: 2011 and 2016
- DHS raw variables: 2011 and 2014
- SVRS raw variables: 2012 and 2015
- CES indicators: 2011 and 2016

## Time points:
![Timpoints](timepoints.png)

In [1]:
import os
import re
import numpy as np
import pandas as pd
from collections import Counter
from fuzzywuzzy import fuzz

In [2]:
def intersect_dfs(input_df1, input_df2):
    df1 = input_df1.copy(deep=True)
    df2 = input_df2.copy(deep=True)
    subset_var = list(set(list(df1.columns)).intersection(set(list(df2.columns))))
    return df1[subset_var], df2[subset_var]

def read_ces(files_list, common=True):
    data_dict = {}
    for file in files_list:
        data_dict[file] = pd.read_csv(file, encoding='cp850')
        data_dict[file].rename(columns={'Survey.Units"':'geo'}, inplace=True)
        subset = [not bool(re.search(r"Division|Launch District|CC|KCC|RCC|DCC|SCC|CCC|BCC|Urban|Rural|CC Slum| Slum|National", geo)) for geo in data_dict[file]['Survey.Units']]
        print(Counter([not bool(re.search(r"Division|Launch District|CC|KCC|RCC|DCC|SCC|CCC|BCC|Urban|Rural|CC Slum| Slum|National", geo)) for geo in data_dict[file]['Survey.Units']]))
        data_dict[file] = data_dict[file].loc[subset,:]
        print(data_dict[file].shape)
    return data_dict

def match_districts(ref_df, ref_match, input_df, input_match):
    out = pd.DataFrame()
    for key, code in enumerate(input_df[input_match]):
        code_match = {}
        code_match['FuzzRatio'] = [fuzz.ratio(ref_code, code)  for ref_code in ref_df[ref_match]]
        code_match['Geo'] = code
        code_match['DistrictGeo'] = [value[0] for value in ref_df.values]
        code_match['DistrictName'] = [value[1]  for value in ref_df.values]
        code_match = pd.DataFrame.from_dict(code_match)
        out = out.append(code_match.sort_values('FuzzRatio', ascending=False).iloc[0,:])
    for var in list(out.columns):
        if out[var].dtype.kind == 'f':
            out[var] = out[var].astype(int)
            out[var] = out[var].astype(str)
            code_length = max([len(char) for char in out[var]])
            out[var] = out[var].str.pad(width=code_length, side='left', fillchar='0') 
    return out

def read_dgfp(files_list):
    data_dict = {}
    for file in files_list:
        data_dict[file] = pd.read_csv(file)
    return data_dict

def distrGO_rates(input_df, pattern, denominator_male, denominator_female):
    df = input_df.copy(deep=True)
    vars_rates = [var for var in df.columns if pattern in var]
    for var in vars_rates:
        if "_male" in var:
            df[var] = np.round(df[var]/(denominator_male/1000),4)
        else: 
            df[var] = np.round(df[var]/(denominator_female/1000), 4)
    return df, vars_rates

## GEOS

## CES Data

In [3]:
CES2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2011.csv'
CES2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2016.csv'
ces_list = [CES2011, CES2016]
ces = read_ces(files_list=ces_list, common=True)
ces.keys()

Counter({True: 64, False: 20})
(64, 44)
Counter({True: 64, False: 24})
(64, 52)


dict_keys(['/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2011.csv', '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2016.csv'])

In [4]:
ces_2011, ces_2016 = intersect_dfs(input_df1=ces[ces_list[0]], input_df2=ces[ces_list[1]])
print(ces_2011.shape)
print(ces_2016.shape)

(64, 32)
(64, 32)


In [5]:
ces_2011['DistrictCode'] = ces_2011['DistrictCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2011['DivisionCode'] = ces_2011['DivisionCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2011['DistrictGeo'] = ces_2011['DivisionCode'].str.cat(ces_2011['DistrictCode'], sep="")
ces_2011 = ces_2011.drop(['DivisionName', 'DivisionCode', 'Geo', 'DistrictCode', 'Year', 'FuzzRatio', 'Survey.Units'], axis=1)
ces_2011.head()

Unnamed: 0,TT1_Mother0-11MChildren,Measles_Children23M,TT4_Mother0-11MChildren,OPV3_Children12M,OPV2_Children23M,OPV3_Children23M,PENTA2_Children12M,TT3_Mother0-11MChildren,PENTA3_Children23M,VitACoverage_Children12-59M,...,OPV2_Children12M,PENTA1_Children23M,PENTA2_Children23M,BCG_Children23M,OPV1_Children12M,BCG_Children12M,Measles_Children12M,PENTA3_Children12M,DistrictName,DistrictGeo
0,96.7,84.5,71.9,92.0,98.3,93.5,97.5,86.7,84.1,88.1,...,97.5,99.0,98.3,99.0,98.3,98.3,81.3,83.3,Brahmanbaria,2012
1,90.0,88.1,52.9,93.3,96.0,93.3,96.0,71.4,85.6,96.7,...,96.0,98.1,96.0,98.1,98.1,98.1,86.7,85.6,Bagerhat,4001
2,89.0,82.3,54.3,88.8,90.8,89.4,90.2,72.4,84.4,83.3,...,90.8,94.3,90.2,94.3,93.8,94.3,79.2,83.8,Bandarban,2003
3,99.0,88.1,47.6,94.7,98.6,95.4,98.6,72.4,87.9,96.7,...,98.6,100.0,98.6,100.0,99.5,100.0,82.6,87.3,Barguna,1004
4,97.1,86.8,52.4,94.5,98.1,94.5,98.1,77.1,86.9,79.0,...,98.1,98.6,98.1,98.6,98.6,98.6,82.6,86.9,Barisal,1006


In [6]:
ces_2016['DistrictCode'] = ces_2016['DistrictCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2016['DivisionCode'] = ces_2016['DivisionCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2016['DistrictGeo'] = ces_2016['DivisionCode'].str.cat(ces_2016['DistrictCode'], sep="")
ces_2016 = ces_2016.drop(['DivisionName', 'DivisionCode', 'Geo', 'DistrictCode', 'Year', 'FuzzRatio', 'Survey.Units'], axis=1)
ces_2016.head()

Unnamed: 0,TT1_Mother0-11MChildren,Measles_Children23M,TT4_Mother0-11MChildren,OPV3_Children12M,OPV2_Children23M,OPV3_Children23M,PENTA2_Children12M,TT3_Mother0-11MChildren,PENTA3_Children23M,VitACoverage_Children12-59M,...,OPV2_Children12M,PENTA1_Children23M,PENTA2_Children23M,BCG_Children23M,OPV1_Children12M,BCG_Children12M,Measles_Children12M,PENTA3_Children12M,DistrictName,DistrictGeo
0,98.1,92.2,61.9,90.8,97.1,91.0,96.8,85.3,91.0,82.0,...,96.8,97.4,97.1,98.2,97.4,98.2,90.3,90.8,Bagerhat,4001
1,94.8,89.8,79.6,87.8,94.8,87.8,94.8,86.1,87.8,84.1,...,94.8,96.3,94.8,99.0,96.3,99.0,86.3,87.8,Bandarban,2003
2,98.8,94.9,64.4,93.0,98.3,93.6,97.9,88.4,93.6,96.8,...,97.9,98.8,98.3,99.7,98.8,99.7,91.0,93.0,Barguna,1004
3,100.0,97.1,79.3,95.5,99.3,96.0,99.3,96.7,96.0,100.0,...,99.3,99.1,99.3,99.7,99.1,99.7,93.1,95.5,Barisal,1006
6,100.0,96.6,79.0,94.5,99.8,94.5,99.8,94.1,94.5,98.4,...,99.8,99.8,99.8,99.8,99.8,99.8,95.4,94.5,Bhola,1009


In [7]:
check_list = []
for var1, var2 in zip(sorted(ces_2011['DistrictName']), sorted(ces_2011['DistrictName'])):
    check_list.append(var1==var2)
print(all(check_list))
geo = ces_2011[['DistrictGeo', 'DistrictName']]
geo.head()

True


Unnamed: 0,DistrictGeo,DistrictName
0,2012,Brahmanbaria
1,4001,Bagerhat
2,2003,Bandarban
3,1004,Barguna
4,1006,Barisal


## SVRS Data 

In [8]:
SVRS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2012_clean.csv'
SVRS2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2015_clean.csv'

In [9]:
svrs_2011 = pd.read_csv(SVRS2011)
svrs_2011['district'] = svrs_2011['district'].str.replace(' Zila', '')
display(svrs_2011.head())
svrs_2016 = pd.read_csv(SVRS2016)
display(svrs_2016.head())
svrs_2011, svrs_2016 = intersect_dfs(input_df1=svrs_2011, input_df2=svrs_2016)
print(svrs_2011.columns)
print(svrs_2016.columns)

Unnamed: 0,district,prop_live_births,prop_registered_births,prop_deaths_rural,sex_ratio,dependency_ratio,prop_pop_rural,prop_pop_women,prop_pop_rural_women,prop_women_15.45y_overwomen,prop_married_women_15.45y,prop_married_..15y,rate_live_births,rate_fertility,rate_death,rate_child_death,rate_under5y_mortality,rate_infant_mortality,rate_maternal_mortality,year
0,Bagerhat,98.62,7,78,103.13,51.4,73.67,49.23,36.03,50.65,73.58,66.24,10.85,43.49,3.66,1.14,27.97,20.98,27.97,2012
1,Bandarban,98.61,12,56,107.07,63.84,45.42,48.29,22.1,49.68,73.23,70.54,19.26,80.3,3.48,0.98,56.34,51.64,4.69,2012
2,Barguna,99.3,7,75,100.62,56.56,64.75,49.85,32.28,49.91,82.21,74.9,20.55,82.58,5.77,5.03,38.87,21.2,3.53,2012
3,Barisal,98.96,14,68,105.58,54.58,63.4,48.64,30.64,48.66,69.59,65.86,19.36,81.8,5.97,0.76,52.77,50.13,2.64,2012
4,Bhola,99.02,51,73,110.73,61.07,71.88,47.45,34.15,49.2,73.6,68.84,18.19,77.89,5.13,1.52,66.23,59.6,9.93,2012


Unnamed: 0,district,prop_live_births,prop_registered_births,prop_attendant_delivery,prop_deaths_rural,sex_ratio,dependency_ratio,prop_pop_rural,prop_pop_women,prop_pop_rural_women,...,prop_married_women_15.45y,prop_married_..15y,rate_live_births,rate_fertility,rate_death,rate_child_death,rate_under5y_mortality,rate_infant_mortality,rate_maternal_mortality,year
0,Bagerhat,97.13,7.47,54.02,78,99.06,54.59,77.17,50.24,38.71,...,79.44,73.17,18.76,74.68,6.9,0.0,17.75,17.75,0.0,2015
1,Bandarban,98.59,1.41,15.49,71,99.61,60.72,78.4,50.1,39.54,...,67.15,66.83,19.56,79.33,5.79,0.0,28.57,28.57,0.0,2015
2,Barguna,99.21,3.15,50.39,80,100.28,50.84,69.5,49.93,34.5,...,80.72,76.27,16.34,64.11,5.79,1.96,31.75,23.81,7.94,2015
3,Barisal,97.99,2.01,67.74,34,98.42,51.13,25.83,50.4,13.01,...,76.11,71.08,16.45,62.76,5.15,2.01,27.32,19.13,2.73,2015
4,Bhola,97.51,17.4,29.01,94,103.37,64.28,90.97,49.17,44.69,...,78.37,72.66,22.63,94.44,4.0,3.28,31.16,19.83,0.0,2015


Index(['rate_infant_mortality', 'year', 'prop_women_15.45y_overwomen',
       'prop_married_women_15.45y', 'rate_fertility', 'rate_under5y_mortality',
       'prop_pop_rural_women', 'prop_registered_births', 'prop_married_..15y',
       'rate_death', 'prop_pop_rural', 'district', 'prop_deaths_rural',
       'rate_maternal_mortality', 'prop_pop_women', 'dependency_ratio',
       'sex_ratio', 'prop_live_births', 'rate_live_births',
       'rate_child_death'],
      dtype='object')
Index(['rate_infant_mortality', 'year', 'prop_women_15.45y_overwomen',
       'prop_married_women_15.45y', 'rate_fertility', 'rate_under5y_mortality',
       'prop_pop_rural_women', 'prop_registered_births', 'prop_married_..15y',
       'rate_death', 'prop_pop_rural', 'district', 'prop_deaths_rural',
       'rate_maternal_mortality', 'prop_pop_women', 'dependency_ratio',
       'sex_ratio', 'prop_live_births', 'rate_live_births',
       'rate_child_death'],
      dtype='object')


In [10]:
geo_svrs_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=svrs_2011, input_match='district')
geo_svrs_2016 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=svrs_2016, input_match='district')

In [11]:
print(svrs_2011.shape)
print(svrs_2016.shape)
svrs_2011 = svrs_2011.merge(geo_svrs_2011, how='left', left_on='district', right_on='Geo')
svrs_2016 = svrs_2016.merge(geo_svrs_2016, how='left', left_on='district', right_on='Geo')
svrs_2011 = svrs_2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
svrs_2016 = svrs_2016.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(svrs_2011.shape)
print(svrs_2016.shape)

(64, 20)
(64, 20)
(64, 20)
(64, 20)


In [12]:
print(svrs_2011['rate_maternal_mortality'].mean())
print(svrs_2016['rate_maternal_mortality'].mean())

3.7296874999999994
2.65078125


## DHIS2 Data

In [13]:
DHIS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhis2/health_indicators/District_2011_NAME.csv'
DHIS2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhis2/health_indicators/District_2016_NAME.csv'

In [14]:
dhis_2011 = pd.read_csv(DHIS2011)
dhis_2011['Geo'] = dhis_2011['True'].str.replace(" District", "")
display(dhis_2011.head())
dhis_2016 = pd.read_csv(DHIS2016)
dhis_2016['Geo'] = dhis_2016['True'].str.replace(" District", "")
display(dhis_2016.head())
dhis_2011, dhis_2016 = intersect_dfs(input_df1=dhis_2011, input_df2=dhis_2016)
print(dhis_2011.shape)
print(dhis_2016.shape)

Unnamed: 0,True,07Vaccine&LogisticsstockofUpazilaMunCC: Differences between Pentavalent doses and vial uses,07Vaccine&LogisticsstockofUpazilaMunCC: Penta vial Opening + Receive,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI Form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI Investigation form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI form E36 - E39 need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI line listing form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI BCG diluent need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI BCG need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI CC supply book need with buffer,...,02ChildHealth: IMCI Wasting (%),02ChildHealth: Neonatal Case fatality rate (EmOC),02ChildHealth: Percentage of diarrhea reported at facility,02ChildHealth: Percentage of pneumonia reported at facility,05Logistics: Percentage of functional ambulance,05Logistics: Percentage of functional x-ray,AntenatalCare(ANC): 1st Visit ANC,AntenatalCare(ANC): 2nd Visit ANC,AntenatalCare(ANC): 3rd & more ANC,Geo
0,Bagerhat District,358.0,128774.0,643.0,395.0,398.0,509.0,26253.0,26373.0,93.0,...,1.8,0.2009,11.35,3.7,68.85,37.025,,,,Bagerhat
1,Bandarban District,-667.0,62055.0,115.0,112.0,110.0,120.0,8212.5,8362.5,4.0,...,0.16,0.1165,8.7,8.39,65.24,61.538,,,,Bandarban
2,Barguna District,0.0,109797.0,3856.0,33.0,36.0,38.0,15523.5,15523.5,14.0,...,1.4,1.1355,7.1,4.07,71.36,46.073,,,,Barguna
3,Barishal District,492.0,297323.0,1827.0,552.0,551.0,552.0,40608.0,40608.0,27.0,...,1.0,0.3973,11.74,5.68,81.82,51.923,,,,Barishal
4,Bhola District,10.0,198050.0,3499.0,95.0,92.0,94.0,29293.5,29430.0,41.0,...,8.3,0.0774,13.54,3.74,76.34,67.742,,,,Bhola


Unnamed: 0,True,04Newborn: % of Nurse training on ETAT at SCANU,04Newborn: % of female baby admitted in SCANU reported individually,04Newborn: % of female baby admitted in SCANU reported monthly,04Newborn: % of functional Radiant warmer,04Newborn: % of male baby admitted in SCANU reported individually,04Newborn: % of male baby admitted in SCANU reported monthly,04Newborn: % of non functional Photo therapy unit,04Newborn: % of non-functioning Table Resuscitator with Radiant warmer,04Newborn: % of nurse allocated in SCANU among all nurses in the facility,...,02ChildHealth: IMCI Total Child,02ChildHealth: IMCI Underweight (%),02ChildHealth: IMCI Wasting (%),02ChildHealth: Neonatal Case fatality rate (EmOC),02ChildHealth: Percentage of diarrhea reported at facility,02ChildHealth: Percentage of pneumonia reported at facility,AntenatalCare(ANC): 1st Visit ANC,AntenatalCare(ANC): 2nd Visit ANC,AntenatalCare(ANC): 3rd & more ANC,Geo
0,Bagerhat District,245.0,41.5,39.4,42.6,58.5,60.6,49.3,35.3,38.1,...,215355.0,4.3,1.7,0.187,11.57,3.32,,,,Bagerhat
1,Bandarban District,,48.3,42.9,0.0,51.7,57.1,0.0,0.0,0.0,...,59810.0,1.6,0.35,0.3076,9.28,8.57,,,,Bandarban
2,Barguna District,,0.0,,,100.0,,,,,...,127829.0,2.8,1.1,1.2882,7.47,4.38,,,,Barguna
3,Barishal District,,100.0,,,0.0,,,,,...,209694.0,4.2,0.88,0.0354,13.06,6.35,,,,Barishal
4,Bhola District,379.3,37.0,36.4,23.1,63.0,63.6,12.6,0.0,13.7,...,171269.0,7.7,5.3,0.0986,13.35,4.6,,,,Bhola


(64, 350)
(64, 350)


In [15]:
geo_dhis_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhis_2011, input_match='Geo')
geo_dhis_2016 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhis_2016, input_match='Geo')

In [16]:
print(dhis_2011.shape)
print(dhis_2016.shape)
dhis_2011 = dhis_2011.merge(geo_dhis_2011, how='left', left_on='Geo', right_on='Geo')
dhis_2016 = dhis_2016.merge(geo_dhis_2016, how='left', left_on='Geo', right_on='Geo')
dhis_2011 = dhis_2011.drop(['FuzzRatio','Geo',], axis=1)
dhis_2016 = dhis_2016.drop(['FuzzRatio','Geo',], axis=1)
print(dhis_2011.shape)
print(dhis_2016.shape)

(64, 350)
(64, 350)
(64, 351)
(64, 351)


## DGFP Data

In [17]:
DGFP2011a = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_ngothanaprocess_2011.csv'
DGFP2011b = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_thanaprocess_2011.csv'
DGFP2011c = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_district_monthprocess_2011.csv'
DGFP2011d = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_ngodistrict_monthprocess_2011.csv'
DGFP2011e = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_distributionGO_distmonthProcess_2011.csv'
DGFP2011 = [DGFP2011a, DGFP2011b, DGFP2011c, DGFP2011d, DGFP2011e]
DGFP2016a = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_ngothanaprocess_2016.csv'
DGFP2016b = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_thanaprocess_2016.csv'
DGFP2016c = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_district_monthprocess_2016.csv'
DGFP2016d = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_ngodistrict_monthprocess_2016.csv'
DGFP2016e = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_distributionGO_distmonthProcess_2016.csv'

DGFP2016 = [DGFP2016a, DGFP2016b, DGFP2016c, DGFP2016d, DGFP2016e]

In [18]:
dgfp2011 = read_dgfp(files_list=DGFP2011)
dgfp2016 = read_dgfp(files_list=DGFP2016)

In [19]:
dgfp2011[DGFP2011e]['distr_GOdistr_GOfemalepn'].sum()

0

In [20]:
dgfp2011df = pd.concat([dgfp2011[DGFP2011a], 
                        dgfp2011[DGFP2011b].drop('geo', axis=1), 
                        dgfp2011[DGFP2011c].drop('geo', axis=1),
                        dgfp2011[DGFP2011d].drop('geo', axis=1),
                        dgfp2011[DGFP2011e].drop('geo', axis=1)], axis=1)
print(dgfp2011df.shape)
print(dgfp2011df.columns)
dgfp2016df = pd.concat([dgfp2016[DGFP2016a], 
                        dgfp2016[DGFP2016b].drop('geo', axis=1), 
                        dgfp2016[DGFP2016c].drop('geo', axis=1), 
                        dgfp2016[DGFP2016d].drop('geo', axis=1),
                        dgfp2016[DGFP2016e].drop('geo', axis=1)], axis=1)
print(dgfp2016df.shape)
print(dgfp2016df.columns)

(64, 59)
Index(['imp11subdistr_ngothanaprocessNGO_Percent_Pill',
       'imp11subdistr_ngothanaprocessNGO_Percent_Condom',
       'imp11subdistr_ngothanaprocessNGO_Percent_Injectable',
       'imp11subdistr_ngothanaprocessNGO_Percent_IUD',
       'imp11subdistr_ngothanaprocessNGO_Percent_Implant',
       'imp11subdistr_ngothanaprocessNGO_Percent_PerMale',
       'imp11subdistr_ngothanaprocessNGO_Percent_PerFemale',
       'imp11subdistr_ngothanaprocessNGO_CAR', 'geo',
       'imp11subdistr_thanaprocessPercent_Pill',
       'imp11subdistr_thanaprocessPercent_Condom',
       'imp11subdistr_thanaprocessPercent_Injectable',
       'imp11subdistr_thanaprocessPercent_IUD',
       'imp11subdistr_thanaprocessPercent_Implant',
       'imp11subdistr_thanaprocessPercent_PerMale',
       'imp11subdistr_thanaprocessPercent_PerFemale',
       'imp11subdistr_thanaprocessCAR',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Pill',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent

In [21]:
dgfp2011df, dgfp2016df = intersect_dfs(input_df1= dgfp2011df, input_df2= dgfp2016df)

In [22]:
dgfp2011df["geo"] = dgfp2011df["geo"].astype(str)
print(dgfp2011df.shape)
dgfp2011df = dgfp2011df.merge(geo, how='left', left_on="geo", right_on="DistrictGeo")
print(dgfp2011df.shape)
dgfp2011df = dgfp2011df.drop('geo', axis=1)

(64, 59)
(64, 61)


In [23]:
dgfp2016df["geo"] = dgfp2016df["geo"].astype(str)
print(dgfp2016df.shape)
dgfp2016df = dgfp2016df.merge(geo, how='left', left_on="geo", right_on="DistrictGeo")
print(dgfp2016df.shape)
dgfp2016df = dgfp2016df.drop('geo', axis=1)

(64, 59)
(64, 61)


In [24]:
dgfp2011df.columns

Index(['imp12distr_monthprocessImp12DistrMonthThana_Percent_Implant',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Pill',
       'imp11subdistr_thanaprocessPercent_PerFemale',
       'imp11subdistr_ngothanaprocessNGO_CAR',
       'imp11subdistr_ngothanaprocessNGO_Percent_IUD',
       'distr_GOdistr_GOfemalepn',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Condom',
       'distr_GOdistr_GOIUD_normal', 'Imp12DistrNGOMonthThana_Percent_PerMale',
       'distr_GOdistr_GOmrm_pack', 'distr_GOdistr_GOinj_mgs4',
       'distr_GOdistr_GOInj_vial', 'distr_GOdistr_GOifa_number',
       'imp11subdistr_ngothanaprocessNGO_Percent_Injectable',
       'imp11subdistr_thanaprocessCAR', 'distr_GOdistr_GOShukhi',
       'Imp12DistrNGOMonthThana_Percent_Condom',
       'Imp12DistrNGOMonthThana_Percent_Injectable',
       'imp11subdistr_ngothanaprocessNGO_Percent_PerFemale',
       'imp11subdistr_ngothanaprocessNGO_Percent_Pill',
       'imp11subdistr_ngothanaprocessNGO_Percent

## Demographics

In [25]:
woman15_45_2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2012.csv'
woman15_45_2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2015.csv'
TOTAL_POP2011 = 144043697
TOTAL_POP2016 = 162951560
data_svrs2011 = pd.read_csv(woman15_45_2011)
data_svrs2011['district'] = data_svrs2011['district'].str.replace(' Zila', '')
data_svrs2016 = pd.read_csv(woman15_45_2016)
data_svrs2011['total_bang_svrs'] = data_svrs2011['total_pop'].sum()
data_svrs2016['total_bang_svrs'] = data_svrs2016['total_pop'].sum()
data_svrs2011['total_bang'] = TOTAL_POP2011
data_svrs2016['total_bang'] = TOTAL_POP2016

data_svrs2011['total_pop_percent'] = data_svrs2011['total_pop']/data_svrs2011['total_pop'].sum()
data_svrs2016['total_pop_percent'] = data_svrs2016['total_pop']/data_svrs2016['total_pop'].sum()
data_svrs2011['total_pop_abs'] = data_svrs2011['total_pop_percent']*TOTAL_POP2011
data_svrs2016['total_pop_abs'] = data_svrs2016['total_pop_percent']*TOTAL_POP2016
data_svrs2011['woman15_45_abs'] = np.round(data_svrs2011['total_pop_abs'] * data_svrs2011['women_15.45y']/data_svrs2011['total_pop'])
data_svrs2016['woman15_45_abs'] = np.round(data_svrs2016['total_pop_abs'] * data_svrs2016['women_15.45y']/data_svrs2016['total_pop'])
display(data_svrs2011[['total_bang_svrs', 'total_pop', 'total_pop_percent', 'women_15.45y',
                      'total_pop_abs', 'woman15_45_abs', 'total_bang']].head())
display(data_svrs2016[['total_bang_svrs', 'total_pop', 'total_pop_percent', 'women_15.45y',
                      'total_pop_abs', 'woman15_45_abs', 'total_bang']].head())

Unnamed: 0,total_bang_svrs,total_pop,total_pop_percent,women_15.45y,total_pop_abs,woman15_45_abs,total_bang
0,1116845,13370,0.011971,3334,1724379.0,429999.0,144043697
1,1116845,11213,0.01004,2690,1446183.0,346939.0,144043697
2,1116845,13871,0.01242,3451,1788995.0,445088.0,144043697
3,1116845,19779,0.01771,4682,2550972.0,603855.0,144043697
4,1116845,16772,0.015017,3916,2163148.0,505061.0,144043697


Unnamed: 0,total_bang_svrs,total_pop,total_pop_percent,women_15.45y,total_pop_abs,woman15_45_abs,total_bang
0,939530,9274,0.009871,2330,1608477.0,404114.0,162951560
1,939530,3629,0.003863,895,629411.7,155228.0,162951560
2,939530,7773,0.008273,1981,1348145.0,343584.0,162951560
3,939530,45404,0.048326,11903,7874844.0,2064450.0,162951560
4,939530,15999,0.017029,3833,2774858.0,664793.0,162951560


In [26]:
geo_svrs_2011s = match_districts(ref_df=geo, ref_match='DistrictName', input_df=data_svrs2011, input_match='district')
geo_svrs_2016s = match_districts(ref_df=geo, ref_match='DistrictName', input_df=data_svrs2016, input_match='district')
print(data_svrs2011.shape)
print(data_svrs2016.shape)
data_svrs2011 = data_svrs2011.merge(geo_svrs_2011s, how='left', left_on='district', right_on='Geo')
data_svrs2016 = data_svrs2016.merge(geo_svrs_2016s, how='left', left_on='district', right_on='Geo')
data_svrs2011 = data_svrs2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
data_svrs2016 = data_svrs2016.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(data_svrs2011.shape)
print(data_svrs2016.shape)

(64, 47)
(64, 48)
(64, 47)
(64, 48)


## Recalculating indicators

In [27]:
print(sorted(data_svrs2011['DistrictName']) == sorted(dgfp2011df['DistrictName']))
print(sorted(data_svrs2016['DistrictName']) == sorted(dgfp2016df['DistrictName']))

True
True


In [28]:
dgfp2011df = dgfp2011df.sort_values("DistrictName")
data_svrs2011 = data_svrs2011.sort_values("DistrictName")
dgfp2011df, variables = distrGO_rates(input_df=dgfp2011df, pattern="distr_GO", 
              denominator_female=data_svrs2011['woman15_45_abs'], 
              denominator_male=data_svrs2011['total_bang'])
display(dgfp2011df[variables].head())

Unnamed: 0,distr_GOdistr_GOfemalepn,distr_GOdistr_GOIUD_normal,distr_GOdistr_GOmrm_pack,distr_GOdistr_GOinj_mgs4,distr_GOdistr_GOInj_vial,distr_GOdistr_GOifa_number,distr_GOdistr_GOShukhi,distr_GOdistr_GOfemalenormal,distr_GOdistr_GOsanitary_pad,distr_GOdistr_GOPill_total,...,distr_GOdistr_GOper_male,distr_GOdistr_GOPermanent_method,distr_GOdistr_GOInj_siringe,distr_GOdistr_GOImp_total,distr_GOdistr_GOIUD_total,distr_GOdistr_GOImp_normal,distr_GOdistr_GOfemale_total,distr_GOdistr_GOIUD_partum,distr_GOdistr_GOImp_Jadel,distr_GOdistr_GOmisoprostol
2,0.0,8.3871,0.0,0.0,377.7073,0.0,3657.7126,3.0646,0.0,3657.7126,...,0.0141,7.6412,0.0,8.4006,8.3871,8.4006,3.0646,0.0,0.0,0.0
63,0.0,2.6429,0.0,0.0,91.5638,0.0,655.7759,0.5443,0.0,655.7759,...,0.0041,1.9211,0.0,1.7435,2.6429,1.7435,0.5443,0.0,0.0,0.0
11,0.0,4.3501,0.0,0.0,265.0741,0.0,1302.0635,0.4057,0.0,1302.0635,...,0.0172,5.8866,0.0,9.565,4.3501,9.565,0.4057,0.0,0.0,0.0
61,0.0,12.7257,0.0,0.0,552.4833,0.0,2935.3205,3.673,0.0,2935.3205,...,0.0194,9.1668,0.0,7.5193,12.7257,7.5193,3.673,0.0,0.0,0.0
51,0.0,6.844,0.0,0.0,1017.456,0.0,2403.5347,0.7171,0.0,2403.5347,...,0.013,5.3922,0.0,7.4587,6.844,7.4587,0.7171,0.0,0.0,0.0


In [29]:
dgfp2016df = dgfp2016df.sort_values(by ="DistrictName")
data_svrs2016 = data_svrs2016.sort_values("DistrictName")
dgfp2016df, variables = distrGO_rates(input_df=dgfp2016df, pattern="distr_GO", 
              denominator_female=data_svrs2016['woman15_45_abs'], 
              denominator_male=data_svrs2016['total_bang'])
display(dgfp2016df[variables].head())

Unnamed: 0,distr_GOdistr_GOfemalepn,distr_GOdistr_GOIUD_normal,distr_GOdistr_GOmrm_pack,distr_GOdistr_GOinj_mgs4,distr_GOdistr_GOInj_vial,distr_GOdistr_GOifa_number,distr_GOdistr_GOShukhi,distr_GOdistr_GOfemalenormal,distr_GOdistr_GOsanitary_pad,distr_GOdistr_GOPill_total,...,distr_GOdistr_GOper_male,distr_GOdistr_GOPermanent_method,distr_GOdistr_GOInj_siringe,distr_GOdistr_GOImp_total,distr_GOdistr_GOIUD_total,distr_GOdistr_GOImp_normal,distr_GOdistr_GOfemale_total,distr_GOdistr_GOIUD_partum,distr_GOdistr_GOImp_Jadel,distr_GOdistr_GOmisoprostol
2,0.6258,9.398,0.1048,0.0,391.194,164.3616,4134.4882,1.7114,1.2632,4209.646,...,0.0045,4.4763,421.923,10.1809,9.4708,9.6017,2.3371,0.0728,0.5792,34.2216
63,0.0311,3.1421,0.0985,0.0,108.3455,41.7469,653.2773,0.1996,0.0674,671.4688,...,0.003,1.5088,108.9366,2.8958,3.194,2.8595,0.2255,0.0518,0.0363,10.6344
11,0.5082,4.0778,0.1102,0.0,222.7787,97.8945,1175.802,0.3266,0.4123,1196.4034,...,0.0057,2.7186,228.126,8.5741,4.0778,8.1495,0.8348,0.0,0.4245,17.2747
61,0.2462,1.5042,0.0103,0.0,107.2469,57.8425,602.7928,0.3437,0.0697,611.6951,...,0.008,1.1744,116.021,2.8073,1.5342,2.6675,0.5894,0.0299,0.1399,7.5774
51,0.3184,8.1887,95.9963,0.0,900.6146,2109.482,2015.356,0.0398,0.8268,2050.1934,...,0.0088,3.5129,977.2401,13.331,9.115,13.0502,0.3316,0.9263,0.2808,38.4233


In [30]:
dgfp2011df.head()

Unnamed: 0,imp12distr_monthprocessImp12DistrMonthThana_Percent_Implant,imp12distr_monthprocessImp12DistrMonthThana_Percent_Pill,imp11subdistr_thanaprocessPercent_PerFemale,imp11subdistr_ngothanaprocessNGO_CAR,imp11subdistr_ngothanaprocessNGO_Percent_IUD,distr_GOdistr_GOfemalepn,imp12distr_monthprocessImp12DistrMonthThana_Percent_Condom,distr_GOdistr_GOIUD_normal,Imp12DistrNGOMonthThana_Percent_PerMale,distr_GOdistr_GOmrm_pack,...,distr_GOdistr_GOIUD_partum,Imp12DistrNGOMonthThana_Percent_Pill,Imp12DistrNGOMonthThana_CAR,distr_GOdistr_GOImp_Jadel,distr_GOdistr_GOmisoprostol,imp11subdistr_ngothanaprocessNGO_Percent_PerMale,imp12distr_monthprocessImp12DistrMonthThana_Percent_PerMale,imp11subdistr_thanaprocessPercent_IUD,DistrictGeo,DistrictName
2,2.42,51.52,7.76,80.02,3.95,0.0,8.31,8.3871,3.81,0.0,...,0.0,53.61,80.02,0.0,0.0,3.81,5.53,5.41,4001,Bagerhat
63,5.22,42.68,6.65,,,0.0,8.76,2.6429,,0.0,...,0.0,,,0.0,0.0,,4.76,7.68,2003,Bandarban
11,5.08,44.92,6.72,69.28,3.26,0.0,3.63,4.3501,3.19,0.0,...,0.0,41.21,69.28,0.0,0.0,3.19,7.91,3.64,1004,Barguna
61,2.68,51.94,8.47,68.72,2.35,0.0,8.25,12.7257,0.55,0.0,...,0.0,56.44,68.72,0.0,0.0,0.55,2.46,4.35,1006,Barisal
51,2.64,40.4,4.01,73.41,3.78,0.0,4.93,6.844,1.48,0.0,...,0.0,30.32,73.41,0.0,0.0,1.48,2.71,2.52,1009,Bhola


In [31]:
dgfp2016df.head()

Unnamed: 0,imp12distr_monthprocessImp12DistrMonthThana_Percent_Implant,imp12distr_monthprocessImp12DistrMonthThana_Percent_Pill,imp11subdistr_thanaprocessPercent_PerFemale,imp11subdistr_ngothanaprocessNGO_CAR,imp11subdistr_ngothanaprocessNGO_Percent_IUD,distr_GOdistr_GOfemalepn,imp12distr_monthprocessImp12DistrMonthThana_Percent_Condom,distr_GOdistr_GOIUD_normal,Imp12DistrNGOMonthThana_Percent_PerMale,distr_GOdistr_GOmrm_pack,...,distr_GOdistr_GOIUD_partum,Imp12DistrNGOMonthThana_Percent_Pill,Imp12DistrNGOMonthThana_CAR,distr_GOdistr_GOImp_Jadel,distr_GOdistr_GOmisoprostol,imp11subdistr_ngothanaprocessNGO_Percent_PerMale,imp12distr_monthprocessImp12DistrMonthThana_Percent_PerMale,imp11subdistr_thanaprocessPercent_IUD,DistrictGeo,DistrictName
2,4.09,50.45,8.12,83.61,6.05,0.6258,9.42,9.398,4.69,0.1048,...,0.0728,44.81,83.61,0.5792,34.2216,4.69,5.33,4.34,4001,Bagerhat
63,9.44,37.61,6.3,,,0.0311,7.23,3.1421,,0.0985,...,0.0518,,,0.0363,10.6344,,9.05,8.22,2003,Bandarban
11,6.57,43.49,5.74,84.62,2.38,0.5082,3.53,4.0778,3.2,0.1102,...,0.0,41.16,84.62,0.4245,17.2747,3.2,9.46,3.21,1004,Barguna
61,5.33,49.26,8.42,76.43,1.65,0.2462,8.62,1.5042,0.97,0.0103,...,0.0299,53.3,76.43,0.1399,7.5774,0.97,2.98,4.07,1006,Barisal
51,5.13,39.68,3.23,80.44,5.46,0.3184,5.08,8.1887,2.31,95.9963,...,0.9263,32.15,80.44,0.2808,38.4233,2.31,3.21,3.15,1009,Bhola


## DHS Data

In [32]:
DHS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhs/data/data_dhs_2011_clean.csv' 
DHS2014 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhs/data/data_dhs_2014_clean.csv' 

In [33]:
dhs_2011 = pd.read_csv(DHS2011)
display(dhs_2011.head())
dhs_2014 = pd.read_csv(DHS2014)
display(dhs_2014.head())

Unnamed: 0,district,prop_current_contraceptive,prop_unmet_need_family_planing,prop_antenatal_coverage,prop_antenatal_care4.,prop_institutional_delivery,prop_attendant_delivery,prop_caesarean,sex_ratio,dependency_ratio,...,prop_hypertensive,prop_pop_rural,prop_pop_women,prop_pop_rural_women,prop_women_15.45y_overwomen,prop_married_women_15.45y,prop_married_..15y,prop_female_head,prop_registered_under5,year
0,Bagerhat,65.52,7.89,29.91,26.56,30.66,35.58,15.74,93.31,74.37,...,8.92,11.48,44.85,5.67,46.74,80.33,73.46,11.29,32.84,2011
1,Bandarban,45.45,27.27,30.0,0.0,0.0,0.0,0.0,84.44,107.5,...,0.0,0.0,127.03,0.0,35.56,75.0,69.05,11.29,31.25,2011
2,Barguna,72.36,11.14,37.91,20.97,10.56,16.3,5.21,105.44,70.73,...,19.23,6.48,25.33,2.98,49.26,88.0,77.71,2.97,41.4,2011
3,Barisal,64.33,13.31,35.21,26.53,21.53,26.92,13.06,89.02,75.53,...,19.85,8.22,24.48,4.37,48.17,78.47,71.65,5.1,38.53,2011
4,Bhola,68.4,10.81,25.64,29.51,9.64,11.89,3.75,94.76,80.16,...,18.37,7.4,26.74,3.86,45.49,80.19,74.76,5.65,21.19,2011


Unnamed: 0,district,prop_current_contraceptive,prop_unmet_need_family_planing,prop_antenatal_coverage,prop_antenatal_care4.,prop_institutional_delivery,prop_attendant_delivery,prop_caesarean,sex_ratio,dependency_ratio,prop_pop_rural,prop_pop_women,prop_pop_rural_women,prop_women_15.45y_overwomen,prop_married_women_15.45y,prop_married_..15y,prop_female_head,prop_registered_under5,year
0,Bagerhat,67.82,11.0,48.69,25.13,26.49,36.95,22.38,94.78,56.82,14.38,39.86,7.53,48.65,74.84,72.88,4.17,37.56,2014
1,Bandarban,63.64,13.64,100.0,75.0,75.0,75.0,50.0,84.93,45.16,0.0,77.52,0.0,54.79,57.5,63.46,25.49,40.0,2014
2,Barguna,73.3,8.22,37.97,47.46,27.54,34.43,22.9,87.82,68.24,9.48,28.64,5.13,44.84,86.5,77.54,8.06,25.1,2014
3,Barisal,64.26,10.08,64.01,32.39,46.33,56.91,31.22,94.74,59.68,28.48,30.97,13.91,51.1,77.42,69.16,6.99,23.4,2014
4,Bhola,67.28,10.01,29.44,15.62,11.27,18.42,4.01,101.71,69.35,9.99,26.58,5.06,47.39,82.49,75.2,3.53,17.64,2014


In [34]:
print(geo.shape)
print(dhs_2011.shape)
geo_dhs_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhs_2011, input_match='district')
print(geo_dhs_2011.shape)
print("#"*100)
print(geo.shape)
print(dhs_2014.shape)
geo_dhs_2014 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhs_2014, input_match='district')
print(geo_dhs_2014.shape)

(64, 2)
(64, 22)
(64, 4)
####################################################################################################
(64, 2)
(64, 19)
(64, 4)


In [35]:
print(dhs_2011.shape)
print(dhs_2014.shape)
dhs_2011 = dhs_2011.merge(geo_dhs_2011, how='left', left_on='district', right_on='Geo')
dhs_2014 = dhs_2014.merge(geo_dhs_2014, how='left', left_on='district', right_on='Geo')
dhs_2011 = dhs_2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
dhs_2014 = dhs_2014.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(dhs_2011.shape)
print(dhs_2014.shape)

(64, 22)
(64, 19)
(64, 22)
(64, 19)


### Renaming DHS data sets from 2014 to 2016


In [36]:
dhs_2011, dhs_2016 = intersect_dfs(input_df1= dhs_2011, input_df2= dhs_2014)

In [37]:
dhs_2011.equals(dhs_2016)

False

## Combine data

In [38]:
for ces, svrs, dhis, dgfp, dhs in zip(sorted(ces_2011['DistrictName']), sorted(svrs_2011['DistrictName']), 
                                 sorted(dhis_2011['DistrictName']), sorted(dgfp2011df['DistrictName']),
                                 sorted(dhs_2011['DistrictName'])):
    print("\n -------------------")
    print(ces, svrs, dhis, dgfp, dhs)
    print(f"CES vs SVRS: {ces == svrs}")
    print(f"CES vs DHIS: {ces == dhis}")
    print(f"CES vs DGFP: {ces == dgfp}")
    print(f"CES vs DHS: {ces == dhs}")
    print(f"SVRS vs DHIS: {svrs == dhis}")
    print(f"SVRS vs DGFP: {svrs == dgfp}")
    print(f"SVRS vs DHS: {svrs == dhs}")
    print(f"DHIS vs DGFP: {dhis == dgfp}")
    print(f"DHIS vs DHS: {dhis == dhs}")
    print(f"DHS vs DGFP: {dhs == dgfp}")


 -------------------
Bagerhat  Bagerhat  Bagerhat  Bagerhat  Bagerhat 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Bandarban  Bandarban  Bandarban  Bandarban  Bandarban 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barguna  Barguna  Barguna  Barguna  Barguna 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barisal  Barisal  Barisal  Barisal  Barisal 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs 

In [39]:
for ces, svrs, dhis, dgfp, dhs in zip(sorted(ces_2016['DistrictName']), sorted(svrs_2016['DistrictName']), 
                                 sorted(dhis_2016['DistrictName']), sorted(dgfp2016df['DistrictName']),
                                 sorted(dhs_2016['DistrictName'])):
    print("\n -------------------")
    print(ces, svrs, dhis, dgfp, dhs)
    print(f"CES vs SVRS: {ces == svrs}")
    print(f"CES vs DHIS: {ces == dhis}")
    print(f"CES vs DGFP: {ces == dgfp}")
    print(f"CES vs DHS: {ces == dhs}")
    print(f"SVRS vs DHIS: {svrs == dhis}")
    print(f"SVRS vs DGFP: {svrs == dgfp}")
    print(f"SVRS vs DHS: {svrs == dhs}")
    print(f"DHIS vs DGFP: {dhis == dgfp}")
    print(f"DHIS vs DHS: {dhis == dhs}")
    print(f"DHS vs DGFP: {dhs == dgfp}")


 -------------------
Bagerhat  Bagerhat  Bagerhat  Bagerhat  Bagerhat 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Bandarban  Bandarban  Bandarban  Bandarban  Bandarban 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barguna  Barguna  Barguna  Barguna  Barguna 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barisal  Barisal  Barisal  Barisal  Barisal 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs 

## Combining data

In [40]:
for a,b,c,d in zip(dgfp2011[DGFP2011a]['geo'], dgfp2011[DGFP2011b]['geo'], 
                   dgfp2011[DGFP2011c]['geo'], dgfp2011[DGFP2011d]['geo']):
    print(a,b,c,d)
    print(a == b)
    print(a == c)
    print(a == d)
    print(b == c)
    print(b == d)
    print(c == d)
    print("\n -----------")

5577 5577 5577 5577
True
True
True
True
True
True

 -----------
4044 4044 4044 4044
True
True
True
True
True
True

 -----------
4001 4001 4001 4001
True
True
True
True
True
True

 -----------
3061 3061 3061 3061
True
True
True
True
True
True

 -----------
1079 1079 1079 1079
True
True
True
True
True
True

 -----------
5527 5527 5527 5527
True
True
True
True
True
True

 -----------
3026 3026 3026 3026
True
True
True
True
True
True

 -----------
2022 2022 2022 2022
True
True
True
True
True
True

 -----------
3029 3029 3029 3029
True
True
True
True
True
True

 -----------
5010 5010 5010 5010
True
True
True
True
True
True

 -----------
4087 4087 4087 4087
True
True
True
True
True
True

 -----------
1004 1004 1004 1004
True
True
True
True
True
True

 -----------
4018 4018 4018 4018
True
True
True
True
True
True

 -----------
2046 2046 2046 2046
True
True
True
True
True
True

 -----------
6036 6036 6036 6036
True
True
True
True
True
True

 -----------
1042 1042 1042 1042
True
True
True
True


### Preparing 2011

In [41]:
drop_vars = ['DistrictName', 'DistrictGeo']
ces_2011 = ces_2011.sort_values(by='DistrictName').reset_index(drop=True)
svrs_2011 = svrs_2011.sort_values(by='DistrictName').reset_index(drop=True)
dhis_2011 = dhis_2011.sort_values(by='DistrictName').reset_index(drop=True)
dgfp2011df = dgfp2011df.sort_values(by='DistrictName').reset_index(drop=True)
dhs_2011 = dhs_2011.sort_values(by='DistrictName').reset_index(drop=True)

d2011 = [ces_2011, svrs_2011.drop(drop_vars,axis=1), dhis_2011.drop(drop_vars,axis=1),
         dgfp2011df.drop(drop_vars,axis=1), dhs_2011.drop(drop_vars, axis=1)]
df2011 = pd.concat(d2011, axis=1)
df2011.shape

(64, 468)

### Preparing 2016

In [42]:
ces_2016 = ces_2016.sort_values(by='DistrictName').reset_index(drop=True)
svrs_2016 = svrs_2016.sort_values(by='DistrictName').reset_index(drop=True)
dhis_2016 = dhis_2016.sort_values(by='DistrictName').reset_index(drop=True)
dgfp2016df = dgfp2016df.sort_values(by='DistrictName').reset_index(drop=True)
d2016 = [ces_2016, svrs_2016.drop(drop_vars,axis=1),
         dhis_2016.drop(drop_vars,axis=1), dgfp2016df.drop(drop_vars,axis=1),
         dhs_2016.drop(drop_vars, axis=1)]
df2016 = pd.concat(d2016, axis=1)
df2016.shape

(64, 468)

## Determining outcome variables 

In [43]:
print(df2011['rate_maternal_mortality'].mean())
print(df2016['rate_maternal_mortality'].mean())

3.7296874999999994
2.65078125


In [44]:
print(df2011['rate_under5y_mortality'].mean())
print(df2016['rate_under5y_mortality'].mean())

42.829375
39.446562500000006


In [45]:
print(df2011['prop_antenatal_coverage'].mean())
print(df2016['prop_antenatal_coverage'].mean())

37.97062499999999
56.92234374999999


In [46]:
print(df2011['prop_unmet_need_family_planing'].mean())
print(df2016['prop_unmet_need_family_planing'].mean())

13.562187500000004
11.942968750000002


```
print(df2011['ORS_RHF_ORT'].mean())
print(df2016['ORS_RHF_ORT'].mean())
```

In [47]:
df2011.to_csv('/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/all/all2011.csv', index=False, index_label=False)
df2016.to_csv('/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/all/all2016.csv', index=False, index_label=False)

In [48]:
df2011['year'] = 2011
df2016['year'] = 2016
tmp = pd.concat([df2011, df2016], axis=0)
tmp.to_csv('/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/all/all.csv')
print(tmp.shape)

(128, 469)
