# NOTEBOOK

## Data sources:
- DHIS2 health indicators: 2011 and 2016
- DGFP health indicators: 2011 and 2016
- DHS raw variables: 2011 and 2014
- SVRS raw variables: 2012 and 2015
- CES indicators: 2011 and 2016

## Time points:
![Timpoints](timepoints.png)

In [1]:
import os
import re
import numpy as np
import pandas as pd
from collections import Counter
from fuzzywuzzy import fuzz

In [2]:
def intersect_dfs(input_df1, input_df2):
    df1 = input_df1.copy(deep=True)
    df2 = input_df2.copy(deep=True)
    subset_var = list(set(list(df1.columns)).intersection(set(list(df2.columns))))
    return df1[subset_var], df2[subset_var]

def read_ces(files_list, common=True):
    data_dict = {}
    for file in files_list:
        data_dict[file] = pd.read_csv(file, encoding='cp850')
        data_dict[file].rename(columns={'Survey.Units"':'geo'}, inplace=True)
        subset = [not bool(re.search(r"Division|Launch District|CC|KCC|RCC|DCC|SCC|CCC|BCC|Urban|Rural|CC Slum| Slum|National", geo)) for geo in data_dict[file]['Survey.Units']]
        print(Counter([not bool(re.search(r"Division|Launch District|CC|KCC|RCC|DCC|SCC|CCC|BCC|Urban|Rural|CC Slum| Slum|National", geo)) for geo in data_dict[file]['Survey.Units']]))
        data_dict[file] = data_dict[file].loc[subset,:]
        print(data_dict[file].shape)
    return data_dict

def match_districts(ref_df, ref_match, input_df, input_match):
    out = pd.DataFrame()
    for key, code in enumerate(input_df[input_match]):
        code_match = {}
        code_match['FuzzRatio'] = [fuzz.ratio(ref_code, code)  for ref_code in ref_df[ref_match]]
        code_match['Geo'] = code
        code_match['DistrictGeo'] = [value[0] for value in ref_df.values]
        code_match['DistrictName'] = [value[1]  for value in ref_df.values]
        code_match = pd.DataFrame.from_dict(code_match)
        out = out.append(code_match.sort_values('FuzzRatio', ascending=False).iloc[0,:])
    for var in list(out.columns):
        if out[var].dtype.kind == 'f':
            out[var] = out[var].astype(int)
            out[var] = out[var].astype(str)
            code_length = max([len(char) for char in out[var]])
            out[var] = out[var].str.pad(width=code_length, side='left', fillchar='0') 
    return out

def read_dgfp(files_list):
    data_dict = {}
    for file in files_list:
        data_dict[file] = pd.read_csv(file)
    return data_dict

def distrGO_rates(input_df, pattern, denominator_male, denominator_female):
    df = input_df.copy(deep=True)
    vars_rates = [var for var in df.columns if pattern in var]
    for var in vars_rates:
        if "_male" in var:
            df[var] = np.round(df[var]/(denominator_male/1000),4)
        else: 
            df[var] = np.round(df[var]/(denominator_female/1000), 4)
    return df, vars_rates

## GEOS

## CES Data

In [3]:
CES2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2011.csv'
CES2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2016.csv'
ces_list = [CES2011, CES2016]
ces = read_ces(files_list=ces_list, common=True)
ces.keys()

Counter({True: 64, False: 20})
(64, 44)
Counter({True: 64, False: 24})
(64, 52)


dict_keys(['/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2011.csv', '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/unicef/gdata/gdata_unicef_ces_2016.csv'])

In [4]:
ces_2011, ces_2016 = intersect_dfs(input_df1=ces[ces_list[0]], input_df2=ces[ces_list[1]])
print(ces_2011.shape)
print(ces_2016.shape)

(64, 32)
(64, 32)


In [5]:
ces_2011['DistrictCode'] = ces_2011['DistrictCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2011['DivisionCode'] = ces_2011['DivisionCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2011['DistrictGeo'] = ces_2011['DivisionCode'].str.cat(ces_2011['DistrictCode'], sep="")
ces_2011 = ces_2011.drop(['DivisionName', 'DivisionCode', 'Geo', 'DistrictCode', 'Year', 'FuzzRatio', 'Survey.Units'], axis=1)
ces_2011.head()

Unnamed: 0,TT1_Mother0-11MChildren,DistrictName,PENTA2_Children12M,PENTA1_Children23M,Measles_Children12M,Measles_Children23M,BCG_Children12M,VitACoverage_Children12-59M,TT5_Mother0-11MChildren,PENTA3_Children23M,...,PENTA3_Children12M,OPV2_Children23M,Fully_Children23M,TT4_Mother0-11MChildren,BCG_Children23M,PENTA1_Children12M,OPV3_Children23M,OPV1_Children23M,TT3_Mother0-11MChildren,DistrictGeo
0,96.7,Brahmanbaria,97.5,99.0,81.3,84.5,98.3,88.1,53.8,84.1,...,83.3,98.3,75.0,71.9,99.0,98.3,93.5,99.0,86.7,2012
1,90.0,Bagerhat,96.0,98.1,86.7,88.1,98.1,96.7,31.9,85.6,...,85.6,96.0,79.7,52.9,98.1,98.1,93.3,98.1,71.4,4001
2,89.0,Bandarban,90.2,94.3,79.2,82.3,94.3,83.3,38.6,84.4,...,83.8,90.8,76.2,54.3,94.3,94.3,89.4,93.8,72.4,2003
3,99.0,Barguna,98.6,100.0,82.6,88.1,100.0,96.7,27.6,87.9,...,87.3,98.6,79.9,47.6,100.0,100.0,95.4,99.5,72.4,1004
4,97.1,Barisal,98.1,98.6,82.6,86.8,98.6,79.0,31.0,86.9,...,86.9,98.1,77.4,52.4,98.6,98.6,94.5,98.6,77.1,1006


In [6]:
ces_2016['DistrictCode'] = ces_2016['DistrictCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2016['DivisionCode'] = ces_2016['DivisionCode'].astype(str).str.pad(width=2, side='left', fillchar='0')
ces_2016['DistrictGeo'] = ces_2016['DivisionCode'].str.cat(ces_2016['DistrictCode'], sep="")
ces_2016 = ces_2016.drop(['DivisionName', 'DivisionCode', 'Geo', 'DistrictCode', 'Year', 'FuzzRatio', 'Survey.Units'], axis=1)
ces_2016.head()

Unnamed: 0,TT1_Mother0-11MChildren,DistrictName,PENTA2_Children12M,PENTA1_Children23M,Measles_Children12M,Measles_Children23M,BCG_Children12M,VitACoverage_Children12-59M,TT5_Mother0-11MChildren,PENTA3_Children23M,...,PENTA3_Children12M,OPV2_Children23M,Fully_Children23M,TT4_Mother0-11MChildren,BCG_Children23M,PENTA1_Children12M,OPV3_Children23M,OPV1_Children23M,TT3_Mother0-11MChildren,DistrictGeo
0,98.1,Bagerhat,96.8,97.4,90.3,92.2,98.2,82.0,36.5,91.0,...,90.8,97.1,88.3,61.9,98.2,97.4,91.0,97.4,85.3,4001
1,94.8,Bandarban,94.8,96.3,86.3,89.8,99.0,84.1,65.3,87.8,...,87.8,94.8,83.9,79.6,99.0,96.3,87.8,96.3,86.1,2003
2,98.8,Barguna,97.9,98.8,91.0,94.9,99.7,96.8,36.3,93.6,...,93.0,98.3,91.1,64.4,99.7,98.8,93.6,98.8,88.4,1004
3,100.0,Barisal,99.3,99.1,93.1,97.1,99.7,100.0,60.0,96.0,...,95.5,99.3,94.6,79.3,99.7,99.1,96.0,99.1,96.7,1006
6,100.0,Bhola,99.8,99.8,95.4,96.6,99.8,98.4,56.1,94.5,...,94.5,99.8,91.9,79.0,99.8,99.8,94.5,99.8,94.1,1009


In [7]:
check_list = []
for var1, var2 in zip(sorted(ces_2011['DistrictName']), sorted(ces_2011['DistrictName'])):
    check_list.append(var1==var2)
print(all(check_list))
geo = ces_2011[['DistrictGeo', 'DistrictName']]
geo.head()

True


Unnamed: 0,DistrictGeo,DistrictName
0,2012,Brahmanbaria
1,4001,Bagerhat
2,2003,Bandarban
3,1004,Barguna
4,1006,Barisal


## SVRS Data 

In [8]:
SVRS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2012_clean.csv'
SVRS2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2015_clean.csv'

In [9]:
svrs_2011 = pd.read_csv(SVRS2011)
svrs_2011['district'] = svrs_2011['district'].str.replace(' Zila', '')
display(svrs_2011.head())
svrs_2016 = pd.read_csv(SVRS2016)
display(svrs_2016.head())
svrs_2011, svrs_2016 = intersect_dfs(input_df1=svrs_2011, input_df2=svrs_2016)
print(svrs_2011.columns)
print(svrs_2016.columns)

Unnamed: 0,district,prop_live_births,prop_registered_births,prop_deaths_rural,sex_ratio,dependency_ratio,prop_pop_rural,prop_pop_women,prop_pop_rural_women,prop_women_15.45y_overwomen,prop_married_women_15.45y,prop_married_..15y,rate_live_births,rate_fertility,rate_death,rate_child_death,rate_under5y_mortality,rate_infant_mortality,rate_maternal_mortality,year
0,Bagerhat,98.62,7,78,103.13,51.4,73.67,49.23,36.03,50.65,73.58,66.24,10.85,43.49,3.66,1.14,27.97,20.98,27.97,2012
1,Bandarban,98.61,12,56,107.07,63.84,45.42,48.29,22.1,49.68,73.23,70.54,19.26,80.3,3.48,0.98,56.34,51.64,4.69,2012
2,Barguna,99.3,7,75,100.62,56.56,64.75,49.85,32.28,49.91,82.21,74.9,20.55,82.58,5.77,5.03,38.87,21.2,3.53,2012
3,Barisal,98.96,14,68,105.58,54.58,63.4,48.64,30.64,48.66,69.59,65.86,19.36,81.8,5.97,0.76,52.77,50.13,2.64,2012
4,Bhola,99.02,51,73,110.73,61.07,71.88,47.45,34.15,49.2,73.6,68.84,18.19,77.89,5.13,1.52,66.23,59.6,9.93,2012


Unnamed: 0,district,prop_live_births,prop_registered_births,prop_attendant_delivery,prop_deaths_rural,sex_ratio,dependency_ratio,prop_pop_rural,prop_pop_women,prop_pop_rural_women,...,prop_married_women_15.45y,prop_married_..15y,rate_live_births,rate_fertility,rate_death,rate_child_death,rate_under5y_mortality,rate_infant_mortality,rate_maternal_mortality,year
0,Bagerhat,97.13,7.47,54.02,78,99.06,54.59,77.17,50.24,38.71,...,79.44,73.17,18.76,74.68,6.9,0.0,17.75,17.75,0.0,2015
1,Bandarban,98.59,1.41,15.49,71,99.61,60.72,78.4,50.1,39.54,...,67.15,66.83,19.56,79.33,5.79,0.0,28.57,28.57,0.0,2015
2,Barguna,99.21,3.15,50.39,80,100.28,50.84,69.5,49.93,34.5,...,80.72,76.27,16.34,64.11,5.79,1.96,31.75,23.81,7.94,2015
3,Barisal,97.99,2.01,67.74,34,98.42,51.13,25.83,50.4,13.01,...,76.11,71.08,16.45,62.76,5.15,2.01,27.32,19.13,2.73,2015
4,Bhola,97.51,17.4,29.01,94,103.37,64.28,90.97,49.17,44.69,...,78.37,72.66,22.63,94.44,4.0,3.28,31.16,19.83,0.0,2015


Index(['prop_married_women_15.45y', 'year', 'rate_fertility',
       'prop_deaths_rural', 'prop_pop_women', 'rate_live_births', 'rate_death',
       'prop_pop_rural', 'rate_maternal_mortality', 'rate_child_death',
       'rate_infant_mortality', 'prop_live_births', 'sex_ratio',
       'prop_married_..15y', 'prop_pop_rural_women', 'dependency_ratio',
       'prop_registered_births', 'district', 'prop_women_15.45y_overwomen',
       'rate_under5y_mortality'],
      dtype='object')
Index(['prop_married_women_15.45y', 'year', 'rate_fertility',
       'prop_deaths_rural', 'prop_pop_women', 'rate_live_births', 'rate_death',
       'prop_pop_rural', 'rate_maternal_mortality', 'rate_child_death',
       'rate_infant_mortality', 'prop_live_births', 'sex_ratio',
       'prop_married_..15y', 'prop_pop_rural_women', 'dependency_ratio',
       'prop_registered_births', 'district', 'prop_women_15.45y_overwomen',
       'rate_under5y_mortality'],
      dtype='object')


In [10]:
geo_svrs_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=svrs_2011, input_match='district')
geo_svrs_2016 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=svrs_2016, input_match='district')

In [11]:
print(svrs_2011.shape)
print(svrs_2016.shape)
svrs_2011 = svrs_2011.merge(geo_svrs_2011, how='left', left_on='district', right_on='Geo')
svrs_2016 = svrs_2016.merge(geo_svrs_2016, how='left', left_on='district', right_on='Geo')
svrs_2011 = svrs_2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
svrs_2016 = svrs_2016.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(svrs_2011.shape)
print(svrs_2016.shape)

(64, 20)
(64, 20)
(64, 20)
(64, 20)


In [12]:
print(svrs_2011['rate_maternal_mortality'].mean())
print(svrs_2016['rate_maternal_mortality'].mean())

3.7296874999999994
2.65078125


## DHIS2 Data

In [13]:
DHIS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhis2/health_indicators/District_2011_NAME.csv'
DHIS2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhis2/health_indicators/District_2016_NAME.csv'

In [14]:
dhis_2011 = pd.read_csv(DHIS2011)
dhis_2011['Geo'] = dhis_2011['True'].str.replace(" District", "")
display(dhis_2011.head())
dhis_2016 = pd.read_csv(DHIS2016)
dhis_2016['Geo'] = dhis_2016['True'].str.replace(" District", "")
display(dhis_2016.head())
dhis_2011, dhis_2016 = intersect_dfs(input_df1=dhis_2011, input_df2=dhis_2016)
print(dhis_2011.shape)
print(dhis_2016.shape)

Unnamed: 0,True,07Vaccine&LogisticsstockofUpazilaMunCC: Differences between Pentavalent doses and vial uses,07Vaccine&LogisticsstockofUpazilaMunCC: Penta vial Opening + Receive,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI Form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI Investigation form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI form E36 - E39 need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI AEFI line listing form need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI BCG diluent need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI BCG need with buffer,07Vaccine&LogisticsstockofUpazilaMunCC: Upazila EPI CC supply book need with buffer,...,02ChildHealth: IMCI Wasting (%),02ChildHealth: Neonatal Case fatality rate (EmOC),02ChildHealth: Percentage of diarrhea reported at facility,02ChildHealth: Percentage of pneumonia reported at facility,05Logistics: Percentage of functional ambulance,05Logistics: Percentage of functional x-ray,AntenatalCare(ANC): 1st Visit ANC,AntenatalCare(ANC): 2nd Visit ANC,AntenatalCare(ANC): 3rd & more ANC,Geo
0,Bagerhat District,358.0,128774.0,643.0,395.0,398.0,509.0,26253.0,26373.0,93.0,...,1.8,0.2009,11.35,3.7,68.85,37.025,,,,Bagerhat
1,Bandarban District,-667.0,62055.0,115.0,112.0,110.0,120.0,8212.5,8362.5,4.0,...,0.16,0.1165,8.7,8.39,65.24,61.538,,,,Bandarban
2,Barguna District,0.0,109797.0,3856.0,33.0,36.0,38.0,15523.5,15523.5,14.0,...,1.4,1.1355,7.1,4.07,71.36,46.073,,,,Barguna
3,Barishal District,492.0,297323.0,1827.0,552.0,551.0,552.0,40608.0,40608.0,27.0,...,1.0,0.3973,11.74,5.68,81.82,51.923,,,,Barishal
4,Bhola District,10.0,198050.0,3499.0,95.0,92.0,94.0,29293.5,29430.0,41.0,...,8.3,0.0774,13.54,3.74,76.34,67.742,,,,Bhola


Unnamed: 0,True,04Newborn: % of Nurse training on ETAT at SCANU,04Newborn: % of female baby admitted in SCANU reported individually,04Newborn: % of female baby admitted in SCANU reported monthly,04Newborn: % of functional Radiant warmer,04Newborn: % of male baby admitted in SCANU reported individually,04Newborn: % of male baby admitted in SCANU reported monthly,04Newborn: % of non functional Photo therapy unit,04Newborn: % of non-functioning Table Resuscitator with Radiant warmer,04Newborn: % of nurse allocated in SCANU among all nurses in the facility,...,02ChildHealth: IMCI Total Child,02ChildHealth: IMCI Underweight (%),02ChildHealth: IMCI Wasting (%),02ChildHealth: Neonatal Case fatality rate (EmOC),02ChildHealth: Percentage of diarrhea reported at facility,02ChildHealth: Percentage of pneumonia reported at facility,AntenatalCare(ANC): 1st Visit ANC,AntenatalCare(ANC): 2nd Visit ANC,AntenatalCare(ANC): 3rd & more ANC,Geo
0,Bagerhat District,245.0,41.5,39.4,42.6,58.5,60.6,49.3,35.3,38.1,...,215355.0,4.3,1.7,0.187,11.57,3.32,,,,Bagerhat
1,Bandarban District,,48.3,42.9,0.0,51.7,57.1,0.0,0.0,0.0,...,59810.0,1.6,0.35,0.3076,9.28,8.57,,,,Bandarban
2,Barguna District,,0.0,,,100.0,,,,,...,127829.0,2.8,1.1,1.2882,7.47,4.38,,,,Barguna
3,Barishal District,,100.0,,,0.0,,,,,...,209694.0,4.2,0.88,0.0354,13.06,6.35,,,,Barishal
4,Bhola District,379.3,37.0,36.4,23.1,63.0,63.6,12.6,0.0,13.7,...,171269.0,7.7,5.3,0.0986,13.35,4.6,,,,Bhola


(64, 350)
(64, 350)


In [15]:
geo_dhis_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhis_2011, input_match='Geo')
geo_dhis_2016 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhis_2016, input_match='Geo')

In [16]:
print(dhis_2011.shape)
print(dhis_2016.shape)
dhis_2011 = dhis_2011.merge(geo_dhis_2011, how='left', left_on='Geo', right_on='Geo')
dhis_2016 = dhis_2016.merge(geo_dhis_2016, how='left', left_on='Geo', right_on='Geo')
dhis_2011 = dhis_2011.drop(['FuzzRatio','Geo',], axis=1)
dhis_2016 = dhis_2016.drop(['FuzzRatio','Geo',], axis=1)
print(dhis_2011.shape)
print(dhis_2016.shape)

(64, 350)
(64, 350)
(64, 351)
(64, 351)


## DGFP Data

In [17]:
DGFP2011a = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_ngothanaprocess_2011.csv'
DGFP2011b = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_thanaprocess_2011.csv'
DGFP2011c = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_district_monthprocess_2011.csv'
DGFP2011d = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_ngodistrict_monthprocess_2011.csv'
DGFP2011e = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_distributionGO_distmonthProcess_2011.csv'
DGFP2011 = [DGFP2011a, DGFP2011b, DGFP2011c, DGFP2011d, DGFP2011e]
DGFP2016a = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_ngothanaprocess_2016.csv'
DGFP2016b = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp11subdistr_thanaprocess_2016.csv'
DGFP2016c = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_district_monthprocess_2016.csv'
DGFP2016d = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_ngodistrict_monthprocess_2016.csv'
DGFP2016e = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dgfp/data/data_dgfp_imp12distr_distributionGO_distmonthProcess_2016.csv'

DGFP2016 = [DGFP2016a, DGFP2016b, DGFP2016c, DGFP2016d, DGFP2016e]

In [18]:
dgfp2011 = read_dgfp(files_list=DGFP2011)
dgfp2016 = read_dgfp(files_list=DGFP2016)

In [21]:
dgfp2011[DGFP2011e]['distr_GOdistr_GOfemalepn'].sum()

0

In [19]:
dgfp2011df = pd.concat([dgfp2011[DGFP2011a], 
                        dgfp2011[DGFP2011b].drop('geo', axis=1), 
                        dgfp2011[DGFP2011c].drop('geo', axis=1),
                        dgfp2011[DGFP2011d].drop('geo', axis=1),
                        dgfp2011[DGFP2011e].drop('geo', axis=1)], axis=1)
print(dgfp2011df.shape)
print(dgfp2011df.columns)
dgfp2016df = pd.concat([dgfp2016[DGFP2016a], 
                        dgfp2016[DGFP2016b].drop('geo', axis=1), 
                        dgfp2016[DGFP2016c].drop('geo', axis=1), 
                        dgfp2016[DGFP2016d].drop('geo', axis=1),
                        dgfp2016[DGFP2016e].drop('geo', axis=1)], axis=1)
print(dgfp2016df.shape)
print(dgfp2016df.columns)

(64, 59)
Index(['imp11subdistr_ngothanaprocessNGO_Percent_Pill',
       'imp11subdistr_ngothanaprocessNGO_Percent_Condom',
       'imp11subdistr_ngothanaprocessNGO_Percent_Injectable',
       'imp11subdistr_ngothanaprocessNGO_Percent_IUD',
       'imp11subdistr_ngothanaprocessNGO_Percent_Implant',
       'imp11subdistr_ngothanaprocessNGO_Percent_PerMale',
       'imp11subdistr_ngothanaprocessNGO_Percent_PerFemale',
       'imp11subdistr_ngothanaprocessNGO_CAR', 'geo',
       'imp11subdistr_thanaprocessPercent_Pill',
       'imp11subdistr_thanaprocessPercent_Condom',
       'imp11subdistr_thanaprocessPercent_Injectable',
       'imp11subdistr_thanaprocessPercent_IUD',
       'imp11subdistr_thanaprocessPercent_Implant',
       'imp11subdistr_thanaprocessPercent_PerMale',
       'imp11subdistr_thanaprocessPercent_PerFemale',
       'imp11subdistr_thanaprocessCAR',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Pill',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent

In [162]:
dgfp2011df, dgfp2016df = intersect_dfs(input_df1= dgfp2011df, input_df2= dgfp2016df)

In [163]:
dgfp2011df["geo"] = dgfp2011df["geo"].astype(str)
print(dgfp2011df.shape)
dgfp2011df = dgfp2011df.merge(geo, how='left', left_on="geo", right_on="DistrictGeo")
print(dgfp2011df.shape)
dgfp2011df = dgfp2011df.drop('geo', axis=1)

(64, 59)
(64, 61)


In [164]:
dgfp2016df["geo"] = dgfp2016df["geo"].astype(str)
print(dgfp2016df.shape)
dgfp2016df = dgfp2016df.merge(geo, how='left', left_on="geo", right_on="DistrictGeo")
print(dgfp2016df.shape)
dgfp2016df = dgfp2016df.drop('geo', axis=1)

(64, 59)
(64, 61)


In [165]:
dgfp2011df.columns

Index(['distr_GOinj_mgs4', 'Imp12DistrNGOMonthThana_Percent_PerMale',
       'distr_GOmnp_saset',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_IUD',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_PerMale',
       'imp11subdistr_thanaprocessPercent_PerMale',
       'Imp12DistrNGOMonthThana_Percent_Implant', 'distr_GOimplant_remove',
       'distr_GOIUD_partum', 'imp11subdistr_thanaprocessCAR',
       'imp11subdistr_thanaprocessPercent_IUD', 'distr_GOifa_number',
       'imp12distr_monthprocessImp12DistrMonthThana_CAR',
       'Imp12DistrNGOMonthThana_Percent_Injectable',
       'imp11subdistr_thanaprocessPercent_Pill',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Injectable',
       'distr_GOShukhi', 'imp11subdistr_ngothanaprocessNGO_Percent_IUD',
       'imp11subdistr_ngothanaprocessNGO_Percent_Implant',
       'imp12distr_monthprocessImp12DistrMonthThana_Percent_Condom',
       'imp11subdistr_ngothanaprocessNGO_Percent_Pill',
       'Imp12Dis

## Demographics

In [166]:
woman15_45_2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2012.csv'
woman15_45_2016 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/bbs/data/data_svrs_zila_2015.csv'
TOTAL_POP2011 = 144043697
TOTAL_POP2016 = 162951560
data_svrs2011 = pd.read_csv(woman15_45_2011)
data_svrs2011['district'] = data_svrs2011['district'].str.replace(' Zila', '')
data_svrs2016 = pd.read_csv(woman15_45_2016)
data_svrs2011['total_bang_svrs'] = data_svrs2011['total_pop'].sum()
data_svrs2016['total_bang_svrs'] = data_svrs2016['total_pop'].sum()
data_svrs2011['total_bang'] = TOTAL_POP2011
data_svrs2016['total_bang'] = TOTAL_POP2016

data_svrs2011['total_pop_percent'] = data_svrs2011['total_pop']/data_svrs2011['total_pop'].sum()
data_svrs2016['total_pop_percent'] = data_svrs2016['total_pop']/data_svrs2016['total_pop'].sum()
data_svrs2011['total_pop_abs'] = data_svrs2011['total_pop_percent']*TOTAL_POP2011
data_svrs2016['total_pop_abs'] = data_svrs2016['total_pop_percent']*TOTAL_POP2016
data_svrs2011['woman15_45_abs'] = np.round(data_svrs2011['total_pop_abs'] * data_svrs2011['women_15.45y']/data_svrs2011['total_pop'])
data_svrs2016['woman15_45_abs'] = np.round(data_svrs2016['total_pop_abs'] * data_svrs2016['women_15.45y']/data_svrs2016['total_pop'])
display(data_svrs2011[['total_bang_svrs', 'total_pop', 'total_pop_percent', 'women_15.45y',
                      'total_pop_abs', 'woman15_45_abs', 'total_bang']].head())
display(data_svrs2016[['total_bang_svrs', 'total_pop', 'total_pop_percent', 'women_15.45y',
                      'total_pop_abs', 'woman15_45_abs', 'total_bang']].head())

Unnamed: 0,total_bang_svrs,total_pop,total_pop_percent,women_15.45y,total_pop_abs,woman15_45_abs,total_bang
0,1116845,13370,0.011971,3334,1724379.0,429999.0,144043697
1,1116845,11213,0.01004,2690,1446183.0,346939.0,144043697
2,1116845,13871,0.01242,3451,1788995.0,445088.0,144043697
3,1116845,19779,0.01771,4682,2550972.0,603855.0,144043697
4,1116845,16772,0.015017,3916,2163148.0,505061.0,144043697


Unnamed: 0,total_bang_svrs,total_pop,total_pop_percent,women_15.45y,total_pop_abs,woman15_45_abs,total_bang
0,939530,9274,0.009871,2330,1608477.0,404114.0,162951560
1,939530,3629,0.003863,895,629411.7,155228.0,162951560
2,939530,7773,0.008273,1981,1348145.0,343584.0,162951560
3,939530,45404,0.048326,11903,7874844.0,2064450.0,162951560
4,939530,15999,0.017029,3833,2774858.0,664793.0,162951560


In [167]:
geo_svrs_2011s = match_districts(ref_df=geo, ref_match='DistrictName', input_df=data_svrs2011, input_match='district')
geo_svrs_2016s = match_districts(ref_df=geo, ref_match='DistrictName', input_df=data_svrs2016, input_match='district')
print(data_svrs2011.shape)
print(data_svrs2016.shape)
data_svrs2011 = data_svrs2011.merge(geo_svrs_2011s, how='left', left_on='district', right_on='Geo')
data_svrs2016 = data_svrs2016.merge(geo_svrs_2016s, how='left', left_on='district', right_on='Geo')
data_svrs2011 = data_svrs2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
data_svrs2016 = data_svrs2016.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(data_svrs2011.shape)
print(data_svrs2016.shape)

(64, 47)
(64, 48)
(64, 47)
(64, 48)


## Recalculating indicators

In [168]:
print(sorted(data_svrs2011['DistrictName']) == sorted(dgfp2011df['DistrictName']))
print(sorted(data_svrs2016['DistrictName']) == sorted(dgfp2016df['DistrictName']))

True
True


In [169]:
dgfp2011df = dgfp2011df.sort_values("DistrictName")
data_svrs2011 = data_svrs2011.sort_values("DistrictName")
dgfp2011df, variables = distrGO_rates(input_df=dgfp2011df, pattern="distr_GO", 
              denominator_female=data_svrs2011['woman15_45_abs'], 
              denominator_male=data_svrs2011['total_bang'])
display(dgfp2011df[variables].head())

Unnamed: 0,distr_GOinj_mgs4,distr_GOmnp_saset,distr_GOimplant_remove,distr_GOIUD_partum,distr_GOifa_number,distr_GOShukhi,distr_GOInj_siringe,distr_GOImp_total,distr_GOImp_normal,distr_GOmisoprostol,...,distr_GOImp_Jadel,distr_GOCondom,distr_GOIUD_total,distr_GOPermanent_method,distr_GOper_male,distr_GOPill_total,distr_GOecp,distr_GOIUD_remove,distr_GOmrm_pack,distr_GOIUD_normal
2,0.0,0.0,0.0,0.0,0.0,3657.7126,0.0,8.4006,8.4006,0.0,...,0.0,4532.7463,8.3871,7.6412,0.0141,3657.7126,2.7163,0.0,0.0,8.3871
63,0.0,0.0,0.0,0.0,0.0,655.7759,0.0,1.7435,1.7435,0.0,...,0.0,886.7231,2.6429,1.9211,0.0041,655.7759,1.2523,0.0,0.0,2.6429
11,0.0,0.0,0.0,0.0,0.0,1302.0635,0.0,9.565,9.565,0.0,...,0.0,771.2989,4.3501,5.8866,0.0172,1302.0635,0.8026,0.0,0.0,4.3501
61,0.0,0.0,0.0,0.0,0.0,2935.3205,0.0,7.5193,7.5193,0.0,...,0.0,2969.484,12.7257,9.1668,0.0194,2935.3205,2.1889,0.0,0.0,12.7257
51,0.0,0.0,0.0,0.0,0.0,2403.5347,0.0,7.4587,7.4587,0.0,...,0.0,3192.5034,6.844,5.3922,0.013,2403.5347,2.6037,0.0,0.0,6.844


In [170]:
dgfp2016df = dgfp2016df.sort_values(by ="DistrictName")
data_svrs2016 = data_svrs2016.sort_values("DistrictName")
dgfp2016df, variables = distrGO_rates(input_df=dgfp2016df, pattern="distr_GO", 
              denominator_female=data_svrs2016['woman15_45_abs'], 
              denominator_male=data_svrs2016['total_bang'])
display(dgfp2016df[variables].head())

Unnamed: 0,distr_GOinj_mgs4,distr_GOmnp_saset,distr_GOimplant_remove,distr_GOIUD_partum,distr_GOifa_number,distr_GOShukhi,distr_GOInj_siringe,distr_GOImp_total,distr_GOImp_normal,distr_GOmisoprostol,...,distr_GOImp_Jadel,distr_GOCondom,distr_GOIUD_total,distr_GOPermanent_method,distr_GOper_male,distr_GOPill_total,distr_GOecp,distr_GOIUD_remove,distr_GOmrm_pack,distr_GOIUD_normal
2,0.0,0.0,2.5874,0.0728,164.3616,4134.4882,421.923,10.1809,9.6017,34.2216,...,0.5792,6076.5373,9.4708,4.4763,0.0045,4209.646,0.0,1.5513,0.1048,9.398
63,0.0,0.0,0.757,0.0518,41.7469,653.2773,108.9366,2.8958,2.8595,10.6344,...,0.0363,872.6982,3.194,1.5088,0.003,671.4688,0.0,0.3837,0.0985,3.1421
11,0.0,0.0,1.8511,0.0,97.8945,1175.802,228.126,8.5741,8.1495,17.2747,...,0.4245,745.9456,4.0778,2.7186,0.0057,1196.4034,0.0,0.3347,0.1102,4.0778
61,0.0,0.0,0.7047,0.0299,57.8425,602.7928,116.021,2.8073,2.6675,7.5774,...,0.1399,709.7482,1.5342,1.1744,0.008,611.6951,0.0,0.4192,0.0103,1.5042
51,0.0,1264.5546,1.0568,0.9263,2109.482,2015.356,977.2401,13.331,13.0502,38.4233,...,0.2808,3057.8958,9.115,3.5129,0.0088,2050.1934,0.0,0.241,95.9963,8.1887


In [171]:
dgfp2011df.head()

Unnamed: 0,distr_GOinj_mgs4,Imp12DistrNGOMonthThana_Percent_PerMale,distr_GOmnp_saset,imp12distr_monthprocessImp12DistrMonthThana_Percent_IUD,imp12distr_monthprocessImp12DistrMonthThana_Percent_PerMale,imp11subdistr_thanaprocessPercent_PerMale,Imp12DistrNGOMonthThana_Percent_Implant,distr_GOimplant_remove,distr_GOIUD_partum,imp11subdistr_thanaprocessCAR,...,distr_GOPill_total,distr_GOecp,distr_GOIUD_remove,Imp12DistrNGOMonthThana_Percent_PerFemale,imp11subdistr_ngothanaprocessNGO_Percent_Condom,distr_GOmrm_pack,imp11subdistr_ngothanaprocessNGO_Percent_Injectable,distr_GOIUD_normal,DistrictGeo,DistrictName
2,0.0,3.81,0.0,5.41,5.53,5.53,2.73,0.0,0.0,80.83,...,3657.7126,2.7163,0.0,4.04,10.66,0.0,21.19,8.3871,4001,Bagerhat
63,0.0,,0.0,7.68,4.76,4.76,,0.0,0.0,77.01,...,655.7759,1.2523,0.0,,,0.0,,2.6429,2003,Bandarban
11,0.0,3.19,0.0,3.64,7.91,7.91,6.61,0.0,0.0,75.47,...,1302.0635,0.8026,0.0,6.13,8.82,0.0,30.78,4.3501,1004,Barguna
61,0.0,0.55,0.0,4.35,2.46,2.46,4.01,0.0,0.0,72.62,...,2935.3205,2.1889,0.0,6.54,19.5,0.0,10.62,12.7257,1006,Barisal
51,0.0,1.48,0.0,2.52,2.71,2.71,1.45,0.0,0.0,72.23,...,2403.5347,2.6037,0.0,2.57,9.74,0.0,50.65,6.844,1009,Bhola


In [172]:
dgfp2016df.head()

Unnamed: 0,distr_GOinj_mgs4,Imp12DistrNGOMonthThana_Percent_PerMale,distr_GOmnp_saset,imp12distr_monthprocessImp12DistrMonthThana_Percent_IUD,imp12distr_monthprocessImp12DistrMonthThana_Percent_PerMale,imp11subdistr_thanaprocessPercent_PerMale,Imp12DistrNGOMonthThana_Percent_Implant,distr_GOimplant_remove,distr_GOIUD_partum,imp11subdistr_thanaprocessCAR,...,distr_GOPill_total,distr_GOecp,distr_GOIUD_remove,Imp12DistrNGOMonthThana_Percent_PerFemale,imp11subdistr_ngothanaprocessNGO_Percent_Condom,distr_GOmrm_pack,imp11subdistr_ngothanaprocessNGO_Percent_Injectable,distr_GOIUD_normal,DistrictGeo,DistrictName
2,0.0,4.69,0.0,4.34,5.33,5.33,4.35,2.5874,0.0728,81.39,...,4209.646,0.0,1.5513,4.87,10.06,0.1048,25.17,9.398,4001,Bagerhat
63,0.0,,0.0,8.22,9.05,9.05,,0.757,0.0518,77.95,...,671.4688,0.0,0.3837,,,0.0985,,3.1421,2003,Bandarban
11,0.0,3.2,0.0,3.21,9.46,9.46,5.77,1.8511,0.0,76.12,...,1196.4034,0.0,0.3347,3.25,8.16,0.1102,36.07,4.0778,1004,Barguna
61,0.0,0.97,0.0,4.07,2.98,2.98,6.84,0.7047,0.0299,75.77,...,611.6951,0.0,0.4192,4.07,23.38,0.0103,9.8,1.5042,1006,Barisal
51,0.0,2.31,1264.5546,3.15,3.21,3.21,4.96,1.0568,0.9263,79.61,...,2050.1934,0.0,0.241,2.55,8.09,95.9963,44.47,8.1887,1009,Bhola


## DHS Data

In [173]:
DHS2011 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhs/data/data_dhs_2011_clean.csv' 
DHS2014 = '/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/dhs/data/data_dhs_2014_clean.csv' 

In [174]:
dhs_2011 = pd.read_csv(DHS2011)
display(dhs2011.head())
dhs_2014 = pd.read_csv(DHS2014)
display(dhs2014.head())

Unnamed: 0,prop_married_women_15.45y,prop_institutional_delivery,sex_ratio,no_child_1.4y,no_pop_15.19y,no_women_15.45_men_..15y,prop_attendant_delivery,prop_registered_under5,prop_antenatal_care4.,no_pop_.35y,...,prop_pop_rural,prop_women_15.45y_overwomen,no_child_0.5y,no_married_..15y,prop_women_15.45y_overtotal,prop_antenatal_coverage,no_women_15.19y,dependency_ratio,DistrictName,prop_caesarean
0,80.33,30.66,93.31,84.303131,85.8685,533.714652,35.58,32.84,26.56,329.161569,...,11.48,46.74,129.040663,472.909582,20.96,29.91,48.002934,74.37,Bagerhat,15.74
1,75.0,0.0,84.44,30.457856,14.057472,89.030656,0.0,31.25,0.0,30.457856,...,0.0,35.56,46.85824,67.944448,45.16,30.0,4.685824,107.5,Bandarban,0.0
2,88.0,10.56,105.44,39.546169,48.026285,289.399605,16.3,41.4,20.97,170.726522,...,6.48,49.26,60.080481,266.458686,12.48,37.91,24.7696,70.73,Barguna,5.21
3,78.47,21.53,89.02,126.962485,147.444138,750.595733,26.92,38.53,26.53,434.809758,...,8.22,48.17,186.17623,641.548062,11.79,35.21,87.742465,75.53,Barisal,13.06
4,80.19,9.64,94.76,112.743292,114.755844,563.239951,11.89,21.19,29.51,275.296042,...,7.4,45.49,151.365187,490.364499,12.16,25.64,72.564288,80.16,Bhola,3.75


Unnamed: 0,no_births_last3y,prop_current_contraceptive,prop_unmet_need_family_planing,prop_antenatal_coverage,prop_antenatal_care4.,prop_institutional_delivery,prop_attendant_delivery,prop_caesarean,no_total_pop,no_pop_.15y,...,prop_pop_women,prop_pop_rural_women,prop_women_15.45y_overwomen,prop_women_15.45y_overtotal,prop_married_women_15.45y,prop_married_..15y,prop_female_head,prop_registered_under5,DistrictGeo,DistrictName
0,2716.58,67.82,11.0,48.69,25.13,26.49,36.95,22.38,978,529.133425,...,39.86,7.53,48.65,19.39,74.84,72.88,4.17,37.56,4001,Bagerhat
1,576.13,63.64,13.64,100.0,75.0,75.0,75.0,50.0,135,144.792893,...,77.52,0.0,54.79,42.48,57.5,63.46,25.49,40.0,2003,Bandarban
2,2453.31,73.3,8.22,37.97,47.46,27.54,34.43,22.9,934,328.807279,...,28.64,5.13,44.84,12.84,86.5,77.54,8.06,25.1,1004,Barguna
3,8068.56,64.26,10.08,64.01,32.39,46.33,56.91,31.22,3107,1277.101491,...,30.97,13.91,51.1,15.83,77.42,69.16,6.99,23.4,1006,Barisal
4,6990.08,67.28,10.01,29.44,15.62,11.27,18.42,4.01,2009,650.985938,...,26.58,5.06,47.39,12.59,82.49,75.2,3.53,17.64,1009,Bhola


In [175]:
print(geo.shape)
print(dhs_2011.shape)
geo_dhs_2011 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhs_2011, input_match='district')
print(geo_dhs_2011.shape)
print("#"*100)
print(geo.shape)
print(dhs2014.shape)
geo_dhs_2014 = match_districts(ref_df=geo, ref_match='DistrictName', input_df=dhs_2014, input_match='district')
print(geo_dhs_2014.shape)

(64, 2)
(64, 22)
(64, 4)
####################################################################################################
(64, 2)
(64, 33)
(64, 4)


In [176]:
print(dhs_2011.shape)
print(dhs_2014.shape)
dhs_2011 = dhs_2011.merge(geo_dhs_2011, how='left', left_on='district', right_on='Geo')
dhs_2014 = dhs_2014.merge(geo_dhs_2014, how='left', left_on='district', right_on='Geo')
dhs_2011 = dhs_2011.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
dhs_2014 = dhs_2014.drop(['FuzzRatio','Geo', 'district', 'year'], axis=1)
print(dhs_2011.shape)
print(dhs_2014.shape)

(64, 22)
(64, 19)
(64, 22)
(64, 19)


### Renaming DHS data sets from 2014 to 2016


In [177]:
dhs_2011, dhs_2016 = intersect_dfs(input_df1= dhs_2011, input_df2= dhs_2014)

In [178]:
dhs_2011.equals(dhs_2016)

False

## Combine data

In [179]:
for ces, svrs, dhis, dgfp, dhs in zip(sorted(ces_2011['DistrictName']), sorted(svrs_2011['DistrictName']), 
                                 sorted(dhis_2011['DistrictName']), sorted(dgfp2011df['DistrictName']),
                                 sorted(dhs2011['DistrictName'])):
    print("\n -------------------")
    print(ces, svrs, dhis, dgfp, dhs)
    print(f"CES vs SVRS: {ces == svrs}")
    print(f"CES vs DHIS: {ces == dhis}")
    print(f"CES vs DGFP: {ces == dgfp}")
    print(f"CES vs DHS: {ces == dhs}")
    print(f"SVRS vs DHIS: {svrs == dhis}")
    print(f"SVRS vs DGFP: {svrs == dgfp}")
    print(f"SVRS vs DHS: {svrs == dhs}")
    print(f"DHIS vs DGFP: {dhis == dgfp}")
    print(f"DHIS vs DHS: {dhis == dhs}")
    print(f"DHS vs DGFP: {dhs == dgfp}")


 -------------------
Bagerhat  Bagerhat  Bagerhat  Bagerhat  Bagerhat 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Bandarban  Bandarban  Bandarban  Bandarban  Bandarban 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barguna  Barguna  Barguna  Barguna  Barguna 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barisal  Barisal  Barisal  Barisal  Barisal 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs 

In [180]:
for ces, svrs, dhis, dgfp, dhs in zip(sorted(ces_2016['DistrictName']), sorted(svrs_2016['DistrictName']), 
                                 sorted(dhis_2016['DistrictName']), sorted(dgfp2016df['DistrictName']),
                                 sorted(dhs_2016['DistrictName'])):
    print("\n -------------------")
    print(ces, svrs, dhis, dgfp, dhs)
    print(f"CES vs SVRS: {ces == svrs}")
    print(f"CES vs DHIS: {ces == dhis}")
    print(f"CES vs DGFP: {ces == dgfp}")
    print(f"CES vs DHS: {ces == dhs}")
    print(f"SVRS vs DHIS: {svrs == dhis}")
    print(f"SVRS vs DGFP: {svrs == dgfp}")
    print(f"SVRS vs DHS: {svrs == dhs}")
    print(f"DHIS vs DGFP: {dhis == dgfp}")
    print(f"DHIS vs DHS: {dhis == dhs}")
    print(f"DHS vs DGFP: {dhs == dgfp}")


 -------------------
Bagerhat  Bagerhat  Bagerhat  Bagerhat  Bagerhat 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Bandarban  Bandarban  Bandarban  Bandarban  Bandarban 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barguna  Barguna  Barguna  Barguna  Barguna 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs DGFP: True

 -------------------
Barisal  Barisal  Barisal  Barisal  Barisal 
CES vs SVRS: True
CES vs DHIS: True
CES vs DGFP: True
CES vs DHS: True
SVRS vs DHIS: True
SVRS vs DGFP: True
SVRS vs DHS: True
DHIS vs DGFP: True
DHIS vs DHS: True
DHS vs 

## Combining data

In [181]:
for a,b,c,d in zip(dgfp2011[DGFP2011a]['geo'], dgfp2011[DGFP2011b]['geo'], 
                   dgfp2011[DGFP2011c]['geo'], dgfp2011[DGFP2011d]['geo']):
    print(a,b,c,d)
    print(a == b)
    print(a == c)
    print(a == d)
    print(b == c)
    print(b == d)
    print(c == d)
    print("\n -----------")

5577 5577 5577 5577
True
True
True
True
True
True

 -----------
4044 4044 4044 4044
True
True
True
True
True
True

 -----------
4001 4001 4001 4001
True
True
True
True
True
True

 -----------
3061 3061 3061 3061
True
True
True
True
True
True

 -----------
1079 1079 1079 1079
True
True
True
True
True
True

 -----------
5527 5527 5527 5527
True
True
True
True
True
True

 -----------
3026 3026 3026 3026
True
True
True
True
True
True

 -----------
2022 2022 2022 2022
True
True
True
True
True
True

 -----------
3029 3029 3029 3029
True
True
True
True
True
True

 -----------
5010 5010 5010 5010
True
True
True
True
True
True

 -----------
4087 4087 4087 4087
True
True
True
True
True
True

 -----------
1004 1004 1004 1004
True
True
True
True
True
True

 -----------
4018 4018 4018 4018
True
True
True
True
True
True

 -----------
2046 2046 2046 2046
True
True
True
True
True
True

 -----------
6036 6036 6036 6036
True
True
True
True
True
True

 -----------
1042 1042 1042 1042
True
True
True
True


### Preparing 2011

In [182]:
drop_vars = ['DistrictName', 'DistrictGeo']
ces_2011 = ces_2011.sort_values(by='DistrictName').reset_index(drop=True)
svrs_2011 = svrs_2011.sort_values(by='DistrictName').reset_index(drop=True)
dhis_2011 = dhis_2011.sort_values(by='DistrictName').reset_index(drop=True)
dgfp2011df = dgfp2011df.sort_values(by='DistrictName').reset_index(drop=True)
dhs_2011 = dhs_2011.sort_values(by='DistrictName').reset_index(drop=True)

d2011 = [ces_2011, svrs_2011.drop(drop_vars,axis=1), dhis_2011.drop(drop_vars,axis=1),
         dgfp2011df.drop(drop_vars,axis=1), dhs_2011.drop(drop_vars, axis=1)]
df2011 = pd.concat(d2011, axis=1)
df2011.shape

(64, 468)

### Preparing 2016

In [183]:
ces_2016 = ces_2016.sort_values(by='DistrictName').reset_index(drop=True)
svrs_2016 = svrs_2016.sort_values(by='DistrictName').reset_index(drop=True)
dhis_2016 = dhis_2016.sort_values(by='DistrictName').reset_index(drop=True)
dgfp2016df = dgfp2016df.sort_values(by='DistrictName').reset_index(drop=True)
d2016 = [ces_2016, svrs_2016.drop(drop_vars,axis=1),
         dhis_2016.drop(drop_vars,axis=1), dgfp2016df.drop(drop_vars,axis=1),
         dhs_2016.drop(drop_vars, axis=1)]
df2016 = pd.concat(d2016, axis=1)
df2016.shape

(64, 468)

## Determining outcome variables 

In [185]:
print(df2011['rate_maternal_mortality'].mean())
print(df2016['rate_maternal_mortality'].mean())

3.7296874999999994
2.65078125


In [186]:
print(df2011['rate_under5y_mortality'].mean())
print(df2016['rate_under5y_mortality'].mean())

42.829375
39.446562500000006


In [187]:
print(df2011['prop_antenatal_coverage'].mean())
print(df2016['prop_antenatal_coverage'].mean())

37.97062499999999
56.92234374999999


In [188]:
print(df2011['prop_unmet_need_family_planing'].mean())
print(df2016['prop_unmet_need_family_planing'].mean())

13.562187500000004
11.942968750000002


```
print(df2011['ORS_RHF_ORT'].mean())
print(df2016['ORS_RHF_ORT'].mean())
```

In [189]:
df2011.to_csv('/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/all/all2011.csv', index=False, index_label=False)
df2016.to_csv('/Users/edinhamzic/Symphony/wb_bangladesh/Bangladesh/output/all/all2016.csv', index=False, index_label=False)