Import libraries

In [1]:
import pandas as pd
import numpy as np
import recordlinkage

Specify file paths

In [2]:
# Root folder of the project; every path below is built by appending to it.
# NOTE(review): this is an absolute, machine-specific location - consider making it configurable.
root_folder = r'C:\Users\habze\Nebyou\Development\facility_mapping_old'

# Combined Excel file
new_excel_file_path = root_folder + r'\Data\master_file_new.xlsx'

# Zone mappings file
zone_mappings_file_path = root_folder + r'\Output\zone_mappings.xlsx'

# Woreda mappings file
woreda_mappings_file_path = root_folder + r'\Output\woreda_mappings.xlsx'

# HP mappings file
hp_mappings_file_path = root_folder + r'\Output\hp_final_mappings.xlsx'

# HC mappings file.
# Written as a raw string like the paths above so that the backslashes are
# never interpreted as (invalid) escape sequences such as '\O' or '\h'.
hc_mappings_file_path = root_folder + r'\Output\hc_final_mappings.xlsx'

Read in data and rename columns where necessary

In [3]:
# eCHIS master sheet: every column name gets an '_echis' suffix so that its
# origin stays recognisable once it is combined with DHIS2 columns.
df_new_eCHIS_combined = (
    pd.read_excel(new_excel_file_path, sheet_name='echis_master')
    .add_suffix('_echis')
)

# DHIS2 master sheet: rename the columns used for matching to
# '<level>_name_dhis2'-style names; other columns are left as they are.
dhis2_column_names = {
    'Region': 'region_name_dhis2',
    'Zone': 'zone_name_dhis2',
    'Woreda': 'woreda_name_dhis2',
    'PHCU': 'hc_name_dhis2',
    'Facility Name': 'facility_name_dhis2',
}
df_dhis2 = (
    pd.read_excel(new_excel_file_path, sheet_name='dhis2_master')
    .rename(columns=dhis2_column_names)
)

# Row counts of both sources.
# NOTE(review): the second label says 'mfr' although the frame counted is the
# one read from the 'echis_master' sheet - confirm the intended wording.
print('# of records in DHIS2:\t\t', df_dhis2.shape[0])

print('# of records in mfr:\t\t', df_new_eCHIS_combined.shape[0])


# of records in DHIS2:		 18691
# of records in mfr:		 8255


Pre-Processing

In [4]:
# Make adjustments for Dire Dawa: rows whose region is 'Dire Dawa' take their
# woreda name as the zone name; every other row keeps its existing zone name.
# Implemented as a vectorised column operation rather than a row-by-row
# apply(axis=1), which is slower and does not yield a column on an empty frame.
is_dire_dawa = df_new_eCHIS_combined['region_name_echis'] == 'Dire Dawa'
df_new_eCHIS_combined['zone_name_echis'] = df_new_eCHIS_combined['zone_name_echis'].mask(
    is_dire_dawa, df_new_eCHIS_combined['woreda_name_echis']
)

# NOTE(review): the label says 'mfr' but the frame counted is the eCHIS one - confirm wording.
print('# of potential health posts in mfr', len(df_new_eCHIS_combined))

# of potential health posts in mfr 8255


Using recordlinkage to link DHIS2 and eCHIS regions

In [5]:
# Distinct, non-null region names of each system. The index is reset so that
# the positional labels 0..n-1 can be joined back onto the candidate pairs below.
region_e = df_new_eCHIS_combined[['region_name_echis']].drop_duplicates().dropna().reset_index(drop=True)
region_d = df_dhis2[['region_name_dhis2']].drop_duplicates().dropna().reset_index(drop=True)

print(region_e)
print(region_d)

# Build every (eCHIS region, DHIS2 region) pair as a candidate link.
indexer = recordlinkage.Index()
indexer.full()
region_candidate_links = indexer.index(region_e, region_d)

# Score each candidate pair with the Jaro-Winkler string similarity.
compare_cl = recordlinkage.Compare()
compare_cl.string('region_name_echis', 'region_name_dhis2', method='jarowinkler', label='match_score')

regions_linked = compare_cl.compute(region_candidate_links, region_e, region_d)
# Turn the pair index into ordinary columns; the merges below rely on them
# being called 'level_0' (row of region_e) and 'level_1' (row of region_d).
regions_linked = regions_linked.reset_index()

# Attach the region names belonging to both sides of every pair.
regions_linked = regions_linked.merge(region_e.reset_index().rename({'index':'level_0'},axis=1), how='left', on='level_0')

regions_linked = regions_linked.merge(region_d.reset_index().rename({'index':'level_1'},axis=1), how='left', on='level_1')

# Rank the candidates of each eCHIS region, best score first.
# method='first' gives tied candidates distinct ranks in row order, so exactly
# one candidate per eCHIS region receives rank 1. With the default
# method='average' two candidates tied for the top score would both get 1.5
# and the region would silently end up without any mapping.
regions_linked['ranked'] = regions_linked.groupby('level_0')['match_score'].rank(method='first', ascending=False)

# Keep only the best-scoring DHIS2 region per eCHIS region.
# NOTE(review): no minimum score is enforced, so a poor best match is still accepted.
regions_linked = regions_linked[regions_linked['ranked'] == 1]

# Add the mapped regions to the eCHIS dataframe.
# validate='many_to_one' makes the merge fail loudly, instead of duplicating
# eCHIS rows, should a region ever appear more than once in the mapping.
df_new_eCHIS_combined_with_mappings = df_new_eCHIS_combined.merge(
    regions_linked[['region_name_echis','region_name_dhis2']],
    how='left',
    on='region_name_echis',
    validate='many_to_one',
)

df_new_eCHIS_combined_with_mappings

   region_name_echis
0             Oromia
1             Amhara
2               SNNP
3               SWEP
4             Somali
5               Afar
6          Dire Dawa
7           Gambella
8             Harari
9             Tigray
10   Benshangul Gumz
11            Sidama
    region_name_dhis2
0             Oromiya
1              Amhara
2                SNNP
3              Somali
4                 SWE
5              Tigray
6   Benishangul Gumuz
7                Afar
8            Gambella
9              Sidama
10             Harari
11          Dire Dawa


Unnamed: 0,hp_location_id_echis,region_name_echis,zone_name_echis,woreda_name_echis,hc_name_echis,hp_name_echis,region_name_dhis2
0,72ac8938f9b94046a53d5f32ac3a20ed,Oromia,Guji,Adola rede,Chembe,Bechera,Oromiya
1,7915c5a64d65457db6da5fa743dc151f,Oromia,Guji,Adola rede,Chembe,Chanbe,Oromiya
2,7b7fe604c40d41ccae77c6392c7fc3ab,Oromia,Guji,Adola rede,Chembe,Kola,Oromiya
3,b3310b4b562148218bd64f1ef9ffdc4f,Oromia,Guji,Adola rede,Chembe,Michicha,Oromiya
4,d4f126b7f7f244e8a86d566125e2a63f,Oromia,Guji,Adola rede,Dhedale Chena,Dedela Chena,Oromiya
...,...,...,...,...,...,...,...
8250,8b31853419014ebe8b9a0bcec45bfaed,Oromia,Jimma,Shabe Sombo,Shabe,Yanga Dogama Health Post,Oromiya
8251,83e46dbd4d984ab799824e9579075b2c,Oromia,Jimma,Shabe Sombo,Sombo,Atro Gefere Health Post,Oromiya
8252,51ce2d0383784e34912cca4d4ae69af7,Oromia,Jimma,Shabe Sombo,Sombo,Dema Gemechu Health Post,Oromiya
8253,eff135bde47846cba024c57b2ff3e2be,Oromia,Jimma,Shabe Sombo,Sombo,Mirgano Beso Health Post,Oromiya
