In [1]:
#@formatter:off
# Enable IPython's autoreload extension in mode 2 so imported modules are
# reloaded before each cell executes (edits to project code are picked up live).
%load_ext autoreload
%autoreload 2
#@formatter:on

# Imports

In [2]:
import os
import sys

import pandas as pd
from mbench import (
    Parameter,
    ParameterData,
    CountryData,
    AdjacentDistricts,
    Country,
    District,
    Districts,
    OldDistrictToNewDistrict,
    demographic,
    util,
)


# Path

In [3]:
# Change into the project root so the relative 'Data' paths below resolve.
# sys.platform is 'linux' on Linux and lowercase 'darwin' on macOS; on any
# other platform the current working directory is left unchanged.
if sys.platform == 'linux':
    os.chdir('/mnt/d/Dropbox/benchmarking')
elif sys.platform == 'darwin':
    os.chdir('/Users/sepmein/Dropbox/benchmarking')

# Absolute paths derived from the working directory selected above.
data_dir = os.path.abspath('Data')
openmalaria_xml_config_path = os.path.abspath('Data/openmalaria/12936_2017_2051_MOESM1_ESM.xml')
openmalaria_xml_config_path_pn2 = os.path.abspath("Data/openmalaria/VanDucAP2.xml")
openmalaria_xml_config_path_pn3 = os.path.abspath("Data/openmalaria/VanDucAP3.xml")

# District

In [4]:
# One District per region, identified by its ISO-style code plus the name
# aliases used to match it in the input tables.
ahafo = District(iso="gh-af", aliases=["AHAFO"])
ashanti = District(iso="gh-ah", aliases=["ashanti"])
bono = District(iso="gh-bo", aliases=["bono"])
bono_east = District(iso="gh-be", aliases=["bono_east"])
central = District(iso="gh-cp", aliases=["central"])
eastern = District(iso="gh-ep", aliases=["eastern"])
greater_accra = District(iso="gh-aa", aliases=["greater_accra"])
north_east = District(iso="gh-ne", aliases=["north_east"])
northern = District(iso="gh-np", aliases=["northern"])
oti = District(iso="gh-ot", aliases=["oti"])
savannah = District(iso="gh-sv", aliases=["savannah"])
upper_east = District(iso="gh-ue", aliases=["upper_east"])
upper_west = District(iso="gh-uw", aliases=["upper_west"])
volta = District(iso="gh-tv", aliases=["volta"])
western = District(iso="gh-wp", aliases=["western"])
western_north = District(iso="gh-wn", aliases=["western_north"])

# Collection of all 16 districts, in the same order as defined above.
gha_districts = Districts(
    district_list=[
        ahafo,
        ashanti,
        bono,
        bono_east,
        central,
        eastern,
        greater_accra,
        north_east,
        northern,
        oti,
        savannah,
        upper_east,
        upper_west,
        volta,
        western,
        western_north,
    ]
)

# Old to New Districts

In [5]:
# Load the "216 to 260" table; its '216' and '260' columns feed the
# old-to-new district mapping built in the next cell.
district_216_260_csv = data_dir + "/GHA/ADM1/216 to 260.csv"
district_216_260 = pd.read_csv(district_216_260_csv)

In [6]:
# Build the old -> new district mapping from the '216' (start) and '260' (to)
# columns of the lookup table, resolved against the known districts.
old_new_column_names = ["old", "new"]
gha_old_new_districts = OldDistrictToNewDistrict(
    start=district_216_260["216"],
    to=district_216_260["260"],
    districts=gha_districts,
    columns=old_new_column_names,
)

# Adjacent Districts

In [7]:
# Load the ADM1 adjacency sheet: each row holds one pair in columns 'a' and 'b'.
adjacent_pairs_raw = pd.read_excel(
    data_dir + "/gha_adm_adjacent.xlsx", sheet_name="adm1_adjacent"
)

# Pass each endpoint column through adm1_name (presumably region-name
# normalisation), writing the results to 'from' and 'to' respectively.
adjacent_pairs_from = demographic.reformat.adm1_name(
    df=adjacent_pairs_raw,
    original_column_name="a",
    new_column_name="from",
    set_index=False,
)
gha_adjacent_provinces_data = demographic.reformat.adm1_name(
    df=adjacent_pairs_from,
    original_column_name="b",
    new_column_name="to",
    set_index=False,
)

# Adjacency relation between the known districts.
gha_adjacent_provinces = AdjacentDistricts(
    start=gha_adjacent_provinces_data["from"],
    to=gha_adjacent_provinces_data["to"],
    districts=gha_districts,
)

# GHA country instance

In [8]:
# Country object tying together the districts, their adjacency relation and
# the old -> new district mapping.
gha = Country(
    districts=gha_districts,
    adjacent_districts=gha_adjacent_provinces,
    old_districts_to_new_districts=gha_old_new_districts,
)

True

# Initialize the country data instance

In [9]:
# Container that the ParameterData objects below are registered on.
gha_data = CountryData(country=gha)

# Vector Resistance

In [10]:
# Load the bioassay records from the WHO_IR_data workbook ("Data" sheet).
gha_vector_resistance_bioassay = pd.read_excel(
    data_dir
    + "/GHA/WHO_IR_data/MTM_DISCRIMINATING_CONCENTRATION_BIOASSAY_20211130.xlsx",
    sheet_name="Data",
)

# Mean MORTALITY_ADJUSTED per ADMIN1. The column is selected *before*
# aggregating so that non-numeric columns in the sheet are never averaged
# (pandas >= 2.0 raises TypeError when asked to average object columns).
gha_adm1_vector_resistance = (
    gha_vector_resistance_bioassay.groupby("ADMIN1")["MORTALITY_ADJUSTED"].mean()
)

In [11]:
# Parameter definition and its per-region data. The source data is flagged as
# coming from the old districts system (from_old_districts_system=True).
vector_resistance = Parameter(
    name="vector resistance",
    aliases=["vr", "vector_resistance"],
)
# NOTE(review): unlike the other parameters in this notebook, this one is not
# registered via gha_data.add_parameter(...) — confirm that is intentional.
pm_gha_vector_resistance = ParameterData(
    data=gha_adm1_vector_resistance,
    parameter=vector_resistance,
    country=gha,
    from_old_districts_system=True,
)

# Prevalence

In [12]:
# District-level routine data, restricted to the 2018 rows.
incid_all_years = pd.read_csv(data_dir + "/GHA/Routine_data/District-level/incid.csv")
incid_2018 = incid_all_years.loc[incid_all_years["year"] == 2018]
incid = demographic.reformat.adm1_name(
    df=incid_2018, original_column_name="adm1", set_index=False
)

# Per-district population table (adm1, adm2, population) reused by the
# ITN and IRS coverage cells.
gha_adm2_population = incid[["adm1", "adm2", "pop"]].rename(
    columns={"pop": "population"}
)

In [13]:
# calculate prevalence
# Aggregate the district rows up to adm1. Only numeric columns are summed so
# that text columns (e.g. adm2) are not concatenated and mixed-type object
# columns cannot raise a TypeError.
incid = incid.groupby("adm1").sum(numeric_only=True)

# Prevalence per adm1 is taken as summed 'conf' divided by summed 'pop'.
prevalence = Parameter(name="Prevalence")
gha_prevalence = ParameterData(
    parameter=prevalence, data=incid["conf"] / incid["pop"], country=gha
)
gha_data.add_parameter(gha_prevalence)

# Demographic data

In [14]:
# Load the PHC summary workbook and keep the region / mean_age columns.
# NOTE(review): the variable names say 2020 but the file read is the
# "2021 PHC summary" — confirm which year is intended.
gha_demographic_adm1_2020_phc = pd.read_excel(
    data_dir + "/GHA/Demographic/2021_PHC/2021 PHC summary.xlsx"
)
gha_demographic_adm1_2020_mean = gha_demographic_adm1_2020_phc[["region", "mean_age"]]

# Use demographic.reformat.adm1_name, the same helper every other cell in this
# notebook calls, rather than the one-off demographic.adm1_name spelling.
gha_demographic_adm1_2020_mean = demographic.reformat.adm1_name(
    df=gha_demographic_adm1_2020_mean, original_column_name="region"
)

# Collapse the result to a Series via squeeze() (presumably a single
# mean_age column indexed by region at this point — TODO confirm).
gha_demographic_adm1_2020_mean = gha_demographic_adm1_2020_mean.squeeze()

# ETA

In [15]:
# eta is computed as 1 / (mean age * 365) and registered on the country data.
eta = Parameter(name="eta", aliases=["eta"])
eta_values = 1 / (gha_demographic_adm1_2020_mean * 365)
eta_data = ParameterData(parameter=eta, country=gha, data=eta_values)
gha_data.add_parameter(eta_data)

# ITN coverage

In [16]:
# Load the district summaries workbook; `intervention` is also used by the
# IRS coverage cell, so its name and columns are kept as-is.
intervention = pd.read_excel(
    io=os.path.abspath(data_dir + "/GHA/MAP_District_Estimates/Maps_by_MAP_260districts/GHA_summaries/GHA_new_district_summaries.xlsx"),
    sheet_name="district_summaries",
)
intervention = demographic.reformat.adm1_name(
    df=intervention, original_column_name="REGION", set_index=False
)
intervention = intervention.rename(columns={"DISTRICT": "adm2"})

# Attach the district populations to the 2018 ITN values. Both frames carry
# an 'adm1' column, so the merge yields adm1_x (population table) and adm1_y
# (intervention table); the intervention table's value is the one kept.
itn_coverage = intervention[["adm1", "adm2", "ITN_2018"]]
itn_coverage = pd.merge(left=gha_adm2_population, right=itn_coverage, on="adm2")
itn_coverage = itn_coverage.drop(columns=["adm1_x"])
itn_coverage = itn_coverage.rename(columns={"adm1_y": "adm1"})

# Population-weighted coverage per adm1:
#   sum(population * ITN_2018) / sum(population)
itn_coverage["population_itn"] = itn_coverage["population"] * itn_coverage["ITN_2018"]
# Sum numeric columns only, so the adm2 text column is not concatenated.
itn_coverage_groupby_sum = itn_coverage.groupby(by="adm1").sum(numeric_only=True)
itn_coverage_groupby_sum["itn_coverage"] = (
    itn_coverage_groupby_sum["population_itn"] / itn_coverage_groupby_sum["population"]
)

# NOTE(review): none of the three ITN ParameterData objects below is
# registered via gha_data.add_parameter(...) — confirm that is intentional.
itn_cov = Parameter(name='itn_cov')
itn_cov_data = ParameterData(
    parameter=itn_cov,
    country=gha,
    data=itn_coverage_groupby_sum["itn_coverage"]
)

# Scaled variants of the base coverage. Each variant is bound to its own
# Parameter rather than to the base `itn_cov` one.
# NOTE(review): the scaling factors 2.17 / 3.09 and 2 / 2.3 are undocumented —
# record their source.
itn_cov_gf = Parameter(name='itn_cov_gf')
itn_cov_gf_data = ParameterData(
    parameter=itn_cov_gf,
    country=gha,
    data=itn_cov_data.data * 2.17 / 3.09
)
itn_cov_ellie = Parameter(name='itn_cov_ellie')
itn_cov_ellie_data = ParameterData(
    parameter=itn_cov_ellie,
    country=gha,
    data=itn_cov_data.data * 2 / 2.3
)

# ITN distribution

In [18]:
# ITN distributed
gha_intervention_data_excel = pd.read_excel(
    data_dir + "/GHA/Interventions/GHA_Intervention_data.xlsx",
    sheet_name="Data template "
)

# Keep the 2018 rows that have an adm1 value, and rewrite the
# 'Northern East' label to 'north east' wherever it appears in the frame.
gha_intervention_data_excel_2018 = gha_intervention_data_excel[gha_intervention_data_excel['year'] == 2018]
gha_intervention_data_excel_2018 = gha_intervention_data_excel_2018.dropna(subset=['adm1'])
gha_intervention_data_excel_2018 = gha_intervention_data_excel_2018.replace('Northern East', 'north east')

# Total llins_num per adm1. The column is selected before summing so the
# other (possibly non-numeric) columns of the sheet are never aggregated.
llins_distributed = gha_intervention_data_excel_2018.groupby('adm1')['llins_num'].sum()
# NOTE(review): at this point llins_distributed is a Series indexed by adm1,
# not a frame with an 'adm1' column — confirm adm1_name accepts that input.
llins_distributed = demographic.reformat.adm1_name(df=llins_distributed,
                                                   original_column_name='adm1',
                                                   )

llins_distributed_parameter = Parameter('llins_distributed')
llins_distributed_parameter_data = ParameterData(
    parameter=llins_distributed_parameter,
    country=gha,
    data=llins_distributed
)
gha_data.add_parameter(llins_distributed_parameter_data)

# IRS coverage

In [17]:
# Attach the district populations to the 2018 IRS values. The working frame
# has its own name so that `irs_coverage` only ever refers to the Parameter
# defined at the end of this cell.
irs_coverage_adm2 = intervention[["adm1", "adm2", "IRS_2018"]]
irs_coverage_adm2 = pd.merge(left=gha_adm2_population, right=irs_coverage_adm2, on="adm2")

# Both inputs carry 'adm1'; keep the intervention table's value (adm1_y).
irs_coverage_adm2 = irs_coverage_adm2.drop(columns=["adm1_x"])
irs_coverage_adm2 = irs_coverage_adm2.rename(columns={"adm1_y": "adm1"})

# Population-weighted coverage per adm1:
#   sum(population * IRS_2018) / sum(population)
irs_coverage_adm2["population_irs"] = (
    irs_coverage_adm2["population"] * irs_coverage_adm2["IRS_2018"]
)
# Sum numeric columns only, so the adm2 text column is not concatenated.
irs_coverage_groupby_sum = irs_coverage_adm2.groupby(by="adm1").sum(numeric_only=True)
irs_coverage_groupby_sum["irs_coverage"] = (
    irs_coverage_groupby_sum["population_irs"] / irs_coverage_groupby_sum["population"]
)

irs_coverage = Parameter('irs_coverage')
irs_coverage_data = ParameterData(
    parameter=irs_coverage,
    country=gha,
    data=irs_coverage_groupby_sum["irs_coverage"]
)
gha_data.add_parameter(irs_coverage_data)