In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import pycountry
import pycountry_convert as pc

In [None]:
# Location of the estimates table, relative to the notebook's working directory.
estimates_csv_path = "./data/estimates/estimates.csv"
mmr_data = pd.read_csv(estimates_csv_path)

In [None]:
# Keep only the rows whose "parameter" is "maternal_deaths".
is_maternal_deaths = mmr_data["parameter"] == "maternal_deaths"

# Columns that are not needed downstream ("parameter" is constant after the filter).
unused_columns = ["0.1", "0.9", "estimate_version", "parameter"]

df = (
    mmr_data[is_maternal_deaths]
    .drop(columns=unused_columns)
    # Discard the original row labels: reset_index() moves them into an
    # "index" column, which is then dropped.
    .reset_index()
    .drop(columns="index")
    # Rows that became identical once the columns above were removed are collapsed.
    .drop_duplicates()
)

In [None]:
# Mean of the "0.5" column per country code, named up front so that it lands
# in the merged frame directly as "average_md".
country_mean_md = df.groupby('iso_alpha_3_code')['0.5'].mean().rename('average_md')

# Inner join on the country code: every row of df gains its country's mean.
df_average = df.merge(country_mean_md, on='iso_alpha_3_code')

In [None]:
# One "average_md" observation per country. De-duplicating on the country code
# (rather than on the average itself) keeps countries that happen to share the
# same average as separate observations, so ties do not distort the statistics.
country_average_md = df_average.drop_duplicates(subset="iso_alpha_3_code")["average_md"]

# Output column -> quantile level of the per-country averages.
average_md_quantile_levels = {
    "average_md_q0.1": 0.1,
    "average_md_q1": 0.25,
    "average_md_q2": 0.50,
    "average_md_q3": 0.75,
    "average_md_q0.9": 0.9,
    "average_md_q0.99": 0.99,
}

# Each statistic is a single scalar broadcast to every row, so that label()
# can compare a row's "average_md" against the thresholds row by row.
for quantile_column, quantile_level in average_md_quantile_levels.items():
    df_average[quantile_column] = country_average_md.quantile(quantile_level)

df_average["average_md_mean"] = country_average_md.mean()

In [None]:
def label(x):
    """Return the name of the quantile band that a row's "average_md" falls into.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must support ``x[key]`` for "average_md" and for the threshold keys
        "average_md_q0.1", "average_md_q1", "average_md_q2", "average_md_q3",
        "average_md_q0.9" and "average_md_q0.99".

    Returns
    -------
    str
        One of 'less_than_q0.1', 'between_q0.1_q1', 'between_q1_q2',
        'between_q2_q3', 'between_q3_q0.9', 'between_q3_q0.99',
        'greater_than_q0.99', or 'not_applicable' when no comparison holds
        (for example when the value is NaN).

    Notes
    -----
    Every band is closed on the left and open on the right. The top band
    includes the 0.99 threshold itself, so a value exactly equal to it is
    labelled 'greater_than_q0.99' rather than falling through to
    'not_applicable'.
    """
    value = x["average_md"]

    if value < x["average_md_q0.1"]:
        return 'less_than_q0.1'

    # (lower threshold key, upper threshold key, label), checked in ascending order.
    # NOTE: 'between_q3_q0.99' actually covers [q0.9, q0.99); the name is kept
    # unchanged because saved results and downstream filters may rely on it.
    bands = (
        ("average_md_q0.1", "average_md_q1", 'between_q0.1_q1'),
        ("average_md_q1", "average_md_q2", 'between_q1_q2'),
        ("average_md_q2", "average_md_q3", 'between_q2_q3'),
        ("average_md_q3", "average_md_q0.9", 'between_q3_q0.9'),
        ("average_md_q0.9", "average_md_q0.99", 'between_q3_q0.99'),
    )
    for lower_key, upper_key, group_name in bands:
        if x[lower_key] <= value < x[upper_key]:
            return group_name

    # Inclusive lower bound so the bands cover every comparable value.
    if value >= x["average_md_q0.99"]:
        return 'greater_than_q0.99'

    return 'not_applicable'

In [None]:
# Tag every row with the quantile band returned by label() for that row.
df_average["analysis_group"] = df_average.apply(label, axis=1)

In [None]:
#df_average['analysis_group'].unique()

In [None]:
#df_average.groupby("analysis_group")['iso_alpha_3_code'].nunique()

In [None]:
#df_average[df_average["analysis_group"]=="between_q1_q2"]["iso_alpha_3_code"].nunique()

In [None]:
def iso_to_continent(x):
    """Translate an ISO alpha-3 country code into a continent name.

    Parameters
    ----------
    x : str
        ISO alpha-3 country code (e.g. "FRA").

    Returns
    -------
    str
        The continent name, or an empty string when the code is unknown to
        pycountry or no continent can be resolved for it.
    """
    # iso alpha 3 to alpha 2
    try:
        country = pycountry.countries.get(alpha_3=x)
    except LookupError:
        # NOTE(review): some pycountry versions may raise instead of returning
        # None for an unknown code; either way it is treated as "unknown".
        country = None
    if country is None:
        return ''

    # alpha 2 to continent code, then continent code to continent name
    try:
        country_continent_code = pc.country_alpha2_to_continent_code(country.alpha_2)
        return pc.convert_continent_code_to_continent_name(country_continent_code)
    except Exception:
        # Best effort: any failed lookup yields ''. Exception (not a bare
        # except) lets KeyboardInterrupt / SystemExit propagate.
        return ''

In [None]:
# Resolve each row's country code to a continent name ('' when it cannot be resolved).
country_codes = df_average['iso_alpha_3_code']
df_average['continent'] = country_codes.apply(iso_to_continent)

In [None]:
# df_average.groupby(['continent'])['iso_alpha_3_code'].nunique()

In [None]:
#df_average[df_average["analysis_group"]=="less_than_q0.1"]

In [None]:
# Integer copy of the "0.5" column: round to zero decimals, then cast to int.
rounded_0_5 = df_average["0.5"].round(0)
df_average["round_0_5"] = rounded_0_5.astype(int)

In [None]:
# Persist the enriched frame in the current working directory.
df_average_pickle_path = "df_average.pkl"
df_average.to_pickle(df_average_pickle_path)