In [336]:
import pandas as pd
import numpy as np

In [337]:
def standardize(df: pd.DataFrame, x: str):
    """
    Rescale one indicator column from a percentage to an MPI share.

    Column ``x`` is overwritten on ``df`` itself with ``MPI * x / 100``,
    where MPI is taken row by row from the frame's Multidimensional
    Poverty Index column. Nothing is returned.

    Args:
        df (pd.DataFrame): Frame holding the MPI column and column ``x``.
        x (str): Name of the indicator column to rescale.
    """
    mpi_column = "Multidimensional Poverty Index\n(MPI = H*A)"
    # Multiply first, then divide, so the arithmetic order is MPI * x / 100.
    scaled = df[mpi_column] * df[x]
    df.loc[:, x] = scaled / 100

def get_weights(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a population-share "Weight" column to ``df``.

    Each row's weight is its "Population 2021" divided by the sum of
    "Population 2021" over every row of ``df``. The column is written onto
    the frame that was passed in, and that same frame is returned.

    Args:
        df (pd.DataFrame): Frame containing a "Population 2021" column.

    Returns:
        pd.DataFrame: The input ``df`` object, now carrying "Weight".
    """
    population = df["Population 2021"]
    df.loc[:, "Weight"] = population / population.sum()
    return df

def get_hels(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add Health, Education and Living Standards totals to ``df``.

    For each dimension the member indicator columns are added together,
    once for the plain columns and once for their "_w" (weighted)
    counterparts, producing six new columns: "Health", "Education",
    "Living Standards" and the same three with a "_w" suffix. The columns
    are written onto the frame that was passed in.

    Columns are combined with ``+``, so a missing value in any indicator
    makes that row's dimension total missing as well.

    Args:
        df (pd.DataFrame): Frame containing the ten indicator columns and
            their "_w" counterparts.

    Returns:
        pd.DataFrame: The input ``df`` object with the six totals added.
    """
    dimensions = {
        "Health": ["Nutrition", "Child mortality"],
        "Education": ["Years of schooling", "School attendance"],
        "Living Standards": ["Cooking fuel", "Sanitation", "Drinking water",
                             "Electricity", "Housing", "Assets"],
    }

    # Plain totals first, then the weighted ones, adding left to right.
    for suffix in ("", "_w"):
        for dimension, indicators in dimensions.items():
            total = df[indicators[0] + suffix]
            for indicator in indicators[1:]:
                total = total + df[indicator + suffix]
            df.loc[:, dimension + suffix] = total

    return df


In [338]:
# Both sheets live in the same workbook, so parse it once: passing a list of
# sheet names makes read_excel return a {sheet name: DataFrame} mapping.
RAW_WORKBOOK = "../../data/raw/Global MPI 2023 National Results.xlsx"
NATIONAL_SHEET = "1.1 National MPI Results"
CONTRIBUTION_SHEET = "1.3 Contribut'n of Deprivations"

sheets = pd.read_excel(RAW_WORKBOOK, sheet_name=[NATIONAL_SHEET, CONTRIBUTION_SHEET])
df_1 = sheets[NATIONAL_SHEET]
df_2 = sheets[CONTRIBUTION_SHEET]

# Left join keeps every row of the national-results sheet. Column names that
# occur in both sheets come out with pandas' default _x / _y suffixes.
df = pd.merge(left=df_1, right=df_2, how="left", on="ISO\ncountry numeric code")

In [339]:
# Keep only the columns the rest of the notebook works with. The trailing
# .copy() makes the result an independent frame, so the in-place column
# assignments done later (df.loc[:, col] = ...) act on this frame and do not
# raise SettingWithCopyWarning.
df = df[["Country_x", 'Multidimensional Poverty Index\n(MPI = H*A)',
       'Intensity of deprivation among the poor\n(A) ', 'Population 2021', "World region_y", 'Nutrition', 'Child mortality',
       'Years of schooling', 'School attendance', 'Cooking \nfuel',
       'Sanitation', 'Drinking water', 'Electricity', 'Housing', 'Assets']].copy()

In [340]:
# Indicator columns under their raw workbook headers (note the line break
# inside "Cooking \nfuel").
indicator_list = [
    "Nutrition",
    "Child mortality",
    "Years of schooling",
    "School attendance",
    "Cooking \nfuel",
    "Sanitation",
    "Drinking water",
    "Electricity",
    "Housing",
    "Assets",
]

# standardize() overwrites each column on df itself, so running this cell a
# second time would rescale the already-rescaled values.
for x in indicator_list:
    standardize(df, x)

In [341]:
# Swap the verbose workbook headers for the short names used from here on.
# rename() ignores mapping keys that are not columns of df, so an entry whose
# source column is absent (e.g. the headcount ratio, if it was not selected)
# simply has no effect.
df = df.rename(columns={
    "Country_x": "Country",
    "Headcount ratio: Population in multidimensional poverty\n(H)": "Headcount",
    "Intensity of deprivation among the poor\n(A) ": "Intensity",
    'Multidimensional Poverty Index\n(MPI = H*A)': "MPI",
    "World region_y": "Region",
    "Cooking \nfuel": "Cooking fuel",
})

In [342]:
# One frame per world region, plus the full frame under "Global".
# "Global" holds df itself (not a copy), and it keeps the "Region" column;
# the per-region frames have that column dropped.
regions = list(df["Region"].unique())
region_dfs = {"Global": df}
for region in regions:
    in_region = df["Region"] == region
    region_df = df.loc[in_region].drop(columns="Region")
    region_dfs[region] = region_df

In [343]:
# Spot-check one regional frame; it should no longer carry a "Region" column.
region_dfs["Arab States"]

Unnamed: 0,Country,MPI,Intensity,Population 2021,Nutrition,Child mortality,Years of schooling,School attendance,Cooking fuel,Sanitation,Drinking water,Electricity,Housing,Assets
8,Jordan,0.001526,35.3875,11148.278,0.000293,0.00028,0.000409,0.000407,5e-06,2.2e-05,3.2e-05,0.0,6e-05,1.9e-05
10,"Palestine, State of",0.00198,34.972581,5133.392,0.000827,0.000419,7.2e-05,0.000542,1.9e-05,5.3e-05,1.4e-05,0.0,2.9e-05,7e-06
17,Tunisia,0.002888,36.487344,12262.946,0.000609,9.6e-05,0.001149,0.000629,1.2e-05,0.000131,0.000118,2.1e-05,4.1e-05,8.2e-05
22,Algeria,0.005409,39.172627,44177.969,0.00131,0.000379,0.001602,0.001065,7.1e-05,0.000342,0.00023,9.2e-05,0.000246,7.4e-05
27,Libya,0.007421,37.134823,6735.277,0.002531,0.000363,0.002157,0.001447,1.3e-05,0.00018,0.00047,2.6e-05,0.000216,1.9e-05
44,Egypt,0.019682,37.570575,109262.178,0.006521,0.001355,0.004774,0.005673,,0.000471,0.000218,3.5e-05,0.000491,0.000143
49,Morocco,0.026697,41.977956,37076.584,0.005602,0.000904,0.008274,0.004227,0.001049,0.00133,0.001935,0.00058,0.002136,0.000658
53,Iraq,0.032694,37.860725,43533.592,0.008402,0.002411,0.009152,0.010766,9e-05,0.000784,0.000229,2.8e-05,0.000735,9.7e-05
87,Yemen,0.245166,50.5848,32981.641,0.06309,0.007893,0.028618,0.04595,0.016399,0.020646,0.016587,0.011018,0.025443,0.009523
92,Sudan,0.27944,53.401499,45657.202,0.049585,0.009268,0.04501,0.03645,0.024347,0.025591,0.019894,0.023671,0.028811,0.016815


In [344]:
# Indicator columns (post-rename names) that get a population-weighted "_w" twin.
needs_weights = ['Nutrition', 'Child mortality', 'Years of schooling',
       'School attendance', 'Cooking fuel', 'Sanitation', 'Drinking water',
       'Electricity', 'Housing', 'Assets']

for region, region_frame in region_dfs.items():
    # e.g. "Arab States" -> "arab_states_df"
    var_name = region.lower().replace(" ", "_").replace("-", "_") + "_df"

    # Deliberately not bound to `df`: doing so would leave the notebook-wide
    # `df` pointing at whichever regional frame is processed last, and the
    # region-split cell could then no longer be re-run.
    # get_weights() adds "Weight" to the frame it is given and returns it.
    weighted_df = get_weights(region_frame)

    for col in needs_weights:
        col_name = col + "_w"
        weighted_df.loc[:, col_name] = weighted_df[col] * weighted_df["Weight"]

    # NOTE(review): this path is absolute ("/data/..."), whereas the raw data
    # is read from the relative "../../data/raw" — confirm the intended
    # location before re-enabling the export below.
    output_filepath = f"/data/interm/{var_name}_2.csv"

    # Publish the frame as a notebook-level variable named after the region.
    globals()[var_name] = weighted_df
    # globals()[var_name].fillna(0).to_csv(output_filepath)