In [147]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pycountry

# Lift pandas' display limits for this notebook in a single call:
#   display.max_rows     = None -> render every row
#   display.max_columns  = None -> render every column
#   display.width        = None -> let pandas detect the available display width
#   display.max_colwidth = None -> never truncate the contents of a cell
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
)


# Import Data

In [148]:
df = pd.read_csv('data/SAP Datasets.csv')

# Fill gaps across the listed year columns within each row: forward fill
# first, then backward fill whatever is still missing. The vectorised
# ffill/bfill along axis=1 replaces a per-row Python lambda.
# NOTE(review): only 2022 and 2023 are filled here, while the ranking further
# down uses 2000-2023 -- confirm the narrower range is intentional.
years_col = [str(i) for i in range(2022, 2024)]
df[years_col] = df[years_col].ffill(axis=1).bfill(axis=1)

# drop unnecessary columns
df = df.drop(columns=['Country Code', 'short description', 'long description', 'Indicator Code'])

  df = pd.read_csv('data/SAP Datasets.csv')


# Ranking by Indicators

In [149]:
# Read the indicator catalogue and keep only the rows whose poverty direction
# has been decided, i.e. drop every row still marked 'TBD'.
Indicators = (
    pd.read_csv('data/Indicators_final.csv')
    .loc[lambda catalogue: catalogue['DOES A HIGHER VALUE INCREASE POVERTY?'].ne('TBD')]
)
Indicators

Unnamed: 0,INDICATOR NAMES,DOES A HIGHER VALUE INCREASE POVERTY?,Short Description,Long Description
0,Control of Corruption: Estimate,False,,
1,Multilateral debt service (% of public and publicly guaranteed debt service),True,"Multilateral debt service is the repayment of principal and interest to the World Bank, regional development banks, and other multilateral agencies.","Multilateral debt service is the repayment of principal and interest to the World Bank, regional development banks, and other multilateral agencies. public and publicly guaranteed debt service is the sum of principal repayments and interest actually paid in currency, goods, or services on long-term obligations of public debtors and long-term private obligations guaranteed by a public entity."
2,"Access to clean fuels and technologies for cooking, rural (% of rural population)",False,,"Access to clean fuels and technologies for cooking, rural is the proportion of rural population primarily using clean cooking fuels and technologies for cooking. Under WHO guidelines, kerosene is excluded from clean cooking fuels."
3,"Access to clean fuels and technologies for cooking, urban (% of urban population)",False,,"Access to clean fuels and technologies for cooking, urban is the proportion of urban population primarily using clean cooking fuels and technologies for cooking. Under WHO guidelines, kerosene is excluded from clean cooking fuels."
4,Access to clean fuels and technologies for cooking (% of population),False,,"Access to clean fuels and technologies for cooking is the proportion of total population primarily using clean cooking fuels and technologies for cooking. Under WHO guidelines, kerosene is excluded from clean cooking fuels."
5,"Access to electricity, rural (% of rural population)",False,,"Access to electricity, rural is the percentage of rural population with access to electricity."
6,"Access to electricity, urban (% of urban population)",False,,"Access to electricity, urban is the percentage of urban population with access to electricity."
7,Access to electricity (% of population),False,,"Access to electricity is the percentage of population with access to electricity. Electrification data are collected from industry, national surveys and international sources."
8,Compensation of employees (current LCU),False,,"Compensation of employees consists of all payments in cash, as well as in kind (such as food and housing), to employees in return for services rendered, and government contributions to social insurance schemes such as social security and pensions that provide benefits to employees."
9,Compensation of employees (% of expense),False,,"Compensation of employees consists of all payments in cash, as well as in kind (such as food and housing), to employees in return for services rendered, and government contributions to social insurance schemes such as social security and pensions that provide benefits to employees."


## Indicator Dataframes

In [150]:
def _rank_ascending(flag):
    """Turn a 'DOES A HIGHER VALUE INCREASE POVERTY?' entry into ``ascending`` for ``DataFrame.rank``.

    Parameters
    ----------
    flag : str or bool
        'TRUE' / 'FALSE' in any casing (surrounding whitespace ignored), or a boolean.

    Returns
    -------
    bool
        True when a higher value increases poverty (the lowest value gets rank 1),
        False otherwise (the highest value gets rank 1).

    Raises
    ------
    ValueError
        If the flag is anything else. Failing loudly avoids ranking an
        indicator with the direction left over from the previous one.
    """
    normalized = str(flag).strip().upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError(f"Unrecognized poverty-direction flag: {flag!r}")


# Year columns that take part in the ranking; identical for every indicator,
# so they are built once instead of on every loop iteration.
years_columns = [str(year) for year in range(2000, 2024)]

# Maps indicator name -> DataFrame with columns ['Country Name', <indicator name>],
# where the second column holds the country's mean yearly rank for that indicator.
results = {}

for index, row in Indicators.iterrows():
    indicator_name = row['INDICATOR NAMES']
    increase_poverty = row['DOES A HIGHER VALUE INCREASE POVERTY?']
    direction = _rank_ascending(increase_poverty)

    # set_index returns a new frame, so the filtered slice of df is never modified in place.
    indicator_df = df[df['Indicator Name'] == indicator_name].set_index('Country Name')
    country_year_data = indicator_df[years_columns]

    # Rank countries against each other within every year column (axis=0);
    # ties share the lowest rank and missing values stay unranked (NaN).
    ranked_countries = country_year_data.rank(axis=0, method='min', ascending=direction, na_option='keep')

    # Mean of the yearly ranks, ignoring years without a rank.
    column_name = str(indicator_name)
    ranked_countries[column_name] = ranked_countries.mean(axis=1, skipna=True)

    results[column_name] = ranked_countries[[column_name]].sort_values(by='Country Name').reset_index()

In [151]:
# Put every indicator's mean-rank column side by side, aligned on
# 'Country Name' rather than on row position. Positional alignment would
# attach ranks to the wrong country as soon as one indicator's frame listed
# a different set of countries. Aligning on the name also removes the need
# to borrow the country column from one hard-coded indicator.
combined_df = (
    pd.concat(
        [indicator_ranks.set_index('Country Name') for indicator_ranks in results.values()],
        axis=1,
    )
    .rename_axis('Country Name')
    .sort_index()
    .reset_index()  # 'Country Name' becomes the first column again
)

In [152]:
# Preview the first five rows of the combined per-indicator rank table.
combined_df.head(n=5)

Unnamed: 0,Country Name,Control of Corruption: Estimate,Multilateral debt service (% of public and publicly guaranteed debt service),"Access to clean fuels and technologies for cooking, rural (% of rural population)","Access to clean fuels and technologies for cooking, urban (% of urban population)",Access to clean fuels and technologies for cooking (% of population),"Access to electricity, rural (% of rural population)","Access to electricity, urban (% of urban population)",Access to electricity (% of population),Compensation of employees (current LCU),Compensation of employees (% of expense),Adjusted savings: education expenditure (current US$),Adjusted net national income per capita (current US$),Terms of trade adjustment (constant LCU),Adequacy of social protection and labor programs (% of total welfare of beneficiary households),Adequacy of unemployment benefits and ALMP (% of total welfare of beneficiary households),Adequacy of social safety net programs (% of total welfare of beneficiary households),Adequacy of social insurance programs (% of total welfare of beneficiary households),Political Stability and Absence of Violence/Terrorism: Estimate,"Literacy rate, youth female (% of females ages 15-24)","Literacy rate, youth (ages 15-24), gender parity index (GPI)","Literacy rate, youth male (% of males ages 15-24)","Literacy rate, youth total (% of people ages 15-24)","Literacy rate, adult female (% of females ages 15 and above)","Literacy rate, adult male (% of males ages 15 and above)","Literacy rate, adult total (% of people ages 15 and above)","Compulsory education, duration (years)","Children out of school, primary","Children out of school, primary, female","Children out of school, female (% of female primary school age)","Children out of school, primary, male","Children out of school, male (% of male primary school age)",Children out of school (% of primary school age),"Adolescents out of school, female (% of female lower secondary school 
age)","Adolescents out of school, male (% of male lower secondary school age)",Adolescents out of school (% of lower secondary school age),Expenditure on primary education (% of government expenditure on education),Expenditure on secondary education (% of government expenditure on education),Expenditure on tertiary education (% of government expenditure on education),People using at least basic drinking water services (% of population),People using safely managed drinking water services (% of population),"Community health workers (per 1,000 people)",Pregnant women receiving prenatal care (%),People using at least basic sanitation services (% of population),Births attended by skilled health staff (% of total),Current health expenditure (% of GDP),Current health expenditure per capita (current US$),Proportion of people living below 50 percent of median income (%),Multidimensional poverty headcount ratio (UNDP) (% of population),Multidimensional poverty headcount ratio (World Bank) (% of population),"Employers, female (% of female employment) (modeled ILO estimate)","Employers, male (% of male employment) (modeled ILO estimate)","Employers, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)","Children in employment, study and work, female (% of female children in employment, ages 7-14)","Children in employment, study and work, male (% of male children in employment, ages 7-14)","Children in employment, study and work (% of children in employment, ages 7-14)","Children in employment, work only, female (% of female children in employment, ages 7-14)","Children in employment, work only, male (% of male children in employment, ages 7-14)","Children in employment, work only (% of children in employment, ages 7-14)","Children in 
employment, total (% of children ages 7-14)",Unemployment with advanced education (% of total labor force with advanced education),Unemployment with basic education (% of total labor force with basic education),Unemployment with intermediate education (% of total labor force with intermediate education),"Unemployment, total (% of total labor force) (national estimate)","Children in employment, wage workers, female (% of female children in employment, ages 7-14)","Children in employment, wage workers, male (% of male children in employment, ages 7-14)","Children in employment, wage workers (% of children in employment, ages 7-14)","Adolescent fertility rate (births per 1,000 women ages 15-19)",Wanted fertility rate (births per woman),Women who were first married by age 15 (% of women ages 20-24),Women who were first married by age 18 (% of women ages 20-24),Teenage mothers (% of women ages 15-19 who have had children or are currently pregnant),Age dependency ratio (% of working-age population),"Age dependency ratio, old (% of working-age population)","Age dependency ratio, young (% of working-age population)",Urban population (% of total population)
0,Afghanistan,191.73913,130.833333,180.833333,162.75,182.125,185.285714,188.708333,201.291667,46.166667,84.833333,122.0,213.153846,36.0,13.0,,1.0,17.0,199.782609,94.0,92.0,86.666667,91.0,89.5,89.0,89.0,143.291667,,,,,,,,,,5.5,92.125,115.333333,230.958333,144.958333,,43.545455,209.083333,112.5,22.05,197.45,,9.0,,206.333333,210.416667,204.541667,231.083333,230.166667,226.666667,2.0,3.0,4.0,18.0,17.0,19.0,11.0,98.0,63.5,90.5,72.833333,11.0,13.0,14.0,237.291667,6.0,14.333333,12.333333,2.0,257.708333,17.583333,258.541667,242.958333
1,Africa Eastern and Southern,,,181.541667,186.166667,186.916667,222.636364,236.375,238.208333,,35.266667,,171.863636,,,,,,,65.041667,56.833333,65.416667,64.208333,53.916667,55.875,55.125,199.166667,,,,,,,,,,,94.0,72.0,240.958333,,,50.666667,228.208333,138.666667,101.136364,164.136364,,,,140.916667,154.166667,157.916667,186.791667,194.375,192.625,,,,,,,,,,,,,,,232.958333,,,,,233.791667,42.333333,232.0,218.25
2,Africa Western and Central,,,201.916667,202.541667,200.666667,216.916667,215.833333,218.125,,45.857143,,168.636364,,,,,,,81.083333,78.625,77.875,78.833333,70.75,71.333333,71.291667,105.875,,,,,,,,,,21.777778,88.75,67.666667,215.416667,142.833333,14.0,61.666667,222.5,143.333333,192.681818,184.636364,,,,157.541667,185.125,183.25,209.625,216.458333,214.625,,,,,,,,103.666667,32.666667,70.666667,58.4,,,,246.333333,10.0,,,9.5,241.291667,47.708333,241.541667,184.916667
3,Albania,137.173913,74.0,127.541667,132.416667,131.333333,21.333333,80.833333,55.041667,55.142857,70.5,124.045455,116.181818,82.791667,7.333333,5.5,8.333333,7.833333,109.26087,15.0,19.875,20.75,18.125,11.333333,12.666667,11.333333,147.25,34.333333,33.8,54.8,24.2,30.0,40.111111,39.428571,8.428571,36.307692,3.0,82.75,48.25,138.041667,99.416667,,14.4,66.75,42.333333,93.272727,122.0,27.090909,2.0,18.25,172.25,119.375,134.583333,150.833333,164.875,158.791667,12.0,11.0,15.333333,8.0,9.0,9.333333,13.333333,92.75,69.4375,94.1875,127.136364,1.0,4.0,2.5,60.875,1.0,4.0,3.5,1.5,87.5,200.0,78.5,144.458333
4,Algeria,136.956522,27.125,71.625,52.5,70.541667,117.208333,95.083333,104.625,,,46.363636,122.636364,90.291667,,,,,174.26087,44.2,34.8,43.8,44.8,41.25,40.0,39.75,61.0,71.083333,53.555556,18.666667,40.444444,9.111111,25.125,26.0,25.5,32.5,,,19.0,133.458333,85.625,,28.166667,125.125,73.8,144.636364,132.318182,11.0,7.0,,101.75,84.833333,56.416667,90.791667,99.666667,96.375,9.0,11.0,10.0,3.0,1.0,2.0,5.0,98.0,74.5,85.0,128.0,6.0,4.0,4.0,38.791667,,1.0,1.0,,118.875,106.458333,145.291667,92.625


In [153]:
# Overall score per country: the mean of every column after the first
# ('Country Name'), taken across the row; missing ranks are skipped.
combined_df['Average_Rank'] = combined_df.iloc[:, 1:].mean(axis='columns')

# Lowest average rank first.
combined_df_sorted = combined_df.sort_values('Average_Rank')

In [154]:
# Leaderboard view: country and average rank only. reset_index() keeps each
# row's previous label as an 'index' column and renumbers the rows from 0.
combined_df_sorted.loc[:, ['Country Name', 'Average_Rank']].reset_index()

Unnamed: 0,index,Country Name,Average_Rank
0,212,Singapore,30.594474
1,235,Switzerland,31.39811
2,40,Cayman Islands,32.090328
3,126,"Korea, Rep.",34.537855
4,173,Netherlands,34.546135
5,13,Australia,34.979301
6,110,Iceland,35.707088
7,153,Malta,36.416077
8,58,Denmark,36.657378
9,196,Portugal,36.753616


In [155]:
# Persist combined_df (not the sorted copy). index=True is the pandas default,
# spelled out here: the row index is written as the first, unnamed CSV column.
combined_df.to_csv('data/country_initial_rankings.csv', index=True)

## Normalizing