In [1]:
import pandas as pd
import os

In [24]:
country_list = pd.read_csv('../../data/country_list/countrylist.csv')
print(country_list.head())

  Global North Global South
0      Andorra      Algeria
1    Australia       Angola
2      Austria    Argentina
3      Belgium        Aruba
4       Canada   Azerbaijan


In [25]:
north_countries = country_list[['Global North']].copy()
north_countries = north_countries.rename(columns={'Global North': 'Country'})
north_countries['Region'] = 'Global North'
north_countries = north_countries.dropna()
north_countries['Country'] = north_countries['Country'].str.strip()
south_countries = country_list[['Global South']].copy()
south_countries = south_countries.rename(columns={'Global South': 'Country'})
south_countries['Region'] = 'Global South'
south_countries = south_countries.dropna()
south_countries['Country'] = south_countries['Country'].str.strip()
all_countries = pd.concat([north_countries, south_countries]).reset_index(drop=True)
all_countries['Country_norm'] = all_countries['Country'].str.lower()
print(all_countries[['Country', 'Region']])

        Country        Region
0       Andorra  Global North
1     Australia  Global North
2       Austria  Global North
3       Belgium  Global North
4        Canada  Global North
..          ...           ...
95   Seychelles  Global South
96    Sri Lanka  Global South
97     Thailand  Global South
98     Viet Nam  Global South
99  Yemen, Rep.  Global South

[100 rows x 2 columns]


In [26]:
CHEX_Capita = pd.read_csv('../../data/current_health_expenditure/CHEX.csv',skiprows=4)
print(CHEX_Capita.head())

                  Country Name Country Code  \
0                        Aruba          ABW   
1  Africa Eastern and Southern          AFE   
2                  Afghanistan          AFG   
3   Africa Western and Central          AFW   
4                       Angola          AGO   

                                      Indicator Name     Indicator Code  1960  \
0  Current health expenditure per capita (current...  SH.XPD.CHEX.PC.CD   NaN   
1  Current health expenditure per capita (current...  SH.XPD.CHEX.PC.CD   NaN   
2  Current health expenditure per capita (current...  SH.XPD.CHEX.PC.CD   NaN   
3  Current health expenditure per capita (current...  SH.XPD.CHEX.PC.CD   NaN   
4  Current health expenditure per capita (current...  SH.XPD.CHEX.PC.CD   NaN   

   1961  1962  1963  1964  1965  ...       2016        2017       2018  \
0   NaN   NaN   NaN   NaN   NaN  ...        NaN         NaN        NaN   
1   NaN   NaN   NaN   NaN   NaN  ...  85.899217   90.013150  90.075653   
2   NaN 

In [28]:
CHEX_Capita['Country_norm'] = CHEX_Capita['Country Name'].str.lower()
chex_filtered = CHEX_Capita[CHEX_Capita['Country_norm'].isin(all_countries['Country_norm'])]

In [29]:
num_countries = chex_filtered['Country_norm'].nunique()
print(f"Total countries after filter: {num_countries}")

Total countries after filter: 97


In [30]:
chex_set = set(CHEX_Capita['Country_norm'])

In [31]:
missing_countries = all_countries[~all_countries['Country_norm'].isin(chex_set)].copy()
print(missing_countries)

   Country        Region Country_norm
40  Taiwan  Global North       taiwan


In [32]:
availability = []

In [33]:
years = list(map(str, range(2000, 2025)))

In [35]:
countries_in_chex = all_countries[all_countries['Country'].str.lower().isin(chex_set)].copy()
countries_in_chex['Country_norm'] = countries_in_chex['Country'].str.lower()

In [36]:
for _, row in countries_in_chex.iterrows():
    country_norm = row['Country_norm']
    chex_row = CHEX_Capita[CHEX_Capita['Country_norm'] == country_norm]
    
    country_dict = {
        'Country': row['Country'],
        'Region': row['Region']
    }
    
    for year in years:
        # True jika ada data (non-missing) untuk tahun tersebut
        country_dict[year] = pd.notna(chex_row[year].values[0]) if not chex_row.empty else False
    
    availability.append(country_dict)


In [37]:
availability_df = pd.DataFrame(availability)

In [38]:
print(availability_df.head())

     Country        Region  2000  2001  2002  2003  2004  2005  2006  2007  \
0    Andorra  Global North  True  True  True  True  True  True  True  True   
1  Australia  Global North  True  True  True  True  True  True  True  True   
2    Austria  Global North  True  True  True  True  True  True  True  True   
3    Belgium  Global North  True  True  True  True  True  True  True  True   
4     Canada  Global North  True  True  True  True  True  True  True  True   

   ...  2015  2016  2017  2018  2019  2020  2021  2022   2023   2024  
0  ...  True  True  True  True  True  True  True  True   True  False  
1  ...  True  True  True  True  True  True  True  True  False  False  
2  ...  True  True  True  True  True  True  True  True   True  False  
3  ...  True  True  True  True  True  True  True  True  False  False  
4  ...  True  True  True  True  True  True  True  True   True  False  

[5 rows x 27 columns]


In [39]:
availability_df.to_csv('../../data/current_health_expenditure/CHEX_availability_per_year_filtered.csv', index=False)

In [41]:
years_to_check = list(map(str, range(2010, 2023)))

In [49]:
missing_list = []

for year, df in missing_per_year.items():
    for _, row in df.iterrows():
        missing_list.append({
            'Country': row['Country'],
            'Region': row['Region'],
            'Year': year
        })

In [50]:
print(missing_list)

[{'Country': 'Liechtenstein', 'Region': 'Global North', 'Year': '2010'}, {'Country': 'Aruba', 'Region': 'Global South', 'Year': '2010'}, {'Country': 'Puerto Rico (US)', 'Region': 'Global South', 'Year': '2010'}, {'Country': 'Liechtenstein', 'Region': 'Global North', 'Year': '2011'}, {'Country': 'Aruba', 'Region': 'Global South', 'Year': '2011'}, {'Country': 'Puerto Rico (US)', 'Region': 'Global South', 'Year': '2011'}, {'Country': 'Liechtenstein', 'Region': 'Global North', 'Year': '2012'}, {'Country': 'Aruba', 'Region': 'Global South', 'Year': '2012'}, {'Country': 'Puerto Rico (US)', 'Region': 'Global South', 'Year': '2012'}, {'Country': 'Liechtenstein', 'Region': 'Global North', 'Year': '2013'}, {'Country': 'Aruba', 'Region': 'Global South', 'Year': '2013'}, {'Country': 'Puerto Rico (US)', 'Region': 'Global South', 'Year': '2013'}, {'Country': 'Liechtenstein', 'Region': 'Global North', 'Year': '2014'}, {'Country': 'Aruba', 'Region': 'Global South', 'Year': '2014'}, {'Country': 'Puerto

In [51]:
missing_df = pd.DataFrame(missing_list)

In [53]:
missing_df.to_csv('../../data/current_health_expenditure/CHEX_missing_2010_2022.csv', index=False)

In [58]:
#check all countries (original data)
years = list(map(str, range(2010, 2024))) 
CHEX_Capita[years] = CHEX_Capita[years].apply(pd.to_numeric, errors='coerce')

In [66]:
CHEX_Capita['Missing_Years'] = CHEX_Capita[years].isna().sum(axis=1)

In [67]:
complete_countries = CHEX_Capita[CHEX_Capita['Missing_Years'] == 0][['Country Name'] + years]

In [57]:
complete_countries.to_csv('../../data/current_health_expenditure/CHEXdata_available.csv', index=False)

In [68]:
print(complete_countries)

       Country Name         2010         2011         2012         2013  \
6           Andorra  3277.785156  3550.908203  2785.188965  2887.025879   
14          Austria  4800.793457  5167.817871  4966.095703  5235.341309   
35           Canada  5062.482910  5401.615234  5520.622070  5462.156738   
39            Chile   861.724365   984.448608  1070.512939  1171.770386   
45         Colombia   452.442810   501.844208   547.007324   580.964539   
54          Czechia  1509.447266  1649.159058  1509.775269  1508.799072   
55          Germany  4611.421875  5037.491699  4765.678711  5100.152344   
58          Denmark  6171.285156  6438.212402  6168.339355  6334.415527   
76             Fiji   127.656036   140.937042   153.565628   158.518066   
81   United Kingdom  3892.380127  4140.362305  4217.247559  4348.286621   
101         Hungary   983.110840  1063.432373   961.138367   990.631165   
114         Iceland  3614.038330  3937.315918  3784.383789  4095.676270   
116           Italy  3217

In [71]:
import pandas as pd

CHEX_Capita = pd.read_csv('../../data/current_health_expenditure/CHEX.csv', skiprows=4)

years = list(map(str, range(2010, 2023)))  # 2010-2022

CHEX_Capita[years] = CHEX_Capita[years].apply(pd.to_numeric, errors='coerce')

for year in years:
    CHEX_Capita[year] = CHEX_Capita[year].notna()

subset = CHEX_Capita[['Country Name'] + years]

# Filter countries that have complete data for 2010–2022
complete_2010_2022 = subset[subset[years].all(axis=1)]

# Display the results
print(f"Countries with complete data from 2010–2022 ({len(complete_2010_2022)} countries):")
print(complete_2010_2022[['Country Name'] + years])

# Optional: export to CSV
complete_2010_2022.to_csv('../../data/current_health_expenditure/CHEX_complete_2010_2022.csv', index=False)

Countries with complete data from 2010–2022 (234 countries):
                    Country Name  2010  2011  2012  2013  2014  2015  2016  \
1    Africa Eastern and Southern  True  True  True  True  True  True  True   
2                    Afghanistan  True  True  True  True  True  True  True   
3     Africa Western and Central  True  True  True  True  True  True  True   
4                         Angola  True  True  True  True  True  True  True   
5                        Albania  True  True  True  True  True  True  True   
..                           ...   ...   ...   ...   ...   ...   ...   ...   
260                        Samoa  True  True  True  True  True  True  True   
262                  Yemen, Rep.  True  True  True  True  True  True  True   
263                 South Africa  True  True  True  True  True  True  True   
264                       Zambia  True  True  True  True  True  True  True   
265                     Zimbabwe  True  True  True  True  True  True  True   

  