In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Relative paths to the GDP and expenditure CSV inputs
gdp_csv, expenditure_csv = "Resources/2017_GDP.csv", "Resources/expenditure.csv"

In [3]:
# Load each CSV into its own DataFrame (GDP first, then expenditure)
gdp_df, expenditure_df = (
    pd.read_csv(csv_path) for csv_path in (gdp_csv, expenditure_csv)
)

In [4]:
# Preview the first five rows of the GDP data
gdp_df.head(5)

Unnamed: 0,Country Name,GDP (in millions of USD)
0,Aruba,3092.4
1,Africa Eastern and Southern,964791.0
2,Afghanistan,18869.9
3,Africa Western and Central,680989.0
4,Angola,122124.0


In [5]:
# Preview the first five rows of the expenditure data
expenditure_df.head(5)

Unnamed: 0,Country Name,Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD)
0,Aruba,,,711.9,
1,Afghanistan,113.2,636.5,,
2,Angola,1577.7,3012.7,15797.7,
3,Albania,348.1,470.2,1497.1,
4,Andorra,136.9,96.5,,


In [6]:
# Join GDP and expenditure rows on their shared "Country Name" column;
# an inner join keeps only the names present in both DataFrames
gdp_expenditure_df = gdp_df.merge(
    expenditure_df,
    how="inner",
    on="Country Name",
)
gdp_expenditure_df.head()

Unnamed: 0,Country Name,GDP (in millions of USD),Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD)
0,Aruba,3092.4,,,711.9,
1,Afghanistan,18869.9,113.2,636.5,,
2,Angola,122124.0,1577.7,3012.7,15797.7,
3,Albania,13019.7,348.1,470.2,1497.1,
4,Andorra,3000.2,136.9,96.5,,


In [7]:
# Discard every row that has no domestic health expenditure figure
gdp_expenditure_df = gdp_expenditure_df.dropna(
    subset=["Domestic Health Expenditure (in millions of USD)"],
    how="any",
)
gdp_expenditure_df.head()

Unnamed: 0,Country Name,GDP (in millions of USD),Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD)
1,Afghanistan,18869.9,113.2,636.5,,
2,Angola,122124.0,1577.7,3012.7,15797.7,
3,Albania,13019.7,348.1,470.2,1497.1,
4,Andorra,3000.2,136.9,96.5,,
5,United Arab Emirates,385606.0,8759.8,,50946.0,


In [8]:
# Create a DataFrame holding only countries with a GDP greater than USD$10 billion.
# The GDP column is expressed in millions of USD, so the cutoff is 10000.
# .copy() makes the result an independent frame, so columns can be added to it
# without affecting gdp_expenditure_df or raising SettingWithCopyWarning.
gdp_over_10bil_df = gdp_expenditure_df.loc[
    gdp_expenditure_df["GDP (in millions of USD)"] > 10000
].copy()
gdp_over_10bil_df.head()

Unnamed: 0,Country Name,GDP (in millions of USD),Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD)
1,Afghanistan,18869.9,113.2,636.5,,
2,Angola,122124.0,1577.7,3012.7,15797.7,
3,Albania,13019.7,348.1,470.2,1497.1,
5,United Arab Emirates,385606.0,8759.8,,50946.0,
6,Argentina,643629.0,42537.0,35105.6,113902.3,3585.8


In [9]:
# Derive domestic health spending as a percentage of GDP:
# (health expenditure / GDP) * 100, stored in a new column
gdp_over_10bil_df["Health Expenditure (% GDP)"] = (
    gdp_over_10bil_df["Domestic Health Expenditure (in millions of USD)"]
    .div(gdp_over_10bil_df["GDP (in millions of USD)"])
    .mul(100)
)
gdp_over_10bil_df.head()

Unnamed: 0,Country Name,GDP (in millions of USD),Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD),Health Expenditure (% GDP)
1,Afghanistan,18869.9,113.2,636.5,,,0.599897
2,Angola,122124.0,1577.7,3012.7,15797.7,,1.291884
3,Albania,13019.7,348.1,470.2,1497.1,,2.673641
5,United Arab Emirates,385606.0,8759.8,,50946.0,,2.271697
6,Argentina,643629.0,42537.0,35105.6,113902.3,3585.8,6.608932


In [10]:
# Order rows by Health Expenditure (% GDP) from highest to lowest, then
# replace the old index with a fresh 0..n-1 index (old labels are dropped)
health_expenditure_sorted_df = (
    gdp_over_10bil_df
    .sort_values(by="Health Expenditure (% GDP)", ascending=False)
    .reset_index(drop=True)
)
health_expenditure_sorted_df.head()

Unnamed: 0,Country Name,GDP (in millions of USD),Domestic Health Expenditure (in millions of USD),Education Expenditure (in millions of USD),Final Consumption (in millions of USD),Research and Development Expenditure (in millions of USD),Health Expenditure (% GDP)
0,Cuba,96851.0,10145.0,,29867.0,417.1,10.474853
1,Sweden,541019.0,49459.9,40948.8,140786.2,18193.3,9.141989
2,Japan,4930840.0,448317.2,,957164.6,158180.4,9.092106
3,Germany,3681730.0,323946.3,179769.3,730471.5,112952.5,8.798752
4,Norway,398394.0,34982.8,31520.9,95638.5,8363.0,8.780956


In [11]:
# Show the row with the highest percentage of GDP spent on domestic health:
# after the descending sort and index reset, that row carries label 0
highest_health_expenditure = health_expenditure_sorted_df.loc[0]
highest_health_expenditure

Country Name                                                      Cuba
GDP (in millions of USD)                                       96851.0
Domestic Health Expenditure (in millions of USD)               10145.0
Education Expenditure (in millions of USD)                         NaN
Final Consumption (in millions of USD)                         29867.0
Research and Development Expenditure (in millions of USD)        417.1
Health Expenditure (% GDP)                                   10.474853
Name: 0, dtype: object

In [12]:
# Bonus: Print out the data for the lowest % of GDP spent on domestic health with one line of code.
# The frame is sorted highest-to-lowest, so the last row is the lowest; .iloc[-1]
# picks it by position and does not depend on the index labels running 0..n-1.
health_expenditure_sorted_df.iloc[-1]

Country Name                                                 Cameroon
GDP (in millions of USD)                                      35009.3
Domestic Health Expenditure (in millions of USD)                 50.9
Education Expenditure (in millions of USD)                     1104.5
Final Consumption (in millions of USD)                         3899.7
Research and Development Expenditure (in millions of USD)         NaN
Health Expenditure (% GDP)                                    0.14539
Name: 138, dtype: object