In [1]:
"""
Project: Global Socioeconomic Analysis

Objective: 
    Explore how a country's economic conditions impact the overall health of its citizens.

Tasks (as needed):
    Data Wrangling: Obtain, organize, clean, and preprocess datasets from reputable international sources.
    Visualization: Create informative and visually appealing visualizations to show global trends and patterns.
    Classification and regression
    Clustering
    Statistical Inference: Draw meaningful conclusions about how a country's financial status impacts its citizens' health.

Team Members:
    Joshua Lee
    Will Whitehead
    Taha Khalid
    Blake Carlson
    Saurav Renju
    Gregory Markose
"""

'\nProject: Global Socieoeconomic Analysis\n\nObjective: \n    Explore the relationship between how a countries enconomic conditions impacts the overall health of its citizens.\n\nTasks (as needed):\n    Data Wrangling: Obtain, organize, clean, and preprocess datasets from reputable international sources.\n    Visualization: Create informative and visually appealing visualizations to show global trends and patterns.\n    Classification and regression\n    Clustering\n    Statistical Inference: Draw meaningful conclusions about how a countries financial status impacts its citizens health.\n\nTeam Members:\n    Joshua Lee\n    Will Whitehead\n    Taha Khalid\n    Blake Carlson\n    Saurav Renju\n    Gregory Markose\n'

In [2]:
# Import necessary libraries and modules

import pandas as pd
import altair as alt

In [3]:
"""
Import and clean datasets:

Data Sources Include:
    United Nations Statistics Division: GDP / GDP Per Capita - UN_2024_11_27_GDP.csv
    United Nations Statistics Division: Health Personnel - UN_2024_11_27_HealthPersonnel.csv
    World Health Organization: Life Expectancy Data - WHO_2024_08_02_LifeExpectancy.csv
"""

# Load the UN GDP table. The first line of the CSV is skipped, so the header is taken
# from the line after it. The two location columns are given descriptive names as part
# of the same expression.
un_gdp = pd.read_csv("data/UN_2024_11_27_GDP.csv", skiprows=1).rename(
    columns={
        "Region/Country/Area": "Region_Code",
        "Unnamed: 1": "Country_Region_Name",
    }
)

# Show the GDP table
un_gdp

Unnamed: 0,Region_Code,Country_Region_Name,Year,Series,Value,Footnotes,Source
0,1,"Total, all countries or areas",1995,GDP in current prices (millions of US dollars),31290901,,"United Nations Statistics Division, New York, ..."
1,1,"Total, all countries or areas",2005,GDP in current prices (millions of US dollars),47816593,,"United Nations Statistics Division, New York, ..."
2,1,"Total, all countries or areas",2010,GDP in current prices (millions of US dollars),66633612,,"United Nations Statistics Division, New York, ..."
3,1,"Total, all countries or areas",2015,GDP in current prices (millions of US dollars),75440153,,"United Nations Statistics Division, New York, ..."
4,1,"Total, all countries or areas",2020,GDP in current prices (millions of US dollars),85483570,,"United Nations Statistics Division, New York, ..."
...,...,...,...,...,...,...,...
6769,716,Zimbabwe,2010,GDP real rates of growth (percent),19.7,,"United Nations Statistics Division, New York, ..."
6770,716,Zimbabwe,2015,GDP real rates of growth (percent),1.8,,"United Nations Statistics Division, New York, ..."
6771,716,Zimbabwe,2020,GDP real rates of growth (percent),-5.3,,"United Nations Statistics Division, New York, ..."
6772,716,Zimbabwe,2021,GDP real rates of growth (percent),6.3,,"United Nations Statistics Division, New York, ..."


In [4]:
# Load the UN health personnel table. The first line of the CSV is skipped, so the header
# is taken from the line after it. The two location columns are renamed to the same
# descriptive names used for the GDP table.
un_health_personnel = pd.read_csv(
    "data/UN_2024_11_27_HealthPersonnel.csv",
    skiprows=1,
).rename(
    columns={
        "Region/Country/Area": "Region_Code",
        "Unnamed: 1": "Country_Region_Name",
    }
)

# Show the health personnel table
un_health_personnel

Unnamed: 0,Region_Code,Country_Region_Name,Year,Series,Value,Footnotes,Source
0,4,Afghanistan,2001,Health personnel: Physicians (number),4104,,"World Health Organisation (WHO), Geneva, WHO G..."
1,4,Afghanistan,2001,Health personnel: Physicians (per 1000 populat...,0.2,,"World Health Organisation (WHO), Geneva, WHO G..."
2,4,Afghanistan,2001,Health personnel: Pharmacists (number),525,,"World Health Organisation (WHO), Geneva, WHO G..."
3,4,Afghanistan,2001,Health personnel: Pharmacists (per 1000 popula...,0.0,,"World Health Organisation (WHO), Geneva, WHO G..."
4,4,Afghanistan,2005,Health personnel: Pharmacists (number),900,,"World Health Organisation (WHO), Geneva, WHO G..."
...,...,...,...,...,...,...,...
6564,716,Zimbabwe,2022,Health personnel: Dentists (number),2703,,"World Health Organisation (WHO), Geneva, WHO G..."
6565,716,Zimbabwe,2022,Health personnel: Dentists (per 1000 population),0.2,,"World Health Organisation (WHO), Geneva, WHO G..."
6566,716,Zimbabwe,2022,Health personnel: Pharmacists (number),1902,,"World Health Organisation (WHO), Geneva, WHO G..."
6567,716,Zimbabwe,2022,Health personnel: Pharmacists (per 1000 popula...,0.1,,"World Health Organisation (WHO), Geneva, WHO G..."


In [5]:
# Load the WHO life expectancy table (unlike the UN files, no leading row is skipped)
who_life_expectancy = pd.read_csv(filepath_or_buffer="data/WHO_2024_08_02_LifeExpectancy.csv")

# Show the life expectancy table
who_life_expectancy

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,WHOSIS_000001,Life expectancy at birth (years),text,AFR,Africa,Country,LSO,Lesotho,Year,2021,...,,,47.80,,49.70,48.7 [47.8-49.7],,,EN,2024-08-02T05:00:00.000Z
1,WHOSIS_000001,Life expectancy at birth (years),text,AFR,Africa,Country,CAF,Central African Republic,Year,2021,...,,,48.45,,50.92,49.6 [48.5-50.9],,,EN,2024-08-02T05:00:00.000Z
2,WHOSIS_000001,Life expectancy at birth (years),text,AFR,Africa,Country,LSO,Lesotho,Year,2021,...,,,50.49,,52.57,51.5 [50.5-52.6],,,EN,2024-08-02T05:00:00.000Z
3,WHOSIS_000001,Life expectancy at birth (years),text,AFR,Africa,Country,SWZ,Eswatini,Year,2021,...,,,50.73,,52.82,51.6 [50.7-52.8],,,EN,2024-08-02T05:00:00.000Z
4,WHOSIS_000001,Life expectancy at birth (years),text,EMR,Eastern Mediterranean,Country,SOM,Somalia,Year,2021,...,,,50.62,,53.10,51.7 [50.6-53.1],,,EN,2024-08-02T05:00:00.000Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24415,WHOSIS_000015,Life expectancy at age 60 (years),text,EUR,Europe,Country,CHE,Switzerland,Year,2000,...,,,24.78,,24.87,24.8 [24.8-24.9],,,EN,2024-08-02T05:00:00.000Z
24416,WHOSIS_000015,Life expectancy at age 60 (years),text,WPR,Western Pacific,Country,AUS,Australia,Year,2000,...,,,24.79,,24.91,24.9 [24.8-24.9],,,EN,2024-08-02T05:00:00.000Z
24417,WHOSIS_000015,Life expectancy at age 60 (years),text,EUR,Europe,Country,FRA,France,Year,2000,...,,,25.24,,25.33,25.3 [25.2-25.3],,,EN,2024-08-02T05:00:00.000Z
24418,WHOSIS_000015,Life expectancy at age 60 (years),text,AMR,Americas,Country,NIC,Nicaragua,Year,2000,...,,,25.48,,25.72,25.6 [25.5-25.7],,,EN,2024-08-02T05:00:00.000Z


In [6]:
# TODO: Import other necessary data
    # TODO: Decide what other data will be necessary.
    # TODO: Decide how to incorporate the WHO data.
# Placeholder statement: prints an empty line until the additional imports are written.
print()




In [7]:
# Join the two UN tables on region code and year.
# An inner join keeps only the (Region_Code, Year) pairs present in both tables, so no row
# in the result is missing its GDP record or its health personnel record.
# Columns that exist in both tables come out of the merge with "_x" (GDP side) and "_y"
# (health personnel side) suffixes; the rename replaces the key ones with specific names.
un_merged_personnel_gdp = un_gdp.merge(
    un_health_personnel,
    how="inner",
    on=["Region_Code", "Year"],
).rename(
    columns={
        "Country_Region_Name_x": "Region_Name",
        "Value_x": "GDP",
        "Value_y": "Health_Personnel",
    }
)

# Show the combined data frame.
un_merged_personnel_gdp

Unnamed: 0,Region_Code,Region_Name,Year,Series_x,GDP,Footnotes_x,Source_x,Country_Region_Name_y,Series_y,Health_Personnel,Footnotes_y,Source_y
0,4,Afghanistan,2005,GDP in current prices (millions of US dollars),6475,,"United Nations Statistics Division, New York, ...",Afghanistan,Health personnel: Pharmacists (number),900,,"World Health Organisation (WHO), Geneva, WHO G..."
1,4,Afghanistan,2005,GDP in current prices (millions of US dollars),6475,,"United Nations Statistics Division, New York, ...",Afghanistan,Health personnel: Pharmacists (per 1000 popula...,0.0,,"World Health Organisation (WHO), Geneva, WHO G..."
2,4,Afghanistan,2005,GDP in current prices (millions of US dollars),6475,,"United Nations Statistics Division, New York, ...",Afghanistan,Health personnel: Nurses and midwives (number),14930,,"World Health Organisation (WHO), Geneva, WHO G..."
3,4,Afghanistan,2005,GDP in current prices (millions of US dollars),6475,,"United Nations Statistics Division, New York, ...",Afghanistan,Health personnel: Nurses and midwives personne...,0.6,,"World Health Organisation (WHO), Geneva, WHO G..."
4,4,Afghanistan,2010,GDP in current prices (millions of US dollars),15145,,"United Nations Statistics Division, New York, ...",Afghanistan,Health personnel: Physicians (number),6901,,"World Health Organisation (WHO), Geneva, WHO G..."
...,...,...,...,...,...,...,...,...,...,...,...,...
16171,716,Zimbabwe,2022,GDP real rates of growth (percent),3.5,,"United Nations Statistics Division, New York, ...",Zimbabwe,Health personnel: Dentists (number),2703,,"World Health Organisation (WHO), Geneva, WHO G..."
16172,716,Zimbabwe,2022,GDP real rates of growth (percent),3.5,,"United Nations Statistics Division, New York, ...",Zimbabwe,Health personnel: Dentists (per 1000 population),0.2,,"World Health Organisation (WHO), Geneva, WHO G..."
16173,716,Zimbabwe,2022,GDP real rates of growth (percent),3.5,,"United Nations Statistics Division, New York, ...",Zimbabwe,Health personnel: Pharmacists (number),1902,,"World Health Organisation (WHO), Geneva, WHO G..."
16174,716,Zimbabwe,2022,GDP real rates of growth (percent),3.5,,"United Nations Statistics Division, New York, ...",Zimbabwe,Health personnel: Pharmacists (per 1000 popula...,0.1,,"World Health Organisation (WHO), Geneva, WHO G..."


In [8]:
# Clean up the data by removing the unneeded columns (the footnotes and the duplicate
# country name from the health personnel side of the merge).
# The result is reassigned instead of dropping in place, and errors="ignore" skips any
# column that has already been removed, so this cell can be re-run without a KeyError.
un_merged_personnel_gdp = un_merged_personnel_gdp.drop(
    columns=["Footnotes_x", "Country_Region_Name_y", "Footnotes_y"],
    errors="ignore",
)


# Print the units / type of measurement associated with the GDP and healthcare personnel values.
print("GDP measurement types: ", un_merged_personnel_gdp["Series_x"].unique())
print()
print("Healthcare personnel measurement types: ", un_merged_personnel_gdp["Series_y"].unique())

GDP measurement types:  ['GDP in current prices (millions of US dollars)'
 'GDP per capita (US dollars)'
 'GDP in constant 2015 prices (millions of US dollars)'
 'GDP real rates of growth (percent)']

Healthcare personnel measurement types:  ['Health personnel: Pharmacists (number)'
 'Health personnel: Pharmacists (per 1000 population)'
 'Health personnel: Nurses and midwives (number)'
 'Health personnel: Nurses and midwives personnel (per 1000 population)'
 'Health personnel: Physicians (number)'
 'Health personnel: Physicians (per 1000 population)'
 'Health personnel: Dentists (number)'
 'Health personnel: Dentists (per 1000 population)']


In [9]:
# Step 1: keep only the rows whose GDP series is "GDP per capita" (case-insensitive match).
    # Together with step 2 this puts both measures on a per-population basis.
un_merged_gdp_filtered = un_merged_personnel_gdp.loc[
    lambda frame: frame["Series_x"].str.contains("GDP per capita", case=False)
]

# Step 2: of those rows, keep only the health personnel series reported per 1000 population.
un_merged_filtered = un_merged_gdp_filtered.loc[
    lambda frame: frame["Series_y"].str.contains("per 1000 population", case=False)
]

# Show the merged and filtered data
un_merged_filtered

Unnamed: 0,Region_Code,Region_Name,Year,Series_x,GDP,Source_x,Series_y,Health_Personnel,Source_y
19,4,Afghanistan,2005,GDP per capita (US dollars),265,"United Nations Statistics Division, New York, ...",Health personnel: Pharmacists (per 1000 popula...,0.0,"World Health Organisation (WHO), Geneva, WHO G..."
21,4,Afghanistan,2005,GDP per capita (US dollars),265,"United Nations Statistics Division, New York, ...",Health personnel: Nurses and midwives personne...,0.6,"World Health Organisation (WHO), Geneva, WHO G..."
23,4,Afghanistan,2010,GDP per capita (US dollars),537,"United Nations Statistics Division, New York, ...",Health personnel: Physicians (per 1000 populat...,0.2,"World Health Organisation (WHO), Geneva, WHO G..."
25,4,Afghanistan,2010,GDP per capita (US dollars),537,"United Nations Statistics Division, New York, ...",Health personnel: Pharmacists (per 1000 popula...,0.0,"World Health Organisation (WHO), Geneva, WHO G..."
27,4,Afghanistan,2015,GDP per capita (US dollars),554,"United Nations Statistics Division, New York, ...",Health personnel: Physicians (per 1000 populat...,0.3,"World Health Organisation (WHO), Geneva, WHO G..."
...,...,...,...,...,...,...,...,...,...
16106,716,Zimbabwe,2020,GDP per capita (US dollars),1383,"United Nations Statistics Division, New York, ...",Health personnel: Nurses and midwives personne...,2.0,"World Health Organisation (WHO), Geneva, WHO G..."
16108,716,Zimbabwe,2022,GDP per capita (US dollars),1619,"United Nations Statistics Division, New York, ...",Health personnel: Physicians (per 1000 populat...,0.2,"World Health Organisation (WHO), Geneva, WHO G..."
16110,716,Zimbabwe,2022,GDP per capita (US dollars),1619,"United Nations Statistics Division, New York, ...",Health personnel: Dentists (per 1000 population),0.2,"World Health Organisation (WHO), Geneva, WHO G..."
16112,716,Zimbabwe,2022,GDP per capita (US dollars),1619,"United Nations Statistics Division, New York, ...",Health personnel: Pharmacists (per 1000 popula...,0.1,"World Health Organisation (WHO), Geneva, WHO G..."


In [None]:
# Next is to create a line chart for a country, with x being the year, and y being the healthcare personnel relative to GDP
    # Are my units correct? Personnel per 1000 / GDP per capita? Standardize to make both per person? Per 100 people?
    # Also need to check whether all countries have values for dentists, physicians, and pharmacists. Adding them together will not work if some countries are missing some data.
        # Comparing one country's dentists + pharmacists with another's dentists + pharmacists + physicians is not fair.