In [495]:
import pandas as pd
import numpy as np

# Parser settings applied to every input file.
common_csv_options = {
    "keep_default_na": True,
    "delimiter": ',',
    "skipinitialspace": True,
}
# The four API_* files are additionally read with their first four lines skipped.
indicator_csv_options = {**common_csv_options, "skiprows": 4}

countriesContinentMap = pd.read_csv("Countries by continents.csv", **common_csv_options)
lifeExpectancy = pd.read_csv("life-expectancy.csv", **common_csv_options)
population = pd.read_csv("API_SP.POP.TOTL_DS2_en_csv_v2_19373.csv", **indicator_csv_options)
gdpPerCapita = pd.read_csv("API_NY.GDP.PCAP.CD_DS2_en_csv_v2_19346.csv", **indicator_csv_options)
gnpPerCapita = pd.read_csv("API_NY.GNP.PCAP.CD_DS2_en_csv_v2_21131.csv", **indicator_csv_options)
pppPerCapita = pd.read_csv("API_PA.NUS.PPP_DS2_en_csv_v2_22599.csv", **indicator_csv_options)

In [496]:
# Rename the key column to "Country Name", the column name the later
# continent merge joins on.
countriesContinentMap = countriesContinentMap.rename(columns={"Country": "Country Name"})

countriesContinentMap.head()

Unnamed: 0,Continent,Country Name
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina


In [497]:
# Rename the identifying columns to "Country Name" / "Country Code", the
# names the later cells filter and merge on.
life_expectancy_column_names = {"Entity": "Country Name", "Code": "Country Code"}
lifeExpectancy = lifeExpectancy.rename(columns=life_expectancy_column_names)

lifeExpectancy.head()

Unnamed: 0,Country Name,Country Code,Year,Period life expectancy at birth - Sex: total - Age: 0
0,Afghanistan,AFG,1950,28.1563
1,Afghanistan,AFG,1951,28.5836
2,Afghanistan,AFG,1952,29.0138
3,Afghanistan,AFG,1953,29.4521
4,Afghanistan,AFG,1954,29.6975


In [498]:
# Reshape wide -> long: every non-id column becomes a (Year, Population) row,
# then the indicator metadata columns are discarded. Rows are kept only when
# the Year label is purely numeric and falls in 1960-2023 (inclusive).
population = (
    population
    .melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name="Population",
    )
    .drop(columns=["Indicator Name", "Indicator Code"])
    .loc[lambda frame: frame["Year"].str.isnumeric()]
    .loc[lambda frame: frame["Year"].astype(int).between(1960, 2023)]
)

population.head()

Unnamed: 0,Country Name,Country Code,Year,Population
0,Aruba,ABW,1960,54922.0
1,Africa Eastern and Southern,AFE,1960,130072080.0
2,Afghanistan,AFG,1960,9035043.0
3,Africa Western and Central,AFW,1960,97630925.0
4,Angola,AGO,1960,5231654.0


In [499]:
# Reshape wide -> long: every non-id column becomes a (Year, GDP Per Capita)
# row, then the indicator metadata columns are discarded. Rows are kept only
# when the Year label is purely numeric and falls in 1960-2023 (inclusive).
gdpPerCapita = (
    gdpPerCapita
    .melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name="GDP Per Capita",
    )
    .drop(columns=["Indicator Name", "Indicator Code"])
    .loc[lambda frame: frame["Year"].str.isnumeric()]
    .loc[lambda frame: frame["Year"].astype(int).between(1960, 2023)]
)

gdpPerCapita.head()

Unnamed: 0,Country Name,Country Code,Year,GDP Per Capita
0,Aruba,ABW,1960,
1,Africa Eastern and Southern,AFE,1960,186.132432
2,Afghanistan,AFG,1960,
3,Africa Western and Central,AFW,1960,121.938353
4,Angola,AGO,1960,


In [500]:
# Reshape wide -> long: every non-id column becomes a (Year, GNP Per Capita)
# row, then the indicator metadata columns are discarded. Rows are kept only
# when the Year label is purely numeric and falls in 1962-2023 (inclusive).
gnpPerCapita = (
    gnpPerCapita
    .melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name="GNP Per Capita",
    )
    .drop(columns=["Indicator Name", "Indicator Code"])
    .loc[lambda frame: frame["Year"].str.isnumeric()]
    .loc[lambda frame: frame["Year"].astype(int).between(1962, 2023)]
)

gnpPerCapita.head()

Unnamed: 0,Country Name,Country Code,Year,GNP Per Capita
532,Aruba,ABW,1962,
533,Africa Eastern and Southern,AFE,1962,189.385293
534,Afghanistan,AFG,1962,
535,Africa Western and Central,AFW,1962,
536,Angola,AGO,1962,


In [501]:
# Reshape wide -> long: every non-id column becomes a (Year, PPP Per Capita)
# row, then the indicator metadata columns are discarded. Rows are kept only
# when the Year label is purely numeric and falls in 1962-2023 (inclusive).
pppPerCapita = (
    pppPerCapita
    .melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name="PPP Per Capita",
    )
    .drop(columns=["Indicator Name", "Indicator Code"])
    .loc[lambda frame: frame["Year"].str.isnumeric()]
    .loc[lambda frame: frame["Year"].astype(int).between(1962, 2023)]
)

pppPerCapita.head()

Unnamed: 0,Country Name,Country Code,Year,PPP Per Capita
532,Aruba,ABW,1962,
533,Africa Eastern and Southern,AFE,1962,
534,Afghanistan,AFG,1962,
535,Africa Western and Central,AFW,1962,
536,Angola,AGO,1962,


In [502]:
# Collect the distinct country codes present in each indicator frame.
life_codes, gdp_codes, gnp_codes, ppp_codes = (
    set(frame["Country Code"].unique())
    for frame in (lifeExpectancy, gdpPerCapita, gnpPerCapita, pppPerCapita)
)

# Codes that appear in all four frames.
common_codes = life_codes & gdp_codes & gnp_codes & ppp_codes

print(f"Number of common country codes across all datasets: {len(common_codes)}")

Number of common country codes across all datasets: 215


In [503]:
# Restrict each indicator frame to the country codes shared by all of them.
lifeExpectancy, gdpPerCapita, gnpPerCapita, pppPerCapita = (
    frame.loc[frame["Country Code"].isin(common_codes)]
    for frame in (lifeExpectancy, gdpPerCapita, gnpPerCapita, pppPerCapita)
)

In [504]:
# Cast Year to int in every frame so the join keys share one dtype.
# .assign() returns a new frame instead of writing a column into the
# boolean-filtered selections produced by the earlier cells, which is the
# pattern that triggers pandas' SettingWithCopyWarning.
gdpPerCapita = gdpPerCapita.assign(Year=gdpPerCapita["Year"].astype(int))
lifeExpectancy = lifeExpectancy.assign(Year=lifeExpectancy["Year"].astype(int))
gnpPerCapita = gnpPerCapita.assign(Year=gnpPerCapita["Year"].astype(int))
pppPerCapita = pppPerCapita.assign(Year=pppPerCapita["Year"].astype(int))
population = population.assign(Year=population["Year"].astype(int))

# Join on country code and year only. The frames were already aligned by
# "Country Code" in the earlier cells; additionally requiring an exact
# "Country Name" match in an inner join would silently drop any country whose
# name is spelled differently between the input files. The "Country Name"
# column of the result is the one carried by `population`.
join_keys = ["Country Code", "Year"]

combined = population
for indicator_frame in (lifeExpectancy, gdpPerCapita, gnpPerCapita, pppPerCapita):
    combined = combined.merge(
        indicator_frame.drop(columns=["Country Name"]),
        on=join_keys,
        how="inner",
    )

print(combined.head())


  Country Name Country Code  Year  Population  \
0        Aruba          ABW  1962     56320.0   
1  Afghanistan          AFG  1962   9404406.0   
2       Angola          AGO  1962   5354310.0   
3      Albania          ALB  1962   1711319.0   
4      Andorra          AND  1962     11086.0   

   Period life expectancy at birth - Sex: total - Age: 0  GDP Per Capita  \
0                                            64.6020                 NaN   
1                                            33.7565                 NaN   
2                                            37.1683                 NaN   
3                                            58.4944                 NaN   
4                                            72.9929                 NaN   

   GNP Per Capita  PPP Per Capita  
0             NaN             NaN  
1             NaN             NaN  
2             NaN             NaN  
3             NaN             NaN  
4             NaN             NaN  


In [505]:
# Normalise two country names before the continent lookup.
# "Côte d'Ivoire" is a key of territory_continents below; "Swaziland" is
# presumably the spelling used in countriesContinentMap -- TODO confirm.
combined['Country Name'] = combined['Country Name'].replace({
    "Cote d'Ivoire": "Côte d'Ivoire",
    "Eswatini": "Swaziland"
})

# Explicit continent assignments. An entry here takes precedence over
# whatever the merge with countriesContinentMap yields for that name.
territory_continents = {
    'Aruba': 'North America',
    'American Samoa': 'Oceania',
    'Bermuda': 'North America',
    'Curacao': 'North America',
    'Cayman Islands': 'North America',
    'Faroe Islands': 'Europe',
    'Gibraltar': 'Europe',
    'Greenland': 'North America',
    'Guam': 'Oceania',
    'Isle of Man': 'Europe',
    'Northern Mariana Islands': 'Oceania',
    'New Caledonia': 'Oceania',
    'Puerto Rico': 'North America',
    'French Polynesia': 'Oceania',
    'Sint Maarten (Dutch part)': 'North America',
    'Turks and Caicos Islands': 'North America',
    'British Virgin Islands': 'North America',
    'Burkina Faso': 'Africa',
    "Côte d'Ivoire": 'Africa',
    'North Macedonia': 'Europe',
    'Myanmar': 'Asia'
}

# Drop any Continent column left behind by a previous run of this cell.
# Without this, re-running the cell merges a second Continent column, pandas
# suffixes them as Continent_x / Continent_y, and the lookup below fails
# with a KeyError.
combined = pd.merge(
    combined.drop(columns=['Continent'], errors='ignore'),
    countriesContinentMap,
    on='Country Name',
    how='left'
)

# Vectorised equivalent of a per-row dict.get(name, merged_value):
# names listed in territory_continents get the override, every other row
# keeps the continent that came from the merge (NaN when there was no match).
continent_overrides = combined['Country Name'].map(territory_continents)
combined['Continent'] = combined['Continent'].where(
    continent_overrides.isna(), continent_overrides
)

combined.head()

Unnamed: 0,Country Name,Country Code,Year,Population,Period life expectancy at birth - Sex: total - Age: 0,GDP Per Capita,GNP Per Capita,PPP Per Capita,Continent
0,Aruba,ABW,1962,56320.0,64.602,,,,North America
1,Afghanistan,AFG,1962,9404406.0,33.7565,,,,Asia
2,Angola,AGO,1962,5354310.0,37.1683,,,,Africa
3,Albania,ALB,1962,1711319.0,58.4944,,,,Europe
4,Andorra,AND,1962,11086.0,72.9929,,,,Europe


In [506]:
# Persist the merged table. index=True is the pandas default, spelled out
# here: the row index is written as the first, unnamed column of the file.
combined.to_csv(path_or_buf="WorldData.csv", index=True)