In [216]:
import pandas as pd
import country_converter as cc

Although the intersection of our datasets goes back to 1971, we will start in 1992 as many countries were previously part of the USSR and thus did not have independent reporting statistics.

While country name standardization can be a problem, thankfully there is a solution. Both datasets contain IOS3 codes, which are universally able to identify geopolitical entities regardless of language or naming convention. Python has a program, country_converter, that will allow us to change these codes back into recognizable names.

# GDP

In [217]:
gdp = pd.read_csv("/Users/matthewcavanaugh/Desktop/GitHub/gdp-trends-from-1992-to-2019/National GDP table.csv")
gdp

Unnamed: 0,Entity,Code,Year,"GDP (output, multiple price benchmarks)"
0,Albania,ALB,1971,6958496093
1,Albania,ALB,1972,7246038085
2,Albania,ALB,1973,7536605468
3,Albania,ALB,1974,7855124023
4,Albania,ALB,1975,8164371093
...,...,...,...,...
10103,Zimbabwe,ZWE,2015,39798644531
10104,Zimbabwe,ZWE,2016,40963191406
10105,Zimbabwe,ZWE,2017,44316742187
10106,Zimbabwe,ZWE,2018,43420898437


In [218]:
gdp.rename(columns={"GDP (output, multiple price benchmarks)": "Adjusted GDP in USD"}, inplace=True)
gdp.rename(columns={"Code": "ISO3"}, inplace=True)
gdp.drop("Entity", axis=1, inplace=True)
gdp

Unnamed: 0,ISO3,Year,Adjusted GDP in USD
0,ALB,1971,6958496093
1,ALB,1972,7246038085
2,ALB,1973,7536605468
3,ALB,1974,7855124023
4,ALB,1975,8164371093
...,...,...,...
10103,ZWE,2015,39798644531
10104,ZWE,2016,40963191406
10105,ZWE,2017,44316742187
10106,ZWE,2018,43420898437


In [219]:
%%time

import country_converter as cc

# Convert the ISO3 codes to standard short country names (e.g. USA = United States)
gdp['ISO3'] = gdp["ISO3"].apply(lambda x: cc.convert(names=[x], to='name_short'))

# Any unmatched ISO3 codes will be rendered as 'not found'
# Warning messages will be displayed below
# Remove these values from the dataframe
gdp = gdp[gdp["ISO3"] != "not found"]

# Change the name of the column to 'Country'
gdp.rename(columns={"ISO3": "Country"}, inplace=True)

CPU times: user 4min 46s, sys: 1.21 s, total: 4min 47s
Wall time: 4min 47s


In [220]:
# Sort the dataframe first by 'Country', then by 'Year'
gdp = gdp.sort_values(by=["Country", "Year"])
gdp

Unnamed: 0,Country,Year,Adjusted GDP in USD
0,Albania,1971,6958496093
1,Albania,1972,7246038085
2,Albania,1973,7536605468
3,Albania,1974,7855124023
4,Albania,1975,8164371093
...,...,...,...
10103,Zimbabwe,2015,39798644531
10104,Zimbabwe,2016,40963191406
10105,Zimbabwe,2017,44316742187
10106,Zimbabwe,2018,43420898437


In [223]:
#gdp.to_csv("Cleaned GDP.csv", index=False)

# Population

In [226]:
population = pd.read_csv("/Users/matthewcavanaugh/Desktop/GitHub/gdp-trends-from-1992-to-2019/Population estimates.csv")
population

Unnamed: 0,Index,Country,ISO3,Year,Population (thousands),Male population (thousands),Female population (thousands),Population density (sq km),Sex ratio (males per 100 females),Median age,Population Change (thousands),Population growth rate,Births (thousands),Births by women aged 15 to 19 (thousands),Fertility rate,Life expectancy at birth,Infant mortality rate (per 1000 births),"Net Migration Rate (per 1,000 population)"
0,1,World,,1950,2493092.848,1241431.646,1251661.202,19.118,99.183,22.159,43337.691,1.738,91823.936,10492.124,4.852,46.394,138.135,0.000
1,2,World,,1951,2536927.035,1263362.496,1273564.539,19.454,99.199,22.123,44330.684,1.747,92507.128,10573.672,4.816,47.126,136.046,0.000
2,3,World,,1952,2584086.339,1287092.948,1296993.391,19.816,99.237,22.074,49987.925,1.935,97371.459,11033.785,5.001,48.218,132.666,0.000
3,4,World,,1953,2634106.235,1312354.748,1321751.488,20.200,99.289,22.011,50051.867,1.900,97291.452,11045.162,4.922,48.809,130.128,0.000
4,5,World,,1954,2685894.860,1338531.144,1347363.715,20.597,99.344,21.937,53525.382,1.993,100187.854,11301.725,4.998,49.651,127.710,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21973,21979,Wallis and Futuna Islands,WLF,2019,11.686,5.629,6.057,83.471,92.934,35.056,-0.108,-0.924,0.146,0.004,2.048,77.994,11.793,-14.291
21974,21980,Wallis and Futuna Islands,WLF,2020,11.616,5.587,6.029,82.971,92.653,35.590,-0.032,-0.275,0.111,0.004,1.574,78.164,11.618,-6.112
21975,21981,Wallis and Futuna Islands,WLF,2021,11.568,5.555,6.013,82.625,92.391,36.085,-0.065,-0.562,0.102,0.004,1.459,77.461,11.390,-6.138
21976,21982,Wallis and Futuna Islands,WLF,2022,11.478,5.495,5.984,81.986,91.828,36.599,-0.114,-0.993,0.100,0.005,1.423,78.501,11.239,-10.978


In [227]:
# 'Index' was mistakenly made a column at some point point, drop it
population.drop("Index", axis=1, inplace=True)

# Ensure "Year" is numeric
population["Year"] = population["Year"].astype("Int64")

# Drop the years outside our desired range (1971 to 2019)
population = population[(population["Year"] >= 1971) & (population["Year"] <= 2019)]

# Drop rows where the ISO3 is blank
population = population.dropna(subset=["ISO3"])

population

Unnamed: 0,Country,ISO3,Year,Population (thousands),Male population (thousands),Female population (thousands),Population density (sq km),Sex ratio (males per 100 females),Median age,Population Change (thousands),Population growth rate,Births (thousands),Births by women aged 15 to 19 (thousands),Fertility rate,Life expectancy at birth,Infant mortality rate (per 1000 births),"Net Migration Rate (per 1,000 population)"
2611,Burundi,BDI,1971,3617.289,1765.972,1851.318,139.395,95.390,16.235,87.589,2.422,173.238,18.478,7.213,43.172,146.984,-2.092
2612,Burundi,BDI,1972,3617.128,1764.218,1852.909,139.388,95.213,15.973,-87.913,-2.431,176.528,18.228,7.210,25.777,160.262,-32.749
2613,Burundi,BDI,1973,3611.991,1759.612,1852.379,139.190,94.992,15.736,77.640,2.150,168.491,17.359,7.215,37.409,149.952,0.787
2614,Burundi,BDI,1974,3703.590,1805.481,1898.110,142.720,95.120,15.841,105.559,2.850,174.302,17.753,7.222,43.337,146.098,2.718
2615,Burundi,BDI,1975,3796.612,1852.488,1944.123,146.305,95.287,15.882,80.484,2.120,181.986,18.356,7.244,43.445,144.912,-5.450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21969,Wallis and Futuna Islands,WLF,2015,12.160,5.888,6.272,86.857,93.862,32.693,-0.138,-1.135,0.154,0.006,1.972,77.450,12.453,-17.105
21970,Wallis and Futuna Islands,WLF,2016,12.033,5.821,6.212,85.950,93.706,33.284,-0.116,-0.964,0.154,0.005,2.004,77.545,12.338,-16.372
21971,Wallis and Futuna Islands,WLF,2017,11.918,5.760,6.158,85.129,93.537,33.854,-0.114,-0.957,0.153,0.005,2.027,77.628,12.235,-16.026
21972,Wallis and Futuna Islands,WLF,2018,11.800,5.694,6.106,84.289,93.245,34.455,-0.121,-1.025,0.126,0.004,1.716,77.763,12.084,-14.152


In [228]:
%%time

import country_converter as cc

# Convert the ISO3 codes to standard short country names (e.g. USA = United States)
population["ISO3"] = population["ISO3"].apply(lambda x: cc.convert(names=[x], to='name_short'))

# Any unmatched ISO3 codes will be rendered as 'not found'
# Warning messages will be displayed below
# Remove these values from the dataframe
population = population[population["ISO3"] != 'not found']

# Drop our old 'Country' column
population.drop("Country", axis=1, inplace=True)

# Change the name of the ISO3 column to 'Country'
population.rename(columns={"ISO3": "Country"}, inplace=True)

CPU times: user 5min 29s, sys: 1.45 s, total: 5min 30s
Wall time: 5min 31s


In [229]:
# Sort the dataframe first by 'Country', then by 'Year'
population = population.sort_values(by=['Country', 'Year'])
population

Unnamed: 0,Country,Year,Population (thousands),Male population (thousands),Female population (thousands),Population density (sq km),Sex ratio (males per 100 females),Median age,Population Change (thousands),Population growth rate,Births (thousands),Births by women aged 15 to 19 (thousands),Fertility rate,Life expectancy at birth,Infant mortality rate (per 1000 births),"Net Migration Rate (per 1,000 population)"
8457,Afghanistan,1971,11567.667,5925.771,5641.896,17.816,105.032,16.841,275.483,2.382,593.459,78.232,7.432,37.932,197.623,-1.107
8458,Afghanistan,1972,11853.696,6067.256,5786.440,18.256,104.853,16.713,296.574,2.502,607.433,80.313,7.453,38.423,194.114,-0.342
8459,Afghanistan,1973,12157.999,6218.247,5939.752,18.725,104.689,16.591,312.033,2.567,623.433,82.362,7.487,38.951,190.411,-0.249
8460,Afghanistan,1974,12469.127,6372.901,6096.225,19.204,104.538,16.474,310.221,2.488,640.572,84.579,7.526,39.469,186.769,-1.605
8461,Afghanistan,1975,12773.954,6524.498,6249.456,19.674,104.401,16.360,299.434,2.344,655.576,86.484,7.542,39.994,182.957,-3.477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4061,Zimbabwe,2015,14399.013,6810.139,7588.874,37.221,89.738,16.729,193.978,1.347,488.386,80.068,3.911,58.989,41.845,-11.266
4062,Zimbabwe,2016,14600.294,6909.792,7690.502,37.741,89.848,16.772,208.584,1.429,477.484,81.397,3.828,59.760,40.197,-9.601
4063,Zimbabwe,2017,14812.482,7015.107,7797.375,38.290,89.968,16.878,215.792,1.457,471.228,82.627,3.768,60.263,38.904,-8.658
4064,Zimbabwe,2018,15034.452,7125.471,7908.981,38.864,90.093,17.022,228.148,1.518,470.991,84.606,3.744,60.906,37.668,-7.833


In [230]:
#population.to_csv("Cleaned population.csv", index=False)

# Combining the datasets

In [1]:
import pandas as pd
gdp = pd.read_csv("Cleaned GDP.csv")
population = pd.read_csv("Cleaned population.csv")

In [2]:
# First, find the common countries across both DataFrames
common_countries = set(gdp['Country']) & set(population['Country'])
                                             
# Filter each DataFrames to keep only the common countries
population_filtered = population[population['Country'].isin(common_countries)]
gdp_filtered = gdp[gdp['Country'].isin(common_countries)]

# Merge all DataFrames on 'Country' and 'Year'
popandgdp = gdp_filtered.merge(population_filtered, on=['Country', 'Year'], how='inner')

# Display our combined DataFrame to ensure these changes were properly applied
popandgdp

Unnamed: 0,Country,Year,Adjusted GDP in USD,Population (thousands),Male population (thousands),Female population (thousands),Population density (sq km),Sex ratio (males per 100 females),Median age,Population Change (thousands),Population growth rate,Births (thousands),Births by women aged 15 to 19 (thousands),Fertility rate,Life expectancy at birth,Infant mortality rate (per 1000 births),"Net Migration Rate (per 1,000 population)"
0,Albania,1971,6958496093,2234.992,1130.886,1104.106,81.575,102.425,18.059,54.042,2.418,73.875,3.380,5.083,65.896,78.069,-0.408
1,Albania,1972,7246038085,2289.265,1158.899,1130.367,83.556,102.524,18.185,54.505,2.381,74.682,3.447,4.979,66.526,73.160,-0.694
2,Albania,1973,7536605468,2344.105,1187.153,1156.951,85.558,102.610,18.337,55.174,2.354,74.894,3.568,4.829,67.107,68.705,-0.605
3,Albania,1974,7855124023,2399.401,1215.590,1183.812,87.576,102.684,18.515,55.418,2.310,75.465,3.775,4.700,67.633,64.687,-0.784
4,Albania,1975,8164371093,2454.859,1244.047,1210.812,89.600,102.745,18.724,55.498,2.261,76.181,4.052,4.566,68.120,61.088,-1.063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8221,Zimbabwe,2015,39798644531,14399.013,6810.139,7588.874,37.221,89.738,16.729,193.978,1.347,488.386,80.068,3.911,58.989,41.845,-11.266
8222,Zimbabwe,2016,40963191406,14600.294,6909.792,7690.502,37.741,89.848,16.772,208.584,1.429,477.484,81.397,3.828,59.760,40.197,-9.601
8223,Zimbabwe,2017,44316742187,14812.482,7015.107,7797.375,38.290,89.968,16.878,215.792,1.457,471.228,82.627,3.768,60.263,38.904,-8.658
8224,Zimbabwe,2018,43420898437,15034.452,7125.471,7908.981,38.864,90.093,17.022,228.148,1.518,470.991,84.606,3.744,60.906,37.668,-7.833


In [3]:
# Drop rows before 1992 and after 2019
popandgdp = popandgdp[(popandgdp['Year'] >= 1992) & (popandgdp['Year'] <= 2019)]

# Ensure 'popandgdp' has the necessary columns
# Drop years outside our desired range
expected_years = set(range(1992, 2020))  # 2020 is exclusive
grouped = popandgdp.groupby("Country")["Year"].agg(set)

# Identify countries with missing years
incomplete_countries = grouped[grouped.apply(lambda x: x != expected_years)].index

# Remove rows with incomplete Country/Year sets
popandgdp = popandgdp[~popandgdp["Country"].isin(incomplete_countries)]

popandgdp

Unnamed: 0,Country,Year,Adjusted GDP in USD,Population (thousands),Male population (thousands),Female population (thousands),Population density (sq km),Sex ratio (males per 100 females),Median age,Population Change (thousands),Population growth rate,Births (thousands),Births by women aged 15 to 19 (thousands),Fertility rate,Life expectancy at birth,Infant mortality rate (per 1000 births),"Net Migration Rate (per 1,000 population)"
21,Albania,1992,9767368164,3282.501,1641.611,1640.890,119.808,100.044,23.656,-2.468,-0.075,82.030,2.651,2.885,73.303,31.895,-20.271
22,Albania,1993,11222619140,3277.825,1635.188,1642.637,119.637,99.547,24.056,-6.885,-0.210,79.305,2.865,2.811,73.638,30.625,-20.881
23,Albania,1994,12672190429,3269.410,1626.657,1642.753,119.330,99.020,24.453,-9.945,-0.304,77.976,3.120,2.798,73.837,29.486,-21.461
24,Albania,1995,13669630859,3258.567,1616.705,1641.862,118.934,98.468,24.830,-11.740,-0.360,75.805,3.218,2.762,74.022,28.365,-21.416
25,Albania,1996,15067375000,3245.678,1605.548,1640.130,118.464,97.892,25.174,-14.038,-0.433,71.875,2.216,2.666,74.113,27.271,-20.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8221,Zimbabwe,2015,39798644531,14399.013,6810.139,7588.874,37.221,89.738,16.729,193.978,1.347,488.386,80.068,3.911,58.989,41.845,-11.266
8222,Zimbabwe,2016,40963191406,14600.294,6909.792,7690.502,37.741,89.848,16.772,208.584,1.429,477.484,81.397,3.828,59.760,40.197,-9.601
8223,Zimbabwe,2017,44316742187,14812.482,7015.107,7797.375,38.290,89.968,16.878,215.792,1.457,471.228,82.627,3.768,60.263,38.904,-8.658
8224,Zimbabwe,2018,43420898437,15034.452,7125.471,7908.981,38.864,90.093,17.022,228.148,1.518,470.991,84.606,3.744,60.906,37.668,-7.833


Reduce the number of columns. We will only need population and GDP to build our remaining metrics.

In [4]:
popandgdp['Population (thousands)'] = popandgdp['Population (thousands)'] * 1000
popandgdp.rename(columns={'Population (thousands)': 'Population'}, inplace=True)
popandgdp['Population'] = popandgdp['Population'].round().astype(int)
popandgdp = popandgdp[['Country', 'Year', 'Population', 'Adjusted GDP in USD']]
popandgdp

Unnamed: 0,Country,Year,Population,Adjusted GDP in USD
21,Albania,1992,3282501,9767368164
22,Albania,1993,3277825,11222619140
23,Albania,1994,3269410,12672190429
24,Albania,1995,3258567,13669630859
25,Albania,1996,3245678,15067375000
...,...,...,...,...
8221,Zimbabwe,2015,14399013,39798644531
8222,Zimbabwe,2016,14600294,40963191406
8223,Zimbabwe,2017,14812482,44316742187
8224,Zimbabwe,2018,15034452,43420898437


With 4564 rows and a span of 1992 to 2019, there should be 163 unique countries and 28 unique years. Let's check.

In [5]:
popandgdp['Country'].nunique()

163

In [6]:
popandgdp['Year'].nunique()

28

Success!

# GDP analysis

In [7]:
popandgdp['GDP per capita'] = popandgdp['GDP per capita'] = popandgdp['Adjusted GDP in USD'] / popandgdp['Population']
popandgdp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  popandgdp['GDP per capita'] = popandgdp['GDP per capita'] = popandgdp['Adjusted GDP in USD'] / popandgdp['Population']


Unnamed: 0,Country,Year,Population,Adjusted GDP in USD,GDP per capita
21,Albania,1992,3282501,9767368164,2975.587262
22,Albania,1993,3277825,11222619140,3423.800581
23,Albania,1994,3269410,12672190429,3875.986930
24,Albania,1995,3258567,13669630859,4194.982291
25,Albania,1996,3245678,15067375000,4642.288915
...,...,...,...,...,...
8221,Zimbabwe,2015,14399013,39798644531,2763.984207
8222,Zimbabwe,2016,14600294,40963191406,2805.641544
8223,Zimbabwe,2017,14812482,44316742187,2991.851209
8224,Zimbabwe,2018,15034452,43420898437,2888.093190


In [8]:
popandgdp.isnull().sum()

Country                0
Year                   0
Population             0
Adjusted GDP in USD    0
GDP per capita         0
dtype: int64

In [9]:
# There is an indexing problem unless I do these steps. Will work on simplifying.
year1992 = popandgdp[popandgdp['Year'] == 1992]
year2019 = popandgdp[popandgdp['Year'] == 2019]
year1992copy = year1992.copy()
year1992copy.set_index('Country', inplace=True)
year2019copy = year2019.copy()
year2019copy.set_index('Country', inplace=True)
year1992copy['GDP per capita'] = year1992copy['Adjusted GDP in USD'] / year1992copy['Population']
year2019copy['GDP per capita'] = year2019copy['Adjusted GDP in USD'] / year2019copy['Population']

In [10]:
gdpcomp = pd.DataFrame()
gdpcomp['GDP 1992'] = year1992copy['Adjusted GDP in USD']
gdpcomp['GDP rank 1992'] = gdpcomp['GDP 1992'].rank(ascending=False, method='min').astype(int)
gdpcomp['GDP per capita 1992'] = year1992copy['GDP per capita']
gdpcomp['GDP per capita rank 1992'] = gdpcomp['GDP per capita 1992'].rank(ascending=False, method='min').astype(int)
gdpcomp['GDP 2019'] = year2019copy['Adjusted GDP in USD']
gdpcomp['GDP rank 2019'] = gdpcomp['GDP 2019'].rank(ascending=False, method='min').astype(int)
gdpcomp['GDP per capita 2019'] = year2019copy['GDP per capita']
gdpcomp['GDP per capita rank 2019'] = gdpcomp['GDP per capita 2019'].rank(ascending=False, method='min').astype(int)
gdpcomp['GDP rank change'] = gdpcomp['GDP rank 1992'] - gdpcomp['GDP rank 2019']
gdpcomp['GDP per capita rank change'] = gdpcomp['GDP per capita rank 1992'] - gdpcomp['GDP per capita rank 2019']
gdpcomp['GDP change raw'] = gdpcomp['GDP 2019'] - gdpcomp['GDP 1992']
gdpcomp['GDP change raw'] = gdpcomp['GDP change raw'].round(3)
gdpcomp['GDP per capita change raw'] = gdpcomp['GDP per capita 2019'] - gdpcomp['GDP per capita 1992']
gdpcomp['GDP per capita change raw'] = gdpcomp['GDP per capita change raw'].round(3)
gdpcomp['GDP growth pct'] = ((gdpcomp['GDP 2019'] - gdpcomp['GDP 1992']) / gdpcomp['GDP 1992']) * 100
gdpcomp['GDP growth pct'] = gdpcomp['GDP growth pct'].round(3)
gdpcomp['GDP per capita growth pct'] = ((gdpcomp['GDP per capita 2019'] - gdpcomp['GDP per capita 1992']) / gdpcomp['GDP per capita 1992']) * 100
gdpcomp['GDP per capita growth pct'] = gdpcomp['GDP per capita growth pct'].round(3)
gdpcomp['GDP growth equitability'] = gdpcomp['GDP per capita growth pct'] - gdpcomp['GDP growth pct']
#gdpcomp.set_index('Country', inplace=True)
gdpcomp

Unnamed: 0_level_0,GDP 1992,GDP rank 1992,GDP per capita 1992,GDP per capita rank 1992,GDP 2019,GDP rank 2019,GDP per capita 2019,GDP per capita rank 2019,GDP rank change,GDP per capita rank change,GDP change raw,GDP per capita change raw,GDP growth pct,GDP per capita growth pct,GDP growth equitability
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Albania,9767368164,112,2975.587262,117,36103042968,113,12514.009646,92,-1,25,26335674804,9538.422,269.629,320.556,50.927
Algeria,244668703125,33,9188.203554,69,507487562500,40,11721.743485,100,-7,-31,262818859375,2533.540,107.418,27.574,-79.844
Angola,38985789062,70,3138.014553,114,227855718750,61,7037.877091,119,9,-5,188869929688,3899.863,484.458,124.278,-360.180
Anguilla,251870574,162,28041.702739,25,225680526,162,15225.023679,77,0,-52,-26190048,-12816.679,-10.398,-45.706,-35.308
Argentina,283126843750,26,8403.004047,72,977420562500,25,21733.272331,63,1,9,694293718750,13330.268,245.224,158.637,-86.587
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Uruguay,35659320312,73,11357.549319,55,71122789062,88,20935.671567,65,-15,-10,35463468750,9578.122,99.451,84.333,-15.118
Vietnam,103313757812,52,1513.525458,137,724123375000,31,7451.839424,117,21,20,620809617188,5938.314,600.897,392.350,-208.547
Yemen,9439888671,116,629.767000,161,51828058593,99,1476.103111,155,17,6,42388169922,846.336,449.033,134.389,-314.644
Zambia,9762194335,113,1193.906859,145,56783714843,92,3067.095638,139,21,6,47021520508,1873.189,481.670,156.896,-324.774


Let's look at which countries have had the strongest GDP growth by percentage.

In [11]:
print(f"GDP growth percentage summary:")
print(f" Number of countries with positive scores: {gdpcomp[gdpcomp['GDP growth pct'] > 0].index.nunique()}")
print(f" Number of countries with negative scores: {gdpcomp[gdpcomp['GDP growth pct'] < 0].index.nunique()}")
print(f" Mean score: {gdpcomp['GDP growth pct'].mean().round(3)} (Closest: {gdpcomp['GDP growth pct'].sub(gdpcomp['GDP growth pct'].mean()).abs().idxmin()})")
print(f" Median score: {gdpcomp['GDP growth pct'].median().round(3)} ({gdpcomp['GDP growth pct'].sub(gdpcomp['GDP growth pct'].median()).abs().idxmin()})")
print(f" Highest score: {gdpcomp['GDP growth pct'].max().round(3)} ({gdpcomp['GDP growth pct'].idxmax()})")
print(f" Lowest score: {gdpcomp['GDP growth pct'].min()} ({gdpcomp['GDP growth pct'].idxmin()})")

GDP growth percentage summary:
 Number of countries with positive scores: 160
 Number of countries with negative scores: 3
 Mean score: 260.468 (Closest: Türkiye)
 Median score: 184.037 (Georgia)
 Highest score: 2831.032 (Equatorial Guinea)
 Lowest score: -76.679 (Montserrat)


In [12]:
gdpcomp['GDP growth pct'].nlargest(10)

Country
Equatorial Guinea    2831.032
Qatar                2065.349
Ethiopia              847.464
Bahrain               841.667
Laos                  745.554
El Salvador           735.598
Mali                  652.104
Jordan                645.798
Myanmar               637.763
Egypt                 608.297
Name: GDP growth pct, dtype: float64

In [13]:
gdpcomp['GDP growth pct'].nsmallest(10)

Country
Montserrat                       -76.679
Zimbabwe                         -32.145
Anguilla                         -10.398
Aruba                              2.871
Barbados                           6.050
Dominica                           7.051
Ukraine                           10.314
Japan                             25.029
Guinea                            28.865
St. Vincent and the Grenadines    33.606
Name: GDP growth pct, dtype: float64

Now let's see what countries have seen the strong per capita GDP growth.

In [14]:
print(f"GDP per capita percentage growth summary:")
print(f" Number of countries with positive scores: {gdpcomp[gdpcomp['GDP per capita growth pct'] > 0].index.nunique()}")
print(f" Number of countries with negative scores: {gdpcomp[gdpcomp['GDP per capita growth pct'] < 0].index.nunique()}")
print(f" Mean score: {gdpcomp['GDP per capita growth pct'].mean().round(3)} (Closest: {gdpcomp['GDP per capita growth pct'].sub(gdpcomp['GDP per capita growth pct'].mean()).abs().idxmin()})")
print(f" Median score: {gdpcomp['GDP per capita growth pct'].median().round(3)} ({gdpcomp['GDP per capita growth pct'].sub(gdpcomp['GDP per capita growth pct'].median()).abs().idxmin()})")
print(f" Highest score: {gdpcomp['GDP per capita growth pct'].max().round(3)} ({gdpcomp['GDP per capita growth pct'].idxmax()})")
print(f" Lowest score: {gdpcomp['GDP per capita growth pct'].min()} ({gdpcomp['GDP per capita growth pct'].idxmin()})")

GDP per capita percentage growth summary:
 Number of countries with positive scores: 150
 Number of countries with negative scores: 13
 Mean score: 127.905 (Closest: Rwanda)
 Median score: 88.575 (Mozambique)
 Highest score: 796.362 (Equatorial Guinea)
 Lowest score: -52.445 (Zimbabwe)


In [15]:
gdpcomp['GDP per capita growth pct'].nlargest(10)

Country
Equatorial Guinea    796.362
El Salvador          650.094
Myanmar              473.921
Laos                 432.691
China                413.089
Ireland              397.556
Vietnam              392.350
Mongolia             354.843
India                338.500
Lebanon              337.114
Name: GDP per capita growth pct, dtype: float64

In [16]:
gdpcomp['GDP per capita growth pct'].nsmallest(10)

Country
Zimbabwe                   -52.445
Montserrat                 -48.699
Anguilla                   -45.706
United Arab Emirates       -43.168
Aruba                      -33.831
Guinea                     -31.640
DR Congo                   -31.422
Burundi                    -23.138
Belize                     -17.574
Central African Republic   -10.478
Name: GDP per capita growth pct, dtype: float64

Let's took at GDP growth equitability, which measures the difference between total and per capita GDP growth. A positive score means per capita growth outpaced total growth, and a negative score the opposite.

In [17]:
print(f"Growth Equitability Score summary:")
print(f" Number of countries with positive scores: {gdpcomp[gdpcomp['GDP growth equitability'] > 0].index.nunique()}")
print(f" Number of countries with negative scores: {gdpcomp[gdpcomp['GDP growth equitability'] < 0].index.nunique()}")
print(f" Mean score: {gdpcomp['GDP growth equitability'].mean().round(3)} (Closest: {gdpcomp['GDP growth equitability'].sub(gdpcomp['GDP growth equitability'].mean()).abs().idxmin()})")
print(f" Median score: {gdpcomp['GDP growth equitability'].median().round(3)} ({gdpcomp['GDP growth equitability'].sub(gdpcomp['GDP growth equitability'].median()).abs().idxmin()})")
print(f" Highest score: {gdpcomp['GDP growth equitability'].max().round(3)} ({gdpcomp['GDP growth equitability'].idxmax()})")
print(f" Lowest score: {gdpcomp['GDP growth equitability'].min()} ({gdpcomp['GDP growth equitability'].idxmin()})")

Growth Equitability Score summary:
 Number of countries with positive scores: 19
 Number of countries with negative scores: 144
 Mean score: -132.563 (Closest: Peru)
 Median score: -84.266 (Suriname)
 Highest score: 112.027 (Georgia)
 Lowest score: -2034.67 (Equatorial Guinea)


In [18]:
gdpcomp['GDP growth equitability'].nlargest(10)

Country
Georgia       112.027
Moldova       106.901
Armenia        78.688
Lithuania      75.284
Latvia         72.507
Romania        56.694
Albania        50.927
Croatia        47.323
Estonia        47.030
Montserrat     27.980
Name: GDP growth equitability, dtype: float64

In [19]:
gdpcomp['GDP growth equitability'].nsmallest(10)

Country
Equatorial Guinea   -2034.670
Qatar               -1801.875
Bahrain              -594.485
Ethiopia             -522.972
Jordan               -457.337
Mali                 -411.060
Angola               -360.180
Oman                 -339.578
Liberia              -335.768
Zambia               -324.774
Name: GDP growth equitability, dtype: float64

Growth has been strong overall. Out of 163 countries, 160 have grown their economies in absolute terms, and 150 have seen the average person get richer. Despite this, only 19 countries have strong per capita than total growth. This creates some discrepancies.

For example, Equatorial Guinea has an extremely low equitability score because its total GDP growth was vastly stronger, despite per capita GDP growth still being the highest in the world at nearly 800%. Thus, -2000 feels too harsh. 

On the other hand, Montserrat has a positive equitability score of about 24. But this is misleading because its total GDP was almost cut in half. This is an especially poor performance considering it is one of just 3 countries to experience absolute contraction.

To better balance all factors, I created a new metric called ['Adjusted growth equitability score'] that balances multiple factors:

A higher ['GDP per capita rank change'] is rewarded, and a lower rank is punished. Using the ranking, rather than raw numbers, allows each country's growth to be fairly measured relative to others. This will prevent countries from receiving high scores if their economy has failed to keep pace with worldwide growth.

A lower ['GDP growth equitability'] is punished, the penalty increasing with a larger ['GDP growth raw']. It wouldn't be fair to measure ['GDP growth raw'] on its own terms considering the difference in population between countries, but it is noteworthy if massive amounts of wealth are created without being shared equitably. This also creates leniency for countries whose economies were smaller to begin with.

I'll also list the rankings for each country.

In [20]:
# Calculate an adjusted growth metric
gdpcomp['Adjusted growth equitability score'] = (
    gdpcomp['GDP per capita rank change']
    - (gdpcomp['GDP growth equitability'] * gdpcomp['GDP change raw'] / gdpcomp['GDP change raw'].max())
).round(3)

# Calculate each country's ranking in the new metric
gdpcomp['Adjusted growth equitability rank'] = gdpcomp['Adjusted growth equitability score'].rank(ascending=False, method='min').astype(int)

Let's see what this new metric looks like.

In [21]:
print(f"Adjusted Growth Equitability Score summary:")
print(f" Number of countries with positive scores: {gdpcomp[gdpcomp['Adjusted growth equitability score'] > 0].index.nunique()}")
print(f" Number of countries with negative scores: {gdpcomp[gdpcomp['Adjusted growth equitability score'] < 0].index.nunique()}")
print(f" Mean score: {gdpcomp['Adjusted growth equitability score'].mean().round(3)} (Closest: {gdpcomp['Adjusted growth equitability score'].sub(gdpcomp['Adjusted growth equitability score'].mean()).abs().idxmin()})")
print(f" Median score: {gdpcomp['Adjusted growth equitability score'].median()} ({gdpcomp['Adjusted growth equitability score'].sub(gdpcomp['Adjusted growth equitability score'].median()).abs().idxmin()})")
print(f" Highest score: {gdpcomp['Adjusted growth equitability score'].max()} ({gdpcomp['Adjusted growth equitability score'].idxmax()})")
print(f" Lowest score: {gdpcomp['Adjusted growth equitability score'].min()} ({gdpcomp['Adjusted growth equitability score'].idxmin()})")

Adjusted Growth Equitability Score summary:
 Number of countries with positive scores: 91
 Number of countries with negative scores: 72
 Mean score: 3.143 (Closest: Trinidad and Tobago)
 Median score: 2.399 (Costa Rica)
 Highest score: 141.498 (China)
 Lowest score: -64.0 (Montserrat)


In [22]:
gdpcomp['Adjusted growth equitability score'].nlargest(15)

Country
China                141.498
India                126.637
Equatorial Guinea     68.880
Qatar                 45.722
El Salvador           39.232
Ireland               36.853
Egypt                 35.588
Myanmar               35.324
Panama                32.531
Bahrain               30.413
Lebanon               30.212
Romania               29.753
Poland                28.996
Singapore             28.827
Ethiopia              28.404
Name: Adjusted growth equitability score, dtype: float64

In [23]:
gdpcomp['Adjusted growth equitability score'].nsmallest(15)

Country
Montserrat                       -64.000
Anguilla                         -52.000
Zimbabwe                         -48.023
Barbados                         -41.000
Dominica                         -41.000
Aruba                            -37.000
Belize                           -35.996
Guinea                           -34.977
Comoros                          -31.996
Algeria                          -29.764
Eswatini                         -28.996
Ukraine                          -26.058
St. Vincent and the Grenadines   -25.000
Gabon                            -23.786
Tunisia                          -20.760
Name: Adjusted growth equitability score, dtype: float64

Intuitively, this feels like a much better measure of economic improvement.

Montserrat has the lowest adjusted score mostly because its per capita GDP rank went from 10th in 1992 to 74th in 2019, the worst dropoff in the world. A mildly "positive" equitability couldn't save it.

Equatorial Guinea, despite extreme inequitability, has the third highest adjusted score at about 68.8. This feels fair because though it has come a long way, it remains relatively poor compared to China and India.

Have a country in mind? Enter its name in the quotes to see all of its statistics!

In [65]:
def lookup_country_stats(df, country):
    if country in df.index:
        print(f"Statistics for {country}:\n" + "-" * (13 + len(country)))
        for col, val in zip(df.columns, df.loc[country].values.tolist()):
            print(f"{col}: {val}")
        print(f"(Ranks are out of 163)")
    else:
        print(f"Country '{country}' not in dataset \n (May have had missing data or does not exist) \n (Check spelling as country name may include special characters)")

In [66]:
# Example usage:
lookup_country_stats(gdpcomp, 'United States')

Statistics for United States:
--------------------------
GDP 1992: 10301173000000.0
GDP rank 1992: 1.0
GDP per capita 1992: 39680.02375168351
GDP per capita rank 1992: 6.0
GDP 2019: 20595844000000.0
GDP rank 2019: 1.0
GDP per capita 2019: 60972.31982845724
GDP per capita rank 2019: 10.0
GDP rank change: 0.0
GDP per capita rank change: -4.0
GDP change raw: 10294671000000.0
GDP per capita change raw: 21292.296
GDP growth pct: 99.937
GDP per capita growth pct: 53.66
GDP growth equitability: -46.277
Adjusted growth equitability score: 24.07
Adjusted growth equitability rank: 24.0
(Ranks are out of 163)
