In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
from ipywidgets import interact
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import warnings; warnings.simplefilter('ignore')

In [None]:
# Load the merged country-level dataset that every later cell reads from.
fully_merged = pd.read_csv(
    '../data/fully_merged_to_be_imported.csv'
)

# Summary statistics for every column whose dtype is not object, int64 or bool.
# Left as the cell's last expression so the table is rendered as output.
fully_merged.describe(
    exclude = ['object', 'int64', 'bool']
)

Note that the IQRs for the statistics collected are greatest for GDP per capita and the percentage of the population living on <$5.50/day; both also show extreme ranges and standard deviations. Each statistic is right-skewed as well: the mean is greater than the median for each. 

**Note**: Country data only includes data from 1990 to 2019 to make all datasets cover about the same information over the same period of time.

In [None]:
# Keep only the rows for three snapshot years (2000, 2010, 2018).
sample_years = fully_merged[fully_merged['Year'].isin([2000, 2010, 2018])]

# Stacked histogram of the <$5.50/day poverty share, one colour per continent.
fig = px.histogram(
    data_frame = sample_years,
    x = "% Population Living on <$5.50/Day",
    nbins = 20,
    color = 'Continent',
    barmode = 'stack',
)
fig.show()

Europe enjoys by far the least poverty among its countries. Poverty in the remainder of the continents is distributed roughly equally, ignoring Oceania (given its size), and noting that certain African countries experience extremely high rates of poverty (the distribution for Africa is almost flipped). 

In [None]:
# Histogram of the <$1.90/day poverty share for the snapshot years held in
# `sample_years`, coloured by year, with a box plot drawn in the margin.
fig = px.histogram(
    data_frame = sample_years,
    x = "% Population Living on <$1.90/Day",
    marginal = 'box',
    color = 'Year',
    color_discrete_sequence = px.colors.qualitative.Dark2,
)
fig.show()

Extremely deep poverty appears to have been alleviated over the years, as percentages in most countries dropped to nearly 0% between 2000 and 2018. Note also the decrease in outliers and variance.

In [None]:
# Histogram of the income share held by the top 10% over the full dataset;
# continents are told apart by bar fill pattern rather than by colour.
fig = px.histogram(
    data_frame = fully_merged,
    x = "% Income Held by Top 10%",
    pattern_shape = "Continent",
)
fig.show()

Income inequality has historically trended higher in North and South America overall, though extremely high outliers of income concentration are located in Africa.

In [None]:
# Income concentration (x) against the <$1.90/day poverty share (y).
# Marker colour encodes continent and marker size encodes GDP per capita;
# hovering over a point shows its country and year.
fig = px.scatter(
    data_frame = fully_merged,
    x = "% Income Held by Top 10%",
    y = "% Population Living on <$1.90/Day",
    size = "GDP_Per_Capita",
    color = "Continent",
    hover_data = ["Country", "Year"],
)
fig.show()

In [None]:
# Income concentration (x) against the <$5.50/day poverty share (y).
# Marker colour encodes continent and marker size encodes GDP per capita;
# hovering over a point shows its country and year.
fig = px.scatter(
    data_frame = fully_merged,
    x = "% Income Held by Top 10%",
    y = "% Population Living on <$5.50/Day",
    size = "GDP_Per_Capita",
    color = "Continent",
    hover_data = ["Country", "Year"],
)
fig.show()

Deep poverty is less correlated with income inequality than poverty measured at the higher threshold, which does appear to track inequality--particularly for countries in North America, South America and Africa. This conclusion holds less well for countries in Europe and Asia. The foregoing suggests that income distribution among percentiles is, to some extent, a zero-sum game.

In [None]:
# Income concentration (x) against GDP per capita (y) with an "ols" trendline
# overlaid on the points; hovering over a point shows its country and year.
fig = px.scatter(
    data_frame = fully_merged,
    x = "% Income Held by Top 10%",
    y = "GDP_Per_Capita",
    trendline = "ols",
    hover_data = ["Country", "Year"],
)
fig.show()

Contrary to the above conclusions, plotting income inequality against GDP per capita indicates a negative correlation between the two. To the extent that income distribution is not a zero-sum game, increased concentration of income at the top end of the distribution does not result in higher overall productivity. Note the distinction between wealth and income for clarity. Note also that the majority of the outliers belong to Luxembourg.

In [None]:
# Columns whose pairwise correlations are shown in the heatmap.
relevant_columns = [
    "GDP_Per_Capita",
    "% Income Held by Top 10%",
    "% Population Living on <$5.50/Day",
    "% Population Living on <$1.90/Day",
    "US Sanctions",
]

# Pairwise correlation matrix (pandas' default method) of the chosen columns.
corrs = fully_merged.loc[:, relevant_columns].corr()

# Annotated heatmap: each cell is labelled with its correlation rounded to two
# decimals, and a colour scale is drawn alongside.
figure = ff.create_annotated_heatmap(
    z = corrs.to_numpy(),
    x = corrs.columns.tolist(),
    y = corrs.index.tolist(),
    annotation_text = corrs.round(2).to_numpy(),
    showscale = True,
)
figure.show()

The correlation heatmap emphasizes the apparent results of the scatterplots above, with one addition. Based on current data (which admittedly is limited), US sanctions are not strongly correlated with worse GDP Per Capita, poverty or income inequality. This is contrary to what would be expected, and more study and data are needed.

In [None]:
# Size of the random draw, and the seed that keeps the draw repeatable across
# Restart & Run All.
N_SAMPLED_COUNTRIES = 10
SAMPLE_SEED = 42

# Draw from the distinct country names. Sampling the raw "Country" column draws
# rows, so the same country can come up more than once and fewer than ten
# countries would end up on the plot.
unique_countries = fully_merged["Country"].dropna().drop_duplicates()
list_of_countries = list(
    unique_countries.sample(n = min(N_SAMPLED_COUNTRIES, len(unique_countries)),
                            random_state = SAMPLE_SEED))

# Restrict the frame itself to the sampled countries so that x and y are read
# from the same rows, instead of relying on index alignment between a filtered
# Series and the full frame.
sampled_rows = fully_merged.loc[fully_merged["Country"].isin(list_of_countries)]
countries_series = sampled_rows["Country"]  # name kept in case a later cell uses it

# Statistics offered in the widget's dropdown.
relevant_columns = ["GDP_Per_Capita", 
                    "% Income Held by Top 10%", 
                    "% Population Living on <$5.50/Day"]

@interact(y = relevant_columns)
def make_boxen(y):
    """Draw one boxen plot per sampled country for the selected statistic.

    Parameters
    ----------
    y : str
        Name of the column in ``fully_merged`` to plot on the vertical axis;
        supplied by the ``interact`` dropdown built from ``relevant_columns``.
    """
    sns.set(rc={'figure.figsize':(15,10)})
    sns.boxenplot(x = "Country", y = y, data = sampled_rows, orient = "v", palette = "Set3", k_depth = "full")
    plt.show()

This code randomly samples 10 countries and generates boxenplots for each on the statistics for GDP Per Capita, poverty percentages and income inequality. There's not much to say here, as statistics for each country can vary wildly in terms of median, IQR, overall range and variability.

In [None]:
fig = px.choropleth(fully_merged, 
                    locations = "Code",
                    color = "% Population Living on <$5.50/Day",
                    color_continuous_scale = px.colors.sequential.haline,
                    projection = "orthographic",
                    hover_data = ["Country"])
fig.show()