In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import warnings; warnings.simplefilter('ignore')

In [None]:
# Read the merged dataset from the CSV file in the sibling data directory.
fully_merged = pd.read_csv(
    filepath_or_buffer='../data/fully_merged_to_be_imported.csv',
)

# Summary statistics for every column whose dtype is not object, int64 or
# bool; as the last expression, the table is rendered as the cell output.
fully_merged.describe(exclude=['object', 'int64', 'bool'])

In [None]:
# Keep only the rows whose Year is one of the three sampled years.
sample_years = fully_merged.loc[
    lambda frame: frame['Year'].isin([2018, 2010, 2000])
]

# Histogram of the <$5.50/day poverty share; bars are coloured by Year and
# placed side by side rather than stacked.
fig = px.histogram(
    data_frame=sample_years,
    x="% Population Living on <$5.50/Day",
    color='Year',
    barmode='group',
)
fig.show()

In [None]:
# Histogram of the <$1.90/day poverty share for the sampled years; bars are
# coloured by Year and placed side by side rather than stacked.
fig = px.histogram(
    data_frame=sample_years,
    x="% Population Living on <$1.90/Day",
    color='Year',
    barmode='group',
)
fig.show()

In [None]:
# Histogram of the income share held by the top 10% for the sampled years;
# bars are coloured by Year and placed side by side rather than stacked.
fig = px.histogram(
    data_frame=sample_years,
    x="% Income Held by Top 10%",
    color='Year',
    barmode='group',
)
fig.show()

Limitations on the above: Sanctions are marked "true" even for years in which no US sanctions were imposed, which would make the data look *worse*. Ironically, however, the distributions look remarkably similar for the poverty data and better for the income inequality data. There may also be a sampling bias in the data, given the missing values from countries under US sanctions.

In [None]:
# Bubble chart of income inequality (x) against deep poverty (y): one marker
# per row, coloured by continent and sized by GDP per capita.
#
# Rows missing any of the three plotted values are dropped before plotting.
# A point without x or y cannot be drawn anyway, and px.scatter raises a
# ValueError when the marker-size column contains NaN.
fig = px.scatter(fully_merged.dropna(subset = ["% Income Held by Top 10%",
                                               "% Population Living on <$1.90/Day",
                                               "GDP_Per_Capita"]),
                 x = "% Income Held by Top 10%",
                 y = "% Population Living on <$1.90/Day",
                 color = "Continent",
                 size= "GDP_Per_Capita",
                 hover_data = ["Country", "Year"],
                 title = "Income Inequality vs. Population Living on <$1.90/Day")
fig.show()

In [None]:
# Bubble chart of income inequality (x) against the <$5.50/day poverty share
# (y): one marker per row, coloured by continent and sized by GDP per capita.
#
# Rows missing any of the three plotted values are dropped before plotting.
# A point without x or y cannot be drawn anyway, and px.scatter raises a
# ValueError when the marker-size column contains NaN.
fig = px.scatter(fully_merged.dropna(subset = ["% Income Held by Top 10%",
                                               "% Population Living on <$5.50/Day",
                                               "GDP_Per_Capita"]),
                 x = "% Income Held by Top 10%",
                 y = "% Population Living on <$5.50/Day",
                 color = "Continent",
                 size= "GDP_Per_Capita",
                 hover_data = ["Country", "Year"],
                 title = "Income Inequality vs. Population Living on <$5.50/Day")
fig.show()

Deep poverty (living on <$1.90/day) is less correlated with income inequality, but the less extreme poverty measure (living on <$5.50/day) does appear to track it, particularly for countries in North America, South America and Africa. This conclusion holds less well for countries in Europe and Asia. Interestingly, GDP per capita seems to be negatively correlated with both.

In [None]:
# Scatter of GDP per capita against the income share held by the top 10%,
# with an ordinary-least-squares trendline overlaid by Plotly Express
# (NOTE: the "ols" trendline relies on statsmodels being installed).
fig = px.scatter(
    data_frame=fully_merged,
    x="% Income Held by Top 10%",
    y="GDP_Per_Capita",
    hover_data=["Country", "Year"],
    trendline="ols",
)
fig.show()

In [None]:
# Pairwise correlations (DataFrame.corr defaults) between the economic
# indicators and the US-sanctions flag, drawn as an annotated heatmap.
relevant_columns = ["GDP_Per_Capita", 
                    "% Income Held by Top 10%", 
                    "% Population Living on <$5.50/Day", 
                    "% Population Living on <$1.90/Day", 
                    "US Sanctions"]
corrs = fully_merged[relevant_columns].corr()

# Each cell is labelled with its correlation rounded to two decimals.
figure = ff.create_annotated_heatmap(
    z = corrs.values,
    x = list(corrs.columns),
    y = list(corrs.index),
    annotation_text = corrs.round(2).values,
    showscale = True)

# A heatmap places its first row at the bottom by default, which shows the
# matrix vertically flipped. Reversing the y-axis makes the rows read
# top-to-bottom in the same order as `corrs`, with the diagonal running from
# top-left to bottom-right.
figure.update_yaxes(autorange = "reversed")
figure.show()