In [None]:
import pandas as pd
import numpy as np
import scipy.stats as sps
import matplotlib.pyplot as plt
import seaborn as sns

# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()

In [None]:
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load 'country_data.pkl' when it comes from a trusted source.
# `df` is iterated with .items() and each value is indexed with .iloc[-1, :],
# i.e. it is presumably a mapping of country name -> time-series DataFrame
# (TODO confirm against the code that writes the pickle).
df = pd.read_pickle('country_data.pkl')

# One row per country, holding the LAST row of that country's frame.
# All rows are built first and concatenated once: DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
all_countries = pd.concat(
    [pd.DataFrame({c: v.iloc[-1, :]}).T for c, v in df.items()]
)

# Duplicate the 'United Kingdom' row under the short label 'UK'
# (presumably so tables keyed by either spelling can be joined later).
all_countries.loc['UK', :] = all_countries.loc['United Kingdom', :]
all_countries.drop(['conf_over_dead', 'dead_conf_ratio'], axis=1, inplace=True)

# Keep only countries with at least one death; this also guarantees that
# log10 of 'deceased' below is finite. The explicit copy makes the result an
# independent frame, so adding columns to it does not raise
# SettingWithCopyWarning.
mask = all_countries['deceased'] > 0
all_countries = all_countries[mask].copy()

all_countries['conf_log'] = np.log10(all_countries['confirmed'])
all_countries['dead_log'] = np.log10(all_countries['deceased'])

all_countries

In [None]:
# Country reference table read from a semicolon-separated file; only file
# columns 1, 2, 4 and 5 are loaded and one of them serves as the row index.
world_facts = pd.read_csv(
    'world_density.csv',
    sep=';',
    index_col=0,
    usecols=[1, 2, 4, 5],
)
# Per the original author's note, the file uses a dot as thousands separator,
# so the parsed population is scaled back up by a factor of 1000.
world_facts['pop2020'] = world_facts['pop2020'] * 1000
world_facts

In [None]:
def strip(x):
    """Return `x` with every '$' and ',' character removed.

    Used as a read_csv converter so that currency-formatted text such as
    '$1,234' becomes '1234' and can later be cast to a number.
    """
    for symbol in ('$', ','):
        x = x.replace(symbol, '')
    return x

# GDP table: headerless, semicolon-separated. File columns 1 and 6 are read
# as 'state' (used as the index) and 'GDPpc'; `strip` removes '$' and ','
# while parsing so the column can be cast to integers afterwards.
world_gdp = pd.read_csv(
    'world_gdp.csv',
    sep=';',
    header=None,
    usecols=[1, 6],
    names=['state', 'GDPpc'],
    index_col='state',
    converters={'GDPpc': strip},
)
world_gdp['GDPpc'] = world_gdp['GDPpc'].astype(int)

# Attach the GDP column to the facts table under the shorter name 'gdp'.
world_facts = world_facts.join(world_gdp).rename(columns={'GDPpc': 'gdp'})

# Duplicate two rows under their short labels so both spellings resolve.
for short_name, long_name in (('US', 'United States'), ('UK', 'United Kingdom')):
    world_facts.loc[short_name, :] = world_facts.loc[long_name, :]
world_facts.loc['UK']

In [None]:
# Median age per country from a headerless, tab-separated file. The separator
# is given as the regular expression r'\t', which is why the python parser
# engine is requested explicitly.
world_age = pd.read_csv(
    'world_median_age.csv',
    sep=r'\t',
    engine='python',
    header=None,
    names=['state', 'median_age'],
    usecols=[1, 2],
    index_col=0,
)

# Index-aligned join: adds 'median_age' to the facts table.
world_facts = world_facts.join(world_age)
world_facts.loc['US']

In [None]:
# Country abbreviations: headerless file whose two columns are named
# 'abb' and 'state'; the 'state' column (position 1) becomes the index.
abb = pd.read_csv(
    'country_abb.csv',
    sep=';',
    header=None,
    names=['abb', 'state'],
    index_col=1,
)
world_facts = world_facts.join(abb)

# Bring every reference column onto the per-country table, then add
# base-10 logarithms of density and gdp.
all_countries = all_countries.join(world_facts)
for raw_col in ('density', 'gdp'):
    all_countries[raw_col + '_log'] = np.log10(all_countries[raw_col])

# Discard countries with any missing value and order the rest by median age.
all_countries = all_countries.dropna().sort_values('median_age')
all_countries.sort_values('median_age')

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the joined table.
all_countries.describe()

In [None]:
# `sps` (scipy.stats) is imported in the notebook's import cell with the other
# dependencies, so this cell no longer hides an import mid-notebook.

# Total GDP per country: the 'gdp' column (presumably per-capita, it was
# loaded from a column named 'GDPpc') multiplied by the 2020 population.
all_countries['GDP_tot'] = all_countries['gdp'] * all_countries['pop2020']

# Least-squares line of 'gdp' against row position in the frame's current
# order (the fit runs BEFORE the re-sort below).
# NOTE(review): slope/intercept are not used by any later cell visible here;
# they are kept because they are notebook-level names.
slope, intercept, _, _, _ = sps.linregress(
    range(len(all_countries['gdp'])), all_countries['gdp']
)

# Largest economies first; the bar chart further down relies on this order.
all_countries.sort_values('GDP_tot', ascending=False, inplace=True)

all_countries

In [None]:
# 25th / 50th / 75th percentiles of the 'gdp' column, printed for reference.
quartiles = np.percentile(all_countries['gdp'],[25,50,75])
print (quartiles)

# Large canvas with two y-axes over one shared x-axis:
# `ax` carries the bars, its twin `ax2` carries the cumulative curve.
plt.figure(figsize=(36,24))
ax = plt.gca()
ax2 = plt.twinx()

# plt.title acts on the current axes at this point.
plt.title('World Economic unequality - GDP by country')
# One bar per row of all_countries, in the frame's current row order.
ax.bar(all_countries.index,(all_countries['GDP_tot']),label='GDP')
ax.set_xticklabels(all_countries.index,rotation=90)

# Running total of GDP_tot down the rows, then the values of that running
# total at the 0%, 10%, ..., 100% quantiles. 'pct' expresses each of those
# values as a fraction of the value at the last (100%) quantile.
cumsum = all_countries['GDP_tot'].cumsum()
cumsum_deciles = pd.DataFrame(cumsum.quantile(np.arange(0,1.1,0.1)))
cumsum_deciles['pct'] = cumsum_deciles['GDP_tot'] / cumsum_deciles['GDP_tot'].iloc[-1]
print (cumsum_deciles)

# Running total divided by its final value (so the curve ends at 1),
# drawn in red on the twin axis.
(cumsum / cumsum.iloc[-1]).plot(ax=ax2,label='GDP cumulative',color='r')

ax.legend(loc='upper left')
ax2.legend(loc='upper right')

# Print the full record of the row with the largest 'gdp' value
# (the result of argmax is used as a position, hence .iloc).
max_gdp_idx = all_countries['gdp'].argmax()
print (all_countries.iloc[max_gdp_idx])



In [None]:
#Gini index

def capitalize(x):
    """Normalize a country name to the capitalization used by the table index.

    The result is used as a join key against labels such as 'United Kingdom',
    so every space-separated word is capitalized, not just the first one
    (the previous `x.lower().capitalize()` produced 'United kingdom', which
    can never match). The connector words 'and', 'of' and 'the' stay
    lowercase unless they start the name, e.g. 'Bosnia and Herzegovina'.

    Parameters
    ----------
    x : str
        Raw country name in any letter case.

    Returns
    -------
    str
        `x` unchanged when it is exactly 'US' or 'UK'; otherwise the
        re-capitalized name. Only spaces separate words, so a hyphenated
        part like 'GUINEA-BISSAU' becomes 'Guinea-bissau'.
    """
    if x in ('US', 'UK'):
        return x

    minor_words = ('and', 'of', 'the')
    # split(' ') (not split()) keeps the original spacing intact on re-join.
    words = x.lower().split(' ')
    return ' '.join(
        word if (pos > 0 and word in minor_words) else word.capitalize()
        for pos, word in enumerate(words)
    )

# Gini table: headerless, semicolon-separated. File columns 1 and 2 are kept;
# column 1 is passed through `capitalize` while parsing, then the two columns
# are named 'state' / 'gini' and 'state' becomes the index.
gini = (
    pd.read_csv(
        'gini_index.csv',
        sep=';',
        header=None,
        usecols=[1, 2],
        converters={1: capitalize},
    )
    .rename(columns={1: 'state', 2: 'gini'})
    .set_index('state')
)

# Index-aligned join; countries without a matching 'state' get NaN in 'gini'.
all_countries = all_countries.join(gini)


In [None]:
# Number of rows whose 'gini' value is missing (NaN).
all_countries['gini'].isna().sum()

In [None]:
# Number of quantile bins for 'gdp' (4 -> quartiles).
cats = 4

# Bins are labelled 1..cats in ascending order of 'gdp', so 1 is the lowest
# quantile group and `cats` the highest.
bin_labels = range(1, cats + 1)
gdp_cat = pd.qcut(all_countries['gdp'], q=cats, labels=bin_labels)
all_countries['gdp_cat'] = gdp_cat

all_countries

In [None]:
# Summary statistics of the table in its current state.
all_countries.describe()

In [None]:
# Persist the assembled per-country table. Note the file is named
# 'world_facts.pkl' although it stores `all_countries`, not `world_facts`.
all_countries.to_pickle('world_facts.pkl')


In [None]:
# Distribution of 'gdp' within each 'gdp_cat' bin: violins with an inner box
# plot, widths scaled by the number of observations in each bin.
fig, ax = plt.subplots(figsize=(18, 12))
sns.violinplot(x='gdp_cat', y='gdp', data=all_countries,
               inner='box', scale='count', ax=ax)

In [None]:
# Inspect one country's full record; raises KeyError if 'Sweden' is not
# (or is no longer) a row label of the table.
all_countries.loc['Sweden']