In [None]:
import pandas as pd
import numpy as np

from fuzzywuzzy import process

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from collections import defaultdict
from scipy.stats import pearsonr

from sklearn.preprocessing import MinMaxScaler

from subprocess import check_output
# Show the contents of ../input (output of `ls`) so the available data files are visible.
print(str(check_output(["ls", "../input"]), "utf8"))

In [None]:
def prep(input_dir='../input'):
    """Load the GDP table from the input directory.

    Reads ``EF_GDP(constant2010USD).csv`` from ``input_dir`` and calls
    ``reset_index()`` on it, so the frame's index is moved into a regular
    column (named ``index`` for a default index) and replaced by a fresh
    0..n-1 index.

    Only the GDP table is loaded: nothing else is returned by this function,
    so no other input file is read.

    Parameters
    ----------
    input_dir : str, optional
        Directory that contains the CSV file. Defaults to ``'../input'``.

    Returns
    -------
    pandas.DataFrame
        The GDP table with the former index available as a column.
    """
    import os  # local import: the notebook's import cell does not provide `os`

    gdp_path = os.path.join(input_dir, 'EF_GDP(constant2010USD).csv')
    return pd.read_csv(gdp_path).reset_index()

In [None]:
def _relabel_rank_ticks(locs, labels, ranked_pct, top_pct, top_loc):
    """Convert rank-position ticks into percent tick labels.

    Interior ticks (all but the first and the last two) are labelled with the
    value found in ``ranked_pct`` at the tick's location; the second-to-last
    tick is relabelled with ``top_pct`` and moved to ``top_loc``. The first
    and last ticks are dropped from the result.

    Parameters
    ----------
    locs : array-like
        Tick locations as returned by ``plt.xticks()`` / ``plt.yticks()``.
        Modified in place (``locs[-2]`` is overwritten).
    labels : sequence
        Tick label objects exposing ``get_text()``.
    ranked_pct : pandas.Series
        Percent values sorted ascending with a 0..n-1 index.
    top_pct : float
        Value shown on the relocated second-to-last tick.
    top_loc : float
        Axis position for that tick.

    Returns
    -------
    tuple
        ``(locations, label_texts)`` ready to pass back to the ticks setter.
    """
    texts = [tick.get_text() for tick in labels]
    # NOTE(review): assumes every interior tick location is an integer-valued
    # position that exists in ranked_pct; otherwise the lookup raises KeyError.
    texts[1:-2] = ['%.2f%%' % ranked_pct[pos] for pos in locs[1:-2]]
    texts[-2] = '%.2f%%' % top_pct
    locs[-2] = top_loc
    return locs[1:-1], texts[1:-1]


def dplot(diffdf, min_year, max_year):
    """Scatter-plot ecological-footprint growth rank against GDP growth rank.

    Every row of ``diffdf`` is drawn as a point sized by ``GDP_std`` and
    annotated with its ``Country`` name. Axis ticks are relabelled so that
    they show the percent change (``GDPDelta_P`` / ``EFDelta_P``) found at the
    corresponding rank position instead of the rank itself.

    Parameters
    ----------
    diffdf : pandas.DataFrame
        Must provide the columns ``Country``, ``GDPDelta_Rank``,
        ``EFDelta_Rank``, ``GDP_std``, ``GDPDelta_P`` and ``EFDelta_P``.
    min_year, max_year : str
        Only used to build the figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(15, 13))
    sns.regplot(data=diffdf, x='GDPDelta_Rank', y='EFDelta_Rank',
                scatter_kws={'s': diffdf['GDP_std'], 'color': 'b'},
                fit_reg=False, ax=ax)
    plt.title('Growth in GDP vs. Ecological Footprint\n%s - %s'
              % (min_year, max_year), fontsize=20)
    plt.xlabel('GDP Growth', fontsize=18)
    plt.ylabel('Ecological Footprint Growth', fontsize=18)

    # Sort each percent column once instead of once per tick.
    gdp_ranked = diffdf.GDPDelta_P.sort_values().reset_index(drop=True)
    xlocs, xlabels = plt.xticks()
    # NOTE(review): 154 is a hard-coded axis position for the top tick -
    # presumably tuned to this dataset's size; confirm before reusing.
    xlocs, xlabels = _relabel_rank_ticks(xlocs, xlabels, gdp_ranked,
                                         gdp_ranked.values[-1], 154)
    plt.xticks(xlocs, xlabels, fontsize=14)

    ef_ranked = diffdf.EFDelta_P.sort_values().reset_index(drop=True)
    # Descending sort keeps NaN at the end, so the first entry is the largest value.
    ef_top = diffdf.EFDelta_P.sort_values(ascending=False).values[0]
    ylocs, ylabels = plt.yticks()
    # NOTE(review): 156 is hard-coded like the x-axis position above (and differs from it).
    ylocs, ylabels = _relabel_rank_ticks(ylocs, ylabels, ef_ranked, ef_top, 156)
    plt.yticks(ylocs, ylabels, fontsize=14)

    # Countries whose label is nudged below / above the marker to reduce overlap.
    downs = ['Jordan', 'Iran', 'Austria', 'Morocco', 'Samoa', 'Guinea-Bissau',
             'Barbados', 'Cyprus', 'Venezuela', 'Serbia', 'Madagascar', 'Rwanda',
             'El Salvador', 'Sweden']
    ups = ['Dominican Republic', 'India', 'Haiti', 'Netherlands', 'Switzerland', 'Lebanon',
           'Botswana', 'South Africa', 'Poland', 'Czech Republic', 'Cuba', 'Nepal',
           'Spain', 'Portugal', 'Dominica', 'Greece', 'Ukraine', 'Slovenia', 'Hungary',
           'Latvia']
    # Countries whose label is pushed further to the right of the marker.
    wide = ['Canada', 'Brazil', 'Italy', 'France', 'China', 'Russian Federation',
            'United Kingdom', 'Japan', 'Australia', 'Mexico', 'Germany']

    # country -> (horizontal offset, vertical alignment); setdefault keeps the
    # first match, i.e. the same priority order as downs, ups, wide, USA.
    placement = {}
    for names, spec in ((downs, (0.8, 'top')),
                        (ups, (0.8, 'bottom')),
                        (wide, (2, 'center')),
                        (['United States of America'], (4.2, 'bottom'))):
        for country in names:
            placement.setdefault(country, spec)

    # Walk the three columns together by position. Looking the ranks up with the
    # enumerate counter as an index label breaks as soon as the frame's index
    # is not exactly 0..n-1 (e.g. after filtering or sorting).
    for country, gdp_rank, ef_rank in zip(diffdf['Country'].values,
                                          diffdf['GDPDelta_Rank'].values,
                                          diffdf['EFDelta_Rank'].values):
        dx, valign = placement.get(country, (0.8, 'center'))
        ax.annotate(country, (gdp_rank + dx, ef_rank), size=13, color='k',
                    ha='left', va=valign)

    # NOTE(review): the quadrant counts and divider positions below are
    # hard-coded - presumably for the 2009-2013 data; they are not derived
    # from diffdf, so verify them when plotting other inputs.
    bbox_props = dict(boxstyle="round", facecolor='lightgray', ec='k', lw=1)
    ax.annotate('n=107\n(+)EF, (+)GDP', xy=(40, 155), size=14, bbox=bbox_props)
    ax.annotate('n=36\n(-)EF, (+)GDP', xy=(142, -2), size=14, bbox=bbox_props)
    ax.annotate('n=9\n(-)EF, (-)GDP', xy=(-5, 39), size=13, bbox=bbox_props)
    ax.annotate('n=1\n(+)EF, (-)GDP', xy=(-5, 155), size=13, bbox=bbox_props)
    plt.axhline(y=45.5, ls='-.', c='b')
    plt.axvline(x=10.5, ls='-.', c='b')

    plt.show()

In [None]:
# Draw the GDP vs. ecological-footprint growth chart, titled for 2009 - 2013.
dplot(diffdf=prep(), min_year='2009', max_year='2013')

I love data!