In [35]:
import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr

% matplotlib inline
sns.set()
sns.set_color_codes()

# Construct xarray for real gdp series (PPP)

### Normalize data sets to be constant to 2015

* Plot time series of PPP (WB and PWT expenditure)

    a) world total 
    
    b) world total, countries with missing data removed
    
    c) mortality
    

| source     | PPP year/source|  
|---------|------|
| WB      |  ICP 2011 + OECD/Eurostat |  
| PWT_rgdpne| ICP 2011 + OECD/Eurostat|

#### Table 1: PPP values used by the sources

Status:
**<span style="color:lime; background:green;">In Progress</span>** 
**<span style="color:white; background:maroon;">STARTED</span>**
**<span style="color:white; background:limegreen;">DONE</span>**


In [36]:
# Path template rooted at the directory named by the ZERG environment
# variable; use dir_path.format('<relative path>') to build a full path.
# os.path.join adds a separator only when ZERG does not already end with one,
# so the template is valid whether or not the variable has a trailing slash.
# Raises KeyError if ZERG is not set.
dir_path = os.path.join(os.environ['ZERG'], '{}')

### Mortality list of countries

In [37]:
# Read the mortality-sector country list and order its rows by the 'iso' column.
mortality_file = dir_path.format('data/meta_data/mortality_countries.csv')
mortality_countries = pd.read_csv(mortality_file).sort_values(by='iso')
# Lookup table keyed by 'iso' whose values come from the 'countryName' column.
mortality_country_map = (
    mortality_countries
    .set_index('iso')['countryName']
    .to_dict()
)


### Convenience functions


In [38]:
# lets simplify
def label_axes(_ax, _x_label, _y_label, _title):
    """Set the x-axis label, y-axis label and title of ``_ax`` at font size 14."""
    font_size = 14
    # Same order as before: x label, then y label, then title.
    for apply_text, text in (
        (_ax.set_xlabel, _x_label),
        (_ax.set_ylabel, _y_label),
        (_ax.set_title, _title),
    ):
        apply_text(text, fontsize=font_size)

### Plot total

In [63]:
def plot_world(_da, _subtitle=None):
    """Plot the world-total GDP series, one line per source.

    Sums ``_da`` over its ``iso`` dimension, converts the result to a pandas
    Series, pivots the ``source`` level into columns and draws every column
    on a single labelled figure.

    Parameters
    ----------
    _da : xarray.DataArray
        Array with at least an ``iso`` and a ``source`` dimension.
        NOTE(review): the x-axis label is hard-coded to 1950-2016 -- confirm
        it matches the year coordinate of the data being plotted.
    _subtitle : str, optional
        Extra text placed on a second line under the default title, e.g. a
        description of which countries were kept. When omitted the figure is
        titled exactly as before.
    """
    _xlab = 'Years ({0})'
    _ylab = 'Gross Domestic Product (2011 international $)'
    _title = 'Total Annual Gross Domestic Product(Real GDP, 2011 PPP series, WB and PWT)'
    if _subtitle:
        _title = '{0}\n{1}'.format(_title, _subtitle)

    # Collapse countries, then spread the sources into columns so that
    # DataFrame.plot draws one line per source.
    world = _da.sum(dim='iso').to_series().unstack('source')
    world_plt = world.plot(style=['go-', 'bo-', 'ro-', 'yo-'],
                           figsize=(15, 10), grid=True, rot=320, alpha=.7, ms=9)
    label_axes(world_plt,
               _xlab.format('1950-2016'),
               _ylab,
               _title)

### Plot country

In [15]:
def plot_country(_da, country_iso, country_name):
    """Plot the GDP series of a single country, one line per source.

    Parameters
    ----------
    _da : xarray.DataArray
        Array with an ``iso`` coordinate and a ``source`` dimension.
    country_iso : str
        Label looked up along the ``iso`` coordinate.
    country_name : str
        Human-readable name used in the figure title and in the
        "not in data" message.

    Returns
    -------
    None
        If ``country_iso`` is absent from ``_da.iso`` a message is printed and
        no figure is produced.
    """
    # Test against the coordinate values directly; `in` on a pandas Series
    # checks the index rather than the values.
    if country_iso not in _da.iso.values:
        print("{0}({1}) not in data".format(country_name, country_iso))
        # Nothing to plot: selecting a missing label below would raise.
        return

    _xlab = 'Years ({0})'
    _ylab = 'Gross Domestic Product (2011 international $)'
    _title = '{0} Annual Gross Domestic Product(Real GDP, 2011 PPP series, WB and PWT)'

    # To restrict the period, add e.g. year=range(1980, 2015) to the selection.
    c = _da.sel(iso=country_iso).to_series().unstack('source')
    c_plt = c.plot(style=['go-', 'bo-', 'ro-', 'yo-'],
                   figsize=(15, 10), grid=True, rot=320, alpha=.7, ms=9)
    label_axes(c_plt,
               _xlab.format('1950-2016'),
               _ylab,
               _title.format(country_name))

## Open file

In [40]:
real_gdp_2011_path = dir_path.format('data/output_data/real_gdp_2011_ppp_two_sources.nc')
da = xr.open_dataarray(real_gdp_2011_path)

# World Total GDP (PPP 2011) 

In [62]:
# Sum over the 'iso' dimension and flatten the result to a pandas Series,
# then display the 'source' level of its index.
s = (
    da
    .sum(dim='iso')
    .to_series()
)
s.index.get_level_values('source')

Index(['pwt_expenditure', 'pwt_expenditure', 'pwt_expenditure',
       'pwt_expenditure', 'pwt_expenditure', 'pwt_expenditure',
       'pwt_expenditure', 'pwt_expenditure', 'pwt_expenditure',
       'pwt_expenditure',
       ...
       'wb ', 'wb ', 'wb ', 'wb ', 'wb ', 'wb ', 'wb ', 'wb ', 'wb ', 'wb '],
      dtype='object', name='source', length=184)

# World Total GDP (only including countries present in both datasets)


In [None]:
# Keep only the countries that have a 2014 value in every source, i.e.
# countries with incomplete coverage are dropped before the world total
# is computed.
# NOTE(review): this cell originally referenced `da_imf`, which is not defined
# in the visible notebook, and passed a United Nations National Accounts
# caption as a second argument; `da` is assumed to be the intended array --
# confirm.
complete_in_2014 = ~da.sel(year=2014).isnull().any(dim='source').values
plot_world(da.loc[{'iso': complete_in_2014}])

# Mortality sector countries

In [None]:
# One figure per entry of mortality_country_map (key -> iso label,
# value -> display name), in the map's iteration order.
for _iso, _name in mortality_country_map.items():
    plot_country(_da=da, country_iso=_iso, country_name=_name)