# Import Libraries

In [None]:
import cartoframes
import pandas as pd
pd.options.display.max_columns = 200
pd.options.display.max_rows = 200
from datetime import datetime

import requests as req
import json
import boto3
from io import BytesIO, StringIO
from gzip import GzipFile
import gzip
import boto3

import sys
import logging
import os
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
import random

import palettable

from functools import reduce
from collections import defaultdict

# Authenticate to RW API

In [None]:
from configparser import ConfigParser

# Location of the INI-style credentials file holding the RW API token.
cred_path = "/Users/nathansuberi/Desktop/WRI_Programming/cred/.env"

config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file is missing or
# unreadable. Fail here with a clear message instead of a NoSectionError below.
if not config.read(cred_path):
    raise FileNotFoundError("RW API credentials file not found: {}".format(cred_path))
api_token = config.get("auth", "rw_api_token")

AUTH_TOKEN = api_token # <Insert Auth Token Here>

# Authenticating to Carto

In [None]:
# Carto account name and API key. Both are hard-coded; the trailing comments
# show the environment-variable lookups that these literals stand in for.
# NOTE(review): CARTO_KEY is an empty string in this copy - presumably the real
# key was blanked before sharing; confirm how credentials should be supplied.
CARTO_USER = 'wri-rw'#os.environ.get('CARTO_USER')
CARTO_KEY = ''#os.environ.get('CARTO_KEY')

# Shared Carto client: later cells call cc.read() to load tables and
# cc.write() to upload the georeferenced data.
cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER),
                              api_key=CARTO_KEY)

# Authenticating to S3

In [None]:
# AWS credentials passed explicitly to boto3 below. Both are empty strings in
# this copy; the trailing comments show the environment-variable lookups they
# stand in for.
# NOTE(review): confirm how credentials are meant to be supplied before running.
aws_access_key_id = ''#os.environ.get('aws_access_key_id')
aws_secret_access_key = ''#os.environ.get('aws_secret_access_key')

# Destination bucket and key prefix used by upload_data_to_cloud().
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/georeffed/"

# Low-level client, used by read_from_S3() for get_object calls.
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)
# Resource-level handle, used by write_to_S3() for Object(...).put calls.
s3_resource = boto3.resource(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)

# Functions for reading and uploading data to/from S3
def read_from_S3(bucket, key, index_col=0):
    """Download a CSV object from S3 and parse it into a DataFrame.

    Args:
        bucket: name of the S3 bucket to read from.
        key: object key of the CSV file inside the bucket.
        index_col: position of the column to use as the index (default 0).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    response = s3_client.get_object(Bucket=bucket, Key=key)
    # Pull the whole body into memory so pandas can parse it as a file object.
    raw_bytes = response['Body'].read()
    return pd.read_csv(BytesIO(raw_bytes), index_col=[index_col], encoding="utf8")

# client: https://gist.github.com/veselosky/9427faa38cee75cd8e27
# resource: https://codereview.stackexchange.com/questions/107412/convert-zip-to-gzip-and-upload-to-s3-bucket
# bucket: https://tobywf.com/2017/06/gzip-compression-for-boto3/
def write_to_S3(df, bucket, key):
    """Serialise ``df`` as CSV and store it at ``key`` in ``bucket``.

    Args:
        df: DataFrame to upload; its index is written as the first CSV column.
        bucket: name of the destination S3 bucket.
        key: object key to write to.
    """
    text_buffer = StringIO()
    # Encoding is passed explicitly because the Python 2 default ('ascii') fails.
    df.to_csv(text_buffer, encoding='utf-8')
    target = s3_resource.Object(bucket, key)
    target.put(Body=text_buffer.getvalue())


# Load georeferencing tables

In [None]:
# Lookup tables read from Carto; georef_by_cname() uses the 'aliases' entry.
georef = {}
georef['geometry'] = cc.read('wri_countries_a')
# The two lookup tables are kept without their 'index' and 'the_geom' columns.
georef['aliases'] = cc.read('country_aliases_extended').drop(['index', 'the_geom'], axis=1)
georef['known_non_un_isos'] = cc.read('known_non_un_isos').drop(['index', 'the_geom'], axis=1)

# Alias table with the 'alias' column removed and the remaining rows de-duplicated.
georef['iso_aliases'] = georef['aliases'].drop('alias', axis=1).drop_duplicates()

# Prepare data from WB API

In [None]:
#### Download Google Spreadsheets ####
# Georeference Config
# pandas reads the sheet's TSV export straight from the URL, so no shell call,
# temporary file, or manually managed file handle is needed.
wbg_config_url = "https://docs.google.com/spreadsheets/d/1Naqugy5wQEJQtGZzuNp0JU8WfeubRlRxGaBqgyQDvgg/export?format=tsv"
wbg_config = pd.read_csv(wbg_config_url, sep="\t", index_col=None)

In [None]:
# Show the downloaded configuration table for inspection.
wbg_config

In [None]:
# Spot check: fetch indicator SP.POP.TOTL and show the entry at index 0 of the
# resulting 'National Population WBG' column.
# NOTE: load_data_from_api is defined in the "Helper Functions" cell further
# down, so that cell has to be run before this one.
load_data_from_api('SP.POP.TOTL','National Population WBG')['National Population WBG'][0]

# Helper Functions

In [None]:
# Load data sets into memory for processing
def load_data_from_api(wbg_id, ds_name):
    """Fetch one World Bank indicator for all countries, 1999-2016.

    Args:
        wbg_id: World Bank indicator id inserted into the request URL
            (e.g. 'SP.POP.TOTL').
        ds_name: name given to the value column of the returned frame.

    Returns:
        A DataFrame with columns ``country_name``, ``year`` (datetime parsed
        from the four-digit year string) and ``ds_name`` (float).

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        ValueError: if the response carries no data records, which is what
            the second element of the JSON payload is expected to hold.
    """
    res = req.get("http://api.worldbank.org/countries/all/indicators/{}?date=1999:2016&format=json&per_page=10000".format(wbg_id))
    # Surface HTTP failures directly instead of failing later on odd JSON.
    res.raise_for_status()

    payload = res.json()
    # The records are read from payload[1]; anything shorter or empty means
    # the API returned no data for this indicator.
    if not isinstance(payload, list) or len(payload) < 2 or not payload[1]:
        raise ValueError("World Bank API returned no data for indicator {}: {}".format(wbg_id, payload))

    # pandas >= 1.0 exposes json_normalize at the top level; older versions
    # only have it under pd.io.json, newer ones no longer do.
    normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize
    data = normalize(payload[1])
    # Copy so the column assignments below do not write into a slice view.
    data = data[["country.value", "date", "value"]].copy()
    data["value"] = data["value"].astype(float)
    data["date"] = [datetime.strptime(date, '%Y') for date in data['date']]
    data.columns = ["country_name", "year", ds_name]
    return data
    
def georef_by_cname(df):
    """Attach Resource Watch country codes and names by matching country names.

    Each ``country_name`` is normalised (stripped, lower-cased, spaces removed,
    typographic apostrophe replaced by a plain one) and looked up in the
    ``alias`` column of the module-level ``georef['aliases']`` table. Names
    without a match are logged and get null code/name values.

    Args:
        df: DataFrame with a ``country_name`` column. It is not modified.

    Returns:
        A copy of ``df`` with a fresh 0..n-1 index and two added columns,
        ``rw_country_code`` (from the alias table's ``iso`` column) and
        ``rw_country_name`` (from its ``name`` column).
    """
    aliases = georef['aliases']
    # The merge result is copied back positionally, so it must have exactly
    # one row per input row; duplicate alias rows would break that and make
    # the assignment below fail on a length mismatch.
    unique_aliases = aliases.drop_duplicates(subset='alias')
    if len(unique_aliases) != len(aliases):
        logging.warning('duplicate aliases in the alias table; using the first match for each')

    # Work on a copy so the caller's frame keeps its index and columns.
    df = df.copy()
    df.index = list(range(df.shape[0]))
    df['join_col'] = df['country_name'].apply(lambda item: item.strip().lower().replace(' ','').replace('’', '\''))
    data_with_alias = df.merge(unique_aliases,
                       left_on='join_col',
                       right_on='alias',
                       how='left')
    null_aliases = pd.isnull(data_with_alias['alias'])

    if null_aliases.any():
        no_alias_match = data_with_alias[null_aliases]
        logging.info('no match for these aliases in the data being processed: ')
        missed_aliases = no_alias_match['country_name'].unique()
        logging.info(missed_aliases)

    logging.info('df shape: {}'.format(df.shape))
    logging.info('data_with_alias shape: {}'.format(data_with_alias.shape))

    # A left merge keeps the left row order, so the values line up by position.
    df['rw_country_code'] = data_with_alias['iso'].values
    df['rw_country_name'] = data_with_alias['name'].values
    return df.drop('join_col', axis=1)

def createHeaders():
    """Return the HTTP headers for authenticated JSON requests to the RW API.

    The bearer token is read from the module-level ``AUTH_TOKEN``.
    """
    headers = {}
    headers['content-type'] = "application/json"
    headers['authorization'] = "Bearer {}".format(AUTH_TOKEN)
    return headers

def upload_data_to_cloud(df, cloud_name):
    """Store ``df`` on S3 as a CSV and on Carto as a public table.

    Args:
        df: georeferenced DataFrame to upload.
        cloud_name: name used for both the S3 file (``<s3_folder><cloud_name>.csv``)
            and the Carto table, which is overwritten if it already exists.
    """
    s3_key = '{}{}.csv'.format(s3_folder, cloud_name)
    write_to_S3(df, s3_bucket, s3_key)
    print('saved {} georeffed data to s3'.format(cloud_name))

    cc.write(df, cloud_name, overwrite=True, privacy='public')
    print('saved {} georeffed data to Carto'.format(cloud_name))
    
def connect_to_rw_backoffice(cloud_name):
    """Register a Carto table as a dataset with the Resource Watch API.

    Args:
        cloud_name: Carto table name; also used as the dataset name.

    Returns:
        The id of the newly created dataset, read from ``data.id`` in the
        JSON response.

    Raises:
        requests.HTTPError: if the API rejects the request (4xx/5xx). The
            response body is logged first so the reason is visible.
    """
    ds_specs = {
        "connectorType":"rest",
        "provider":"cartodb",
        "connectorUrl":"https://wri-rw.carto.com/tables/{}".format(cloud_name),
        "application":["rw"],
        "name":cloud_name
    }

    create_res = req.request("POST",
                      'https://api.resourcewatch.org/v1/dataset',
                      data=json.dumps(ds_specs),
                      headers = createHeaders())

    logging.info(create_res)

    # Without this check a rejected request only shows up as a KeyError on
    # 'data' below, with no hint of what the API actually said.
    if not create_res.ok:
        logging.error('dataset registration failed: %s', create_res.text)
        create_res.raise_for_status()

    return create_res.json()['data']['id']
    
    
def setup_interaction_config(obj, col, ds):
    """Append the interaction entry for column ``col`` of ``ds`` to ``obj``.

    The entry's ``type`` is 'string' for object columns, 'numeric' for int64
    and float64 columns, and the dtype's own name for anything else.

    Args:
        obj: list of interaction entries; extended in place.
        col: column name to describe.
        ds: DataFrame that contains ``col``.

    Returns:
        ``obj`` itself, so the function can be used as a reducer.
    """
    dtype_name = str(ds[col].dtype)
    if dtype_name in ('int64', 'float64'):
        kind = 'numeric'
    elif dtype_name == 'object':
        kind = 'string'
    else:
        kind = dtype_name

    entry = {
        'column': col,
        'format': None,
        'prefix': '',
        'property': col,
        'suffix': '',
        'type': kind,
    }
    obj.append(entry)
    return obj
    
def gen_sql(table_name, data_col):
    basesql = ('SELECT wri.cartodb_id, ST_Transform(wri.the_geom, 3857)' +
    ' AS the_geom_webmercator, data.rw_country_name, data.rw_country_code, data.country_name,' +
    ' EXTRACT(YEAR FROM data.year) AS year,'+
    ' data.{} FROM {} data'+
    ' LEFT OUTER JOIN wri_countries_a wri' +
    ' ON data.rw_country_code = wri.iso_a3' + 
    ' WHERE data.{} IS NOT NULL AND data.year=').format(data_col, table_name, data_col)
    return basesql + '\'{}\''

def pick_ramp(len_ramp):
    '''Return the hex colours of a randomly chosen ColorBrewer sequential ramp.

    Candidates are the names in ``palettable.colorbrewer.sequential`` whose
    last character equals ``str(len_ramp)``, which leaves out the reversed
    ramps.

    Possibilities: ['Blues', 'BuGn', 'BuPu', 'GnBu', 'Greens',
            'Greys', 'OrRd', 'Oranges', 'PuBu', 'PuBuGn',
            'PuRd', 'Purples', 'RdPu', 'Reds', 'YlGn',
           'YlGnBu', 'YlOrBr', 'YlOrRd']
    '''
    sequential = palettable.colorbrewer.sequential.__dict__
    wanted_suffix = str(len_ramp)
    # Don't accept the reverse ramps
    candidates = [name for name in sequential.keys() if name[-1] == wanted_suffix]
    chosen = random.choice(candidates)
    return sequential[chosen].hex_colors

def gen_cartocss_legend(col, breaks, colors):
    """Build choropleth CartoCSS and the matching legend items.

    Every consecutive pair of values in ``breaks`` defines one class, filled
    with the colour at the same position in ``colors``. Classes are half-open
    (``lower <= value < upper``) so a value sitting exactly on a break belongs
    to exactly one class; the last class also includes its upper bound.

    Args:
        col: name of the data column the filters test.
        breaks: class boundaries (presumably ascending); ``len(breaks) - 1``
            classes are emitted.
        colors: fill colours; only the first ``len(breaks) - 1`` are used.

    Returns:
        ``(cartocss, legend)`` where ``legend`` is a list of
        ``{'color': ..., 'name': '<upper'}`` dicts, one per class.

    Raises:
        IndexError: if ``colors`` has fewer entries than there are classes.
    """
    cartocss = '#table {polygon-opacity: 1; line-width: 0.5; line-color: #FFF; line-opacity: 1;}'
    legend = []
    last_class = len(breaks) - 2
    for i, (lower, upper) in enumerate(zip(breaks, breaks[1:])):
        # Close the top of the final class so the maximum break is coloured too.
        upper_op = '<=' if i == last_class else '<'
        # Literal braces are escaped as {{ }} rather than swapped in afterwards,
        # so column names and values can never be mistaken for placeholders.
        cartocss += ' [{col} >= {lo}][{col} {op} {hi}]{{polygon-fill:{fill} ;}}'.format(
            col=col, lo=lower, op=upper_op, hi=upper, fill=colors[i])
        legend.append({'color': colors[i], 'name': '<{}'.format(upper)})

    return cartocss, legend
    
def autogen_layer_def(year, max_year, rw_id, cloud_name, cartocss, legend, interaction, basesql):
    """Assemble the layer definition for one year of a dataset.

    Args:
        year: the layer's year; the first four characters of ``str(year)``
            are used as its label and timeline order.
        max_year: latest year of the dataset; the layer whose label matches it
            is flagged as the default.
        rw_id: id of the dataset the layer belongs to.
        cloud_name: table name; the layer's display name is built from it.
        cartocss: CartoCSS string for the layer.
        legend: list of legend items.
        interaction: list of interaction entries.
        basesql: SQL template with one ``{}`` placeholder, filled with ``year``.

    Returns:
        The layer definition as a dict.
    """
    year_label = str(year)[:4]
    is_latest = year_label == str(max_year)[:4]

    # Display name: "<cloud_name>_<year>" minus its first two underscore-separated
    # tokens, joined with spaces and title-cased.
    name_tokens = '{}_{}'.format(cloud_name, year_label).split('_')
    display_name = ' '.join(name_tokens[2:]).title()

    carto_layer = {
        'options': {
            'cartocss': cartocss,
            'cartocss_version': '2.3.0',
            'sql': basesql.format(year),
        },
        'type': 'mapnik',
    }
    layer_config = {
        'account': 'wri-rw',
        'body': {
            'layers': [carto_layer],
            'maxzoom': 18,
            'minzoom': 3,
        },
        # Timeline settings: layers are ordered by their year.
        'timeline': True,
        'order': int(year_label),
        'timelineLabel': year_label,
    }

    return {
        'application': ['rw'],
        'language': 'en',
        'applicationConfig': {},
        'dataset': rw_id,
        'default': is_latest,
        'description': '',
        'env': 'production',
        'interactionConfig': {'output': interaction},
        'geoInfo': True,
        'type': 'tabular',
        'iso': [],
        'layerConfig': layer_config,
        'legendConfig': {'items': legend, 'type': 'choropleth'},
        'name': display_name,
        'protected': False,
        'provider': 'cartodb',
    }
        
def upload_layer_def_to_backoffice(layer_def, rw_id):
    """POST one layer definition to the dataset's layer endpoint.

    Args:
        layer_def: layer definition dict; sent as the JSON request body.
        rw_id: id of the dataset the layer is attached to.

    Returns:
        The raw response text, whatever the HTTP status was.
    """
    endpoint = "https://api.resourcewatch.org/v1/dataset/{}/layer" .format(rw_id)
    payload = json.dumps(layer_def)
    response = req.request("POST", endpoint, data=payload, headers=createHeaders())
    return response.text
    
def load_tags_to_backoffice(tags, rw_id):
    """POST a dataset's tags to its 'general' vocabulary endpoint.

    Args:
        tags: semicolon-separated tag string.
        rw_id: id of the dataset to tag.

    Returns:
        The raw response text, whatever the HTTP status was.
    """
    tag_list = tags.split(';')
    # Here, 'general' is a 'vocabulary id'
    endpoint = "https://api.resourcewatch.org/v1/dataset/{}/vocabulary/general" .format(rw_id)
    payload = json.dumps({'tags': tag_list})
    response = req.request("POST", endpoint, data=payload, headers=createHeaders())
    return response.text
    
def prepare_data(obj, info):
    """Run the whole pipeline for one config row and record the outcome in ``obj``.

    Steps: download the indicator, georeference it by country name, upload it
    to S3 and Carto, register the dataset with the RW API, then build and
    upload one layer definition per distinct year in the data.

    Args:
        obj: accumulator dict; one entry is added, keyed by ``info[0]``.
        info: one config row, read by position: [0] is stored as the WRI id,
            [1] is the World Bank indicator id, [2] the dataset name;
            [3]-[5] (units, tags, format) are read but not used.

    Returns:
        ``obj`` itself, so the function can be used as a reducer.
    """
    wri_id = info[0]
    wbg_id = info[1]
    ds_name = info[2].replace(' ', '_').lower()
    cloud_name = '{}_{}'.format(wri_id, ds_name).replace('.', '_')
    units = info[3] # not currently in use
    tags = info[4] # not currently in use
    _format = info[5] # not currently in use

    raw_frame = load_data_from_api(wbg_id, ds_name)
    georeffed = georef_by_cname(raw_frame)

    upload_data_to_cloud(georeffed, cloud_name)
    rw_id = connect_to_rw_backoffice(cloud_name)

    # Fixed class breaks at 0, 20, ..., 100. These are constants, not quantiles
    # of the data.
    breaks = [0, 20, 40, 60, 80, 100]
    colors = pick_ramp(len(breaks) + 1)
    cartocss, legend = gen_cartocss_legend(ds_name, breaks, colors)

    # One interaction entry per column of the georeferenced frame.
    interaction = []
    for column in georeffed.columns:
        interaction = setup_interaction_config(interaction, column, georeffed)

    basesql = gen_sql(cloud_name, ds_name)
    max_year = georeffed['year'].max()
    layer_defs = [
        autogen_layer_def(year, max_year, rw_id, cloud_name, cartocss, legend, interaction, basesql)
        for year in georeffed['year'].unique()
    ]
    logging.info(layer_defs)

    layer_defs_on_backoffice = [
        upload_layer_def_to_backoffice(layer_def, rw_id) for layer_def in layer_defs
    ]
    logging.info(layer_defs_on_backoffice)

    # Tags are not uploaded, so the 'tags' slot is recorded as None.
    obj[wri_id] = {
        'rw_id': rw_id,
        'data': georeffed,
        'layers': layer_defs_on_backoffice,
        'tags': None,
    }

    return obj

In [None]:
# Run the full pipeline once per row of wbg_config. prepare_data adds one entry
# (keyed by the row's first field) to the accumulator dict on each call and
# returns it, so the final result maps each of those keys to its upload record.
data_tables = reduce(prepare_data, wbg_config.values, {})

# Examine uploaded data

In [None]:
# (dataset key, explore-page URL) for every dataset recorded in data_tables.
links = [(k, 'https://staging.resourcewatch.org/data/explore/{}'.format(v['rw_id'])) for k, v in data_tables.items()]
# print() is called only for its side effect, so a plain loop is used instead
# of building a throwaway list of None values.
for ds_key, explore_url in links:
    print(ds_key, explore_url)