# Import Libraries

In [4]:
import cartoframes
import pandas as pd
pd.options.display.max_columns = 200

import requests as req
import json
import boto3
import io

import sys
#import logging
import os
#logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
import random

from functools import reduce
from collections import defaultdict

# Authenticating to Carto

In [None]:
CARTO_USER = 'wri-rw'#os.environ.get('CARTO_USER')
CARTO_KEY = ''#os.environ.get('CARTO_KEY')

cc = cartoframes.CartoContext(base_url='https://{}.carto.com/'.format(CARTO_USER),
                              api_key=CARTO_KEY)

# Authenticating to S3

In [None]:
aws_access_key_id = #os.environ.get('aws_access_key_id')
aws_secret_access_key = #os.environ.get('aws_secret_access_key')

s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/wide_to_long/"

s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)
s3_resource = boto3.resource(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)

# Functions for reading and uploading data to/from S3
def read_from_S3(bucket, key, index_col=0):
    obj = s3_client.get_object(Bucket=bucket, Key=key)
    df = pd.read_csv(io.BytesIO(obj['Body'].read()), index_col=[index_col], encoding="utf8")
    return(df)

def write_to_S3(df, bucket, key):
    csv_buffer = io.StringIO()
    # Need to set encoding in Python2... default of 'ascii' fails
    df.to_csv(csv_buffer, encoding='utf-8')
    s3_resource.Object(bucket, key).put(Body=csv_buffer.getvalue())

# Load data from RW API

In [21]:
# Base URL for getting dataset metadata from RW API
url = "https://api.resourcewatch.org/v1/dataset?sort=slug,-provider,userId&status=saved&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = { "application":"rw", "page[size]": 1000}

# Request all datasets, and extract the data from the response
res = req.get(url, params=payload)
data = res.json()["data"]

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html
datasets_on_api = {}
for ix, dset in enumerate(data):
    atts = dset["attributes"]
    metadata = atts["metadata"]
    layers = atts["layer"]
    widgets = atts["widget"]
    tags = atts["vocabulary"]
    datasets_on_api[dset["id"]] = {
        "name":atts["name"],
        "table_name":atts["tableName"],
        "provider":atts["provider"],
        "date_updated":atts["updatedAt"],
        "num_metadata":len(metadata),
        "metadata": metadata,
        "num_layers":len(layers),
        "layers": layers,
        "num_widgets":len(widgets),
        "widgets": widgets,
        "num_tags":len(tags),
        "tags":tags
    }

# Create the DataFrame, name the index, and sort by date_updated
# More recently updated datasets at the top
current_datasets_on_api = pd.DataFrame.from_dict(datasets_on_api, orient='index')
current_datasets_on_api.index.rename("Dataset", inplace=True)
current_datasets_on_api.sort_values(by=["date_updated"], inplace=True, ascending = False)

# Select all Carto datasets on the API:
provider = "cartodb"
carto_ids = (current_datasets_on_api["provider"]==provider)
carto_data = current_datasets_on_api.loc[carto_ids]

print("Number of Carto datasets: " + str(carto_data.shape[0]))

Number of Carto datasets: 237


# Load georeferencing config & data

In [3]:
georef = {
    'geometry':cc.read('wri_countries_a'),
    'aliases':cc.read('country_aliases_extended'),
    'known_non_un_isos':cc.read('known_non_un_isos')
}

#### Download Google Spreadsheets ####
# Additional Alias List
!curl "https://docs.google.com/spreadsheets/d/11k_6GbFgtF6eAQ3iAjPzt2KWc2n0SsP5P6g7kqILbkM/export?format=tsv" > additional_aliases.tsv
additional_aliases = pd.read_csv(open("additional_aliases.tsv", "r"), sep="\t", index_col=[0])
os.remove("additional_aliases.tsv")

NameError: name 'cc' is not defined

In [29]:
# Read in data sets info from config file
georef_config = pd.read_csv('/Users/nathansuberi/Desktop/RW_Data/georeferencing_tasks/georef_these.csv')
georef_config = georef_config.set_index('wri_id')
georef_config

#### Download Google Spreadsheets ####
# Georeference Config
!curl "https://docs.google.com/spreadsheets/d/1S4Zh8V_keiDhqfxlATyC8veb3LtZ5W6uM1dyogOL7f0/export?format=tsv" > georef_config.tsv
georef_config = pd.read_csv(open("georef_config.tsv", "r"), sep="\t", index_col=[0])
os.remove("georef_config.tsv")

Unnamed: 0_level_0,rw_id,country_name,country_code
wri_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
com.009,c61c364b-1d68-4dd9-ae3d-76c2a0022280,,isoalpha3
cit.013,5d269c36-6ccf-4620-838d-431f86c30f69,country,
cit.020,6d3163f5-4e08-4830-84f1-2c5d76570a82,country_name,country_code
cli.022,995ec4fe-b3cc-4cf4-bd48-b89d4e3ea072,countryname,iso3v10
ene.012,d446a52e-c4c1-4e74-ae30-3204620a0365,country_name,country_code
for.020,03bfb30e-829f-4299-bab9-b2be1b66b5d4,country,
soc.001,0b9f0100-ce5b-430f-ad8f-3363efa05481,country,
soc.002,d4ca3cc4-c162-469c-b341-b52284a73eaa,country,
soc.012,f48541d3-a622-4908-9400-5ef26257ac96,country,
soc.021,e7582657-9c16-4eb1-89e8-0211d94015c6,country,


In [30]:
# Load data sets into memory for processing
def load_data(obj, elem):
    print(elem)
    wri_id = elem[0].strip()
    rw_id = elem[1].strip()
    try:
        table_name = carto_data.loc[rw_id]['table_name']
        obj[wri_id] = cc.read(table_name)
        print('Table shape: {}'.format(obj[wri_id].shape))
    except:
        obj[wri_id] = 'Unavailable'
        print('Unavailable')
    return obj

data_tables = reduce(load_data, zip(georef_config.index,georef_config['rw_id']), {})

('com.009', ' c61c364b-1d68-4dd9-ae3d-76c2a0022280')
Table shape: (841387, 10)
('cit.013', '5d269c36-6ccf-4620-838d-431f86c30f69')
Table shape: (194, 13)
('cit.020', '6d3163f5-4e08-4830-84f1-2c5d76570a82')
Table shape: (218, 63)
('cli.022', '995ec4fe-b3cc-4cf4-bd48-b89d4e3ea072')
Table shape: (232, 34)
('ene.012', 'd446a52e-c4c1-4e74-ae30-3204620a0365')
Table shape: (4825, 10)
('for.020', '03bfb30e-829f-4299-bab9-b2be1b66b5d4')
Table shape: (236, 13)
('soc.001', '0b9f0100-ce5b-430f-ad8f-3363efa05481')
Unavailable
('soc.002', 'd4ca3cc4-c162-469c-b341-b52284a73eaa')
Table shape: (195, 16)
('soc.012', 'f48541d3-a622-4908-9400-5ef26257ac96')
Table shape: (590, 7)
('soc.021', 'e7582657-9c16-4eb1-89e8-0211d94015c6')
Table shape: (180, 41)
('soc.022', '773a16a7-3531-4b56-8253-babd15ad7f87')
Table shape: (52, 11)
('soc.024', '6c6e70e7-5a19-46f2-9d95-34789fd20adc')
Table shape: (104, 8)
('soc.026', '0be2ce12-79b3-434b-b557-d6ea92d787fe')
Table shape: (144, 13)
('soc.045', '2cc29514-b97a-4103-92

# Configuring the alias table

In [190]:
df = georef['aliases']

## Adding all countries from our wri-bounds shapefile to the alias table

new_aliases = georef['geometry'][['iso_a3', 'name']].copy()
new_aliases['alias'] = new_aliases['name']
cols = ['iso' if col=='iso_a3' else col for col in new_aliases.columns]
cols = [col.strip() for col in cols]
new_aliases.columns = cols

print('Existing aliases')
print(df.columns)
print(df.shape)
print('Adding aliases from country table')
print(new_aliases.columns)
print(new_aliases.shape)

df = df.append(new_aliases)

## Adding in new aliases identified by team
print('Adding aliases from csv')
new_aliases = pd.read_csv(ADDITIONAL_ALIASES, header=0)
new_aliases.columns = ['alias', 'name', 'iso']
print(new_aliases.head(5))

df = df.append(new_aliases)

# Make all aliases lower case, remove spacing
df['alias'] = [alias.strip().lower().replace(' ','') for alias in df['alias']]

## check / remove duplicates
sum(df.duplicated(subset=['alias']))
sum(df.duplicated(subset=['name']))
sum(df.duplicated(subset=['iso']))

try:
    df = df.drop('the_geom', axis=1)
except:
    print('unable to drop the_geom from {} data'.format(wri_id))
try:
    df = df.drop('cartodb_georef_status', axis=1)
except:
    print('unable to drop cartodb_georef_status from {} data'.format(wri_id))
try:
    df = df.drop('index', axis=1)
except:
    print('unable to drop index from {} data'.format(wri_id))
try:
    df = df.drop('cartodb_id', axis=1)
except:
    print('unable to drop cartodb_id from {} data'.format(wri_id))

df = df.drop_duplicates()
    
georef['aliases'] = df

print('Size of current aliasing table: ' + str(georef['aliases'].shape))

Existing aliases
Index(['alias', 'index', 'iso', 'name', 'the_geom'], dtype='object')
(548, 5)
Adding aliases from country table
Index(['iso', 'name', 'alias'], dtype='object')
(193, 3)
Adding aliases from csv
                                   alias                              name  \
0      Venezuela, Bolivarian Republic of                         Venezuela   
1  Democratic People's Republic of Korea                       North Korea   
2       Micronesia (Federated States of)    Federated States of Micronesia   
3                        Dem. Rep. Congo  Democratic Republic of the Congo   
4       Bolivia (Plurinational State of)                           Bolivia   

   iso  
0  VEN  
1  PRK  
2  FSM  
3  COD  
4  BOL  
unable to drop cartodb_georef_status from cli.022 data
unable to drop cartodb_id from cli.022 data
Size of current aliasing table: (309, 3)


In [194]:
LOOK_FOR_ISO='fsm'
LOOK_FOR_NAME='korea'
georef['aliases']['iso'].fillna('', inplace=True)
print(georef['aliases'].loc[georef['aliases']['iso'].str.lower().str.contains(LOOK_FOR_ISO)])
print()
print(georef['aliases'].loc[georef['aliases']['name'].str.lower().str.contains(LOOK_FOR_NAME)])

                             alias  iso                            name
63                      micronesia  FSM  Federated States of Micronesia
182    federatedstatesofmicronesia  FSM  Federated States of Micronesia
260            micronesia,fed.sts.  FSM  Federated States of Micronesia
265   micronesia,federatedstatesof  FSM  Federated States of Micronesia
2    micronesia(federatedstatesof)  FSM  Federated States of Micronesia

                                  alias  iso               name
5                            koreanorth  PRK        North Korea
6                            koreasouth  KOR        South Korea
39                       korea,dem.rep.  PRK        North Korea
58                      republicofkorea  KOR        South Korea
61    democraticpeople'srepublicofkorea  PRK        North Korea
70                           korea,rep.  KOR        South Korea
85                                korea  KOR        South Korea
130                          southkorea  KOR        Sou

# Perform georeferencing

In [215]:
# Tracking all mis-matched names
missed_names = {}
missed_isos = {}

alias_info = georef['aliases']

georefed_data = {}

for wri_id, data in data_tables.items():
    
    print('Processing table ' + wri_id)
    if type(data) == str:
        print('Unavailable, skipping')
        georefed_data[wri_id] = 'Unavailable'
        continue
    
    print('Table head: {}'.format(data.head(5)))
    
    ### WARNING: non standardized indices in the data cause problems after the merge step
    data = data.copy()
    data.index = list(range(data.shape[0]))

    c_code = georef_config.loc[wri_id, 'country_code']
    c_code = None if pd.isnull(c_code) else c_code
    c_name = georef_config.loc[wri_id, 'country_name']
    c_name = None if pd.isnull(c_name) else c_name
    
    print('c_code: ***' + str(c_code) + '***')
    print('c_name: ***' + str(c_name) + '***')
    
    # Check if isos match our table
    
    if c_code:
        print('already has an iso3 code, in column ' + c_code)
        data_with_alias = data.merge(alias_info,
                           left_on=c_code,
                           right_on='iso', 
                           how='left')
        
        null_isos = pd.isnull(data_with_alias['iso'])
        if sum(null_isos):
            no_iso_match = data_with_alias[null_isos]
            print('no match for these isos in the data being processed: ')
            print(no_iso_match[c_code].unique())
            try:
                print(no_iso_match[c_code].unique())
                missed_isos[wri_id] = no_iso_match[c_code].unique()
            except:
                c_code = c_code+'_x'
                print(no_iso_match[c_code].unique())
                missed_isos[wri_id] = no_iso_match[c_code].unique()
    
        ### data IS ALTERED HERE

        try:
            data['rw_country_code'] = data_with_alias['iso']
        except:
            data['rw_country_code'] = data_with_alias['iso_y']
            
        try:
            data['rw_country_name'] = data_with_alias['name']  
        except:
            data['rw_country_name'] = data_with_alias['name_y'] 
            
        try:
            data = data.drop('the_geom', axis=1)
        except:
            print('unable to drop the_geom from {} data'.format(wri_id))
            
        try:
            data = data.drop('cartodb_georef_status', axis=1)
        except:
            print('unable to drop cartodb_georef_status from {} data'.format(wri_id))

        continue
    
    # If country name is supplied, check how many match up with alias/name in country_aliases
    if c_name:       
        # Ensure that leading or trailing spaces don't break the match
        #data[c_name] = ['North Korea' if name=='Korea, Dem. People\x92s Rep.' else name for name in data[c_name]]
    
        _data = data.copy()
        _data['join_col'] = data[c_name].apply(lambda item: item.strip().lower().replace(' ','').replace('’', '\''))
    
        data_with_alias = _data.merge(alias_info, 
                                         left_on = 'join_col',
                                         right_on = 'alias',
                                         how='left') 

        null_aliases = pd.isnull(data_with_alias['alias'])             
            
        print('data with alias df:')
        print(data_with_alias.shape)
        print(data_with_alias.head(6))
        print('raw data')
        print(data.shape)
        print(data.head(5))
    
        
        if sum(null_aliases):
            no_alias_match = data_with_alias.loc[null_aliases]
            print('missed aliases, matching on column "alias" of country_aliases')
            print(no_alias_match)
            try:
                print(no_alias_match[c_name].unique())
                missed_names[wri_id] = no_alias_match[c_name].unique()
            except:
                c_name = c_name+'_x'
                print(no_alias_match[c_name].unique())
                missed_names[wri_id] = no_alias_match[c_name].unique()
                
        ### data IS ALTERED HERE

        try:
            data['rw_country_code'] = data_with_alias['iso']
        except:
            data['rw_country_code'] = data_with_alias['iso_y']
            
        try:
            data['rw_country_name'] = data_with_alias['name']  
        except:
            data['rw_country_name'] = data_with_alias['name_y'] 
            
        try:
            data = data.drop('the_geom', axis=1)
        except:
            print('unable to drop the_geom from {} data'.format(wri_id))
            
        try:
            data = data.drop('cartodb_georef_status', axis=1)
        except:
            print('unable to drop cartodb_georef_status from {} data'.format(wri_id))

        
    ### SUCCESS
    print('Final head:')
    print(data.head(5))
    georefed_data[wri_id+'_georefed'] = data.copy()

Processing table com.009
Table head:                   flow   index isoalpha3  mfa13     mfa4 rw_country_code  \
cartodb_id                                                                 
1           Population  181182       CPV   None     None             CAN   
2              Exports  469928       LVA   Wood  Biomass             GNB   
3                  PTB     333       CRI   Wood  Biomass             CHL   
4           Population     753       KHM   None     None             FJI   
5                  DMI    4184       AFG  Crops  Biomass             AFG   

           rw_country_name the_geom  time        value  
cartodb_id                                              
1                   Canada     None  1983    302.17400  
2            Guinea Bissau     None  1992     64.40704  
3                    Chile     None  1970    201.10000  
4                     Fiji     None  1999  11928.30600  
5              Afghanistan     None  1997   5844.51000  
c_code: ***isoalpha3***
c_name:

# Checking for missed names

In [216]:
print(missed_names)
print(missed_isos)

print('Newly missed names:')
for wri_id, names in missed_names.items():
    print('Missed names in data set {}'.format(wri_id))
    for name in names:
        if name not in georef['known_non_un_isos']['name'].values:
            print(name)
        
print('Newly missed isos:')
for wri_id, isos in missed_isos.items():
    print('Missed isos in data set {}'.format(wri_id))
    for iso in isos:
        if iso not in georef['known_non_un_isos']['iso'].values:
            print(iso)

{'cit.013': array(['CookIslands', 'Niue'], dtype=object), 'for.020': array(['Svalbard and Jan Mayen Islands', 'Tokelau', 'Guam', 'Holy See',
       'Saint Barthélemy', 'Saint Helena, Ascension and Tristan da Cunha',
       'American Samoa', 'Anguilla', 'Aruba', 'Curaçao',
       'Falkland Islands (Malvinas)', 'French Polynesia', 'Gibraltar',
       'Greenland', 'Guernsey', 'Norfolk Island',
       'Occupied Palestinian Territory', 'Saint Martin (French part)',
       'Sint Maarten (Dutch Part)', 'United States Virgin Islands',
       'Wallis and Futuna Islands', 'Bonaire, Sint Eustatius and Saba',
       'British Virgin Islands', 'Saint Pierre and Miquelon', 'Bermuda',
       'Cayman Islands', 'Cook Islands', 'Guadeloupe', 'Faroe Islands',
       'French Guiana', 'Jersey', 'Isle of Man', 'Martinique',
       'Montserrat', 'New Caledonia', 'Mayotte',
       'Northern Mariana Islands', 'Niue', 'Puerto Rico',
       'Pitcairn Islands', 'Réunion', 'Turks and Caicos Islands',
       'Wester

In [211]:
## Process to investigate misses for a specific dataset
# Here, discovered issue with using apostrophe "’" instead of "'"
# Led to augmenting data prep for the georeferencing step above

df = data_tables['for.020']
df['join_col'] = df['country'].apply(lambda item: item.strip().lower().replace(' ',''))

df_a = df.merge(georef['aliases'],
                           left_on='join_col',
                           right_on='alias', 
                           how='left')
df_a.loc[pd.isnull(df_a['alias']),['country','join_col','alias', 'iso', 'name']]

print('democraticpeople’srepublicofkorea' in georef['aliases']['alias'])

LOOK_FOR = 'people\'s'
print('Viewing aliases with a name that contains {}:'.format(LOOK_FOR))
df = georef['aliases']
print(df.loc[df['alias'].str.lower().str.contains(LOOK_FOR)])

False

# Compile known not-included country names

In [90]:
def flatten(obj, new_list):
    obj.extend(new_list)
    return obj

other_isos = reduce(flatten, missed_isos.values(), [])
other_names = reduce(flatten, missed_names.values(), [])

def gather_names(iso):
    name = input('Official name of {}?'.format(iso))
    return((iso, name))

def gather_isos(name):
    iso = input('Official iso of {}?'.format(name))
    return((iso, name))

isos_and_names = list(map(gather_names, other_isos))
names_and_isos = list(map(gather_isos, other_names))

Official name of XKX?Kosovo
Official name of PSE?Palestinian Territory, Occupied
Official name of TWN?Taiwan, Province of China
Official name of SUN?Union of Soviet Socialist Republics
Official name of SJM?Svalbard and Jan Mayen
Official name of CSK?Czechoslovakia
Official name of GIB?Gibraltar
Official name of CHI?None
Official name of REU?Réunion
Official name of HKG?Hong Kong
Official name of CUW?Curaçao
Official name of MSR?Montserrat
Official name of PRI?Puerto Rico
Official name of VAT?Holy See
Official name of TCA?Turks and Caicos Islands
Official name of MNP?Northern Mariana Islands
Official name of FLK?Falkland Islands (Malvinas)
Official name of MAC?Macao
Official name of NFK?Norfolk Island
Official name of TKL?Tokelau
Official name of VIR?Virgin Islands, U.S.
Official name of ABW?Aruba
Official name of ASM?American Samoa
Official name of PYF?French Polynesia
Official name of SCG?Serbia and Montenegro
Official name of COK?Cook Islands
Official name of GGY?Guernsey
Official na

In [156]:
print(isos_and_names)
print(names_and_isos)

[('XKX', 'Kosovo'), ('PSE', 'Palestinian Territory, Occupied'), ('TWN', 'Taiwan, Province of China'), ('SUN', 'Union of Soviet Socialist Republics'), ('SJM', 'Svalbard and Jan Mayen'), ('CSK', 'Czechoslovakia'), ('GIB', 'Gibraltar'), ('CHI', 'None'), ('REU', 'Réunion'), ('HKG', 'Hong Kong'), ('CUW', 'Curaçao'), ('MSR', 'Montserrat'), ('PRI', 'Puerto Rico'), ('VAT', 'Holy See'), ('TCA', 'Turks and Caicos Islands'), ('MNP', 'Northern Mariana Islands'), ('FLK', 'Falkland Islands (Malvinas)'), ('MAC', 'Macao'), ('NFK', 'Norfolk Island'), ('TKL', 'Tokelau'), ('VIR', 'Virgin Islands, U.S.'), ('ABW', 'Aruba'), ('ASM', 'American Samoa'), ('PYF', 'French Polynesia'), ('SCG', 'Serbia and Montenegro'), ('COK', 'Cook Islands'), ('GGY', 'Guernsey'), ('BMU', 'Bermuda'), ('GUF', 'French Guiana'), ('NCL', 'New Caledonia'), ('SHN', 'Saint Helena, Ascension and Tristan da Cunha'), ('INX', 'None'), ('GUM', 'Guam'), ('SXM', 'Sint Maarten (Dutch part)'), ('IMN', 'Isle of Man'), ('MTQ', 'Martinique'), ('ANT

In [170]:
# Deleted isos
deleted_isos = '''AFI French Afar and Issas
ATB British Antarctic Territory
ATN Dronning Maud Land
CTE Canton and Enderbury Islands
DDR German Democratic Republic
DHY Dahomey
GEL Gilbert and Ellice Islands
HVO Upper Volta
JTN Johnston Island
MID Midway Islands
NHB New Hebrides
PCI Pacific Islands, Trust Territory of the
PCZ Panama Canal Zone
PHI Philippines – Code changed to PHL
PUS U.S. Miscellaneous Pacific Islands
RHO Southern Rhodesia
SKM Sikkim
VDR Viet-Nam, Democratic Republic of
WAK Wake Island
YMD Yemen, Democratic'''

by_line = deleted_isos.split('\n')
by_pair = [(line.split(' ')[0],' '.join(line.split(' ')[1:]) ) for line in by_line]
deleted_isos = pd.DataFrame(by_pair)
deleted_isos.columns = ['iso', 'name']

not_reported_isos = pd.DataFrame(isos_and_names)
not_reported_isos.columns = ['iso', 'name']

not_reported_names = pd.DataFrame(names_and_isos)
not_reported_names.columns = ['iso', 'name']
not_reported_names = not_reported_names.drop([0, 7, 19, 32])

deleted_isos['reason'] = 'deleted'
not_reported_isos['reason'] = 'not un'
not_reported_names['reason'] = 'not un'

In [171]:
df = deleted_isos.copy()
df = df.append(not_reported_isos)
df = df.append(not_reported_names)

cc.write(df, 'known_non_un_isos')

Table successfully written to CARTO: https://wri-rw.carto.com/dataset/known_non_un_isos


# Uploading finished files to Carto and S3

In [47]:
for name, data in georefed_data.items():
    if 'georef' in name:
        #print(data.head())
        #write_to_S3(data,s3_bucket,s3_folder+name+'_long')
        #print('saved ' + name + ' long data to s3')
        cc.write(data, name, overwrite=True)
        print('saved ' + name + ' long data to Carto')
        #print('failed to write table ' + name)

  warn('Table will be named `{}`'.format(table_name))
Uploading in batches: 100%|██████████| 2/2 [00:56<00:00, 30.79s/it]


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/com_009_georefed
saved com.009_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/cit_013_georefed
saved cit.013_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/cit_020_georefed
saved cit.020_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/cli_022_georefed
saved cli.022_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/ene_012_georefed
saved ene.012_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/for_020_georefed
saved for.020_georefed long data to Carto
The following columns were changed in the CARTO copy of this dataframe:
[1mestimated_gross_national_income_per_capita___female[0m -> [1mestimated_gross_national_income_per_capita__female[0m
[1mestimated_gross_national_income_per_capita___male[0m -> [1mestimated_gross_national_income_per_capita__male[0m
[1mexpected_years_of_schooling___female[0m -> [1mexpected_years_of_schooling__female[0m
[1mexpected_years_of_schooling___male[0m -> [1mexpected_years_of_schooling__male[0m
[1mhdi_value___female[0m -> [1mhdi_value__female[0m
[1mhdi_value___male[0m -> [1mhdi_value__male[0m
[1mlife_expectancy_at_birth___female[0m -> [1mlife_expectancy_at_birth__female[0m
[1mlife_expectancy_at_birth___male[0m -> [1mlife_expectancy_at_birth__male[0m
[1mmean_years_of_schooling___female[0m -> [1mmean_years_of_schooling__female[0m
[1mmean_years_of_s

  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_002_georefed
saved soc.002_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))
  query=alter_query))
  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_012_georefed
saved soc.012_georefed long data to Carto
The following columns were changed in the CARTO copy of this dataframe:
[1mair_pollution___average_exposure_to_no2[0m -> [1mair_pollution__average_exposure_to_no2[0m
[1mair_pollution___average_exposure_to_pm2_5[0m -> [1mair_pollution__average_exposure_to_pm2_5[0m
[1mair_pollution___average_exposure_to_pm2_5___risk_exposure[0m -> [1mair_pollution__average_exposure_to_pm2_5__risk_exposure[0m
[1mair_pollution___average_pm2_5_exceedance[0m -> [1mair_pollution__average_pm2_5_exceedance[0m
[1meh___air_quality[0m -> [1meh__air_quality[0m
[1meh___health_impacts[0m -> [1meh__health_impacts[0m
[1meh__water_and_sanitation[0m -> [1meh_water_and_sanitation[0m
[1mev___agriculture[0m -> [1mev__agriculture[0m
[1mev___climate_and_energy[0m -> [1mev__climate_and_energy[0m
[1mev___fisheries[0m -> [1mev__fisheries[0m
[1mev___forests[0m -

  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_022_georefed
saved soc.022_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_024_georefed
saved soc.024_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_026_georefed
saved soc.026_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_045_georefed
saved soc.045_georefed long data to Carto


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_055_georefed
saved soc.055_georefed long data to Carto
The following columns were changed in the CARTO copy of this dataframe:
[1mlosses_per_gdp___rank[0m -> [1mlosses_per_gdp__rank[0m
[1mlosses_per_gdp___total[0m -> [1mlosses_per_gdp__total[0m


  warn('Table will be named `{}`'.format(table_name))


Table successfully written to CARTO: https://wri-rw.carto.com/dataset/soc_067_georefed
saved soc.067_georefed long data to Carto


In [44]:
for name, data in georefed_data.items():
    if 'georef' in name:
        print(name)

com.009_georefed
cit.013_georefed
cit.020_georefed
cli.022_georefed
ene.012_georefed
for.020_georefed
soc.002_georefed
soc.012_georefed
soc.021_georefed
soc.022_georefed
soc.024_georefed
soc.026_georefed
soc.045_georefed
soc.055_georefed
soc.067_georefed


# Updating Layers on Backoffice

('com.009', ' c61c364b-1d68-4dd9-ae3d-76c2a0022280')
Num layers: 4
('cit.013', '5d269c36-6ccf-4620-838d-431f86c30f69')
Num layers: 2
('cit.020', '6d3163f5-4e08-4830-84f1-2c5d76570a82')
Num layers: 1
('cli.022', '995ec4fe-b3cc-4cf4-bd48-b89d4e3ea072')
Num layers: 2
('ene.012', 'd446a52e-c4c1-4e74-ae30-3204620a0365')
Num layers: 3
('for.020', '03bfb30e-829f-4299-bab9-b2be1b66b5d4')
Num layers: 2
('soc.001', '0b9f0100-ce5b-430f-ad8f-3363efa05481')
Num layers: 1
('soc.002', 'd4ca3cc4-c162-469c-b341-b52284a73eaa')
Num layers: 1
('soc.012', 'f48541d3-a622-4908-9400-5ef26257ac96')
Num layers: 1
('soc.021', 'e7582657-9c16-4eb1-89e8-0211d94015c6')
Num layers: 1
('soc.022', '773a16a7-3531-4b56-8253-babd15ad7f87')
Num layers: 1
('soc.024', '6c6e70e7-5a19-46f2-9d95-34789fd20adc')
Num layers: 1
('soc.026', '0be2ce12-79b3-434b-b557-d6ea92d787fe')
Num layers: 1
('soc.045', '2cc29514-b97a-4103-92b1-c8c8e9268cd8')
Num layers: 1
('soc.055', '795a7ceb-ebc1-4479-95ad-76ea4d045ad3')
Num layers: 1
('soc.067

com.009
Index(['flow', 'index', 'isoalpha3', 'mfa13', 'mfa4', 'rw_country_code',
       'rw_country_name', 'the_geom', 'time', 'value'],
      dtype='object')
data column?value
date column?time
make slider? type anything for yes
filter col?mfa4
Available dates: [1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017]
year for layer?2017
Type of data: numeric
Type of data: numeric
Type of data: numeric
Type of data: numeric
cit.013
Index(['cartodb_georef_status', 'country',
       'infectious_parasitic_neonatal_and_nutritional_age_standardize',
       'infectious_parasitic_neonatal_and_nutritional_disability_adju',
       'injuries_age_standardized_dalys_attributable_to_the_environment',
       'injuries_disability_adjusted_life_years_dalys_attributable_to',


KeyError: ''

In [293]:
layers

defaultdict(list,
            {'com.009': [{'attributes': {'application': ['rw'],
                'applicationConfig': {},
                'dataset': ' c61c364b-1d68-4dd9-ae3d-76c2a0022280',
                'default': False,
                'description': '',
                'env': 'production',
                'interactionConfig': [{'column': 'flow',
                  'format': None,
                  'prefix': '',
                  'property': 'flow',
                  'suffix': '',
                  'type': 'string'},
                 {'column': 'index',
                  'format': None,
                  'prefix': '',
                  'property': 'index',
                  'suffix': '',
                  'type': 'numeric'},
                 {'column': 'isoalpha3',
                  'format': None,
                  'prefix': '',
                  'property': 'isoalpha3',
                  'suffix': '',
                  'type': 'string'},
                 {'column': 'mfa13',
     

In [274]:
gen_cartocss_legend('gross_value_added_wood_processing_us_million', data_tables['for.020'])

Type of data: numeric


('#table {polygon-opacity: 1; line-width: 0.5; line-color: #FFF; line-opacity: 1;} [gross_value_added_wood_processing_us_million > 0.0]{polygon-fill:#42b64a ;} [gross_value_added_wood_processing_us_million > 0.0][gross_value_added_wood_processing_us_million < 5.0]{polygon-fill:#b1434b ;} [gross_value_added_wood_processing_us_million > 5.0][gross_value_added_wood_processing_us_million < 28.0]{polygon-fill:#913850 ;} [gross_value_added_wood_processing_us_million > 28.0][gross_value_added_wood_processing_us_million < 302.0]{polygon-fill:#9932e6 ;} [gross_value_added_wood_processing_us_million > 302.0][gross_value_added_wood_processing_us_million < 41120.0]{polygon-fill:#7ea7d3 ;}',
 [{'color': '#42b64a', 'name': '>0.0'},
  {'color': '#b1434b', 'name': '<5.0'},
  {'color': '#913850', 'name': '<28.0'},
  {'color': '#9932e6', 'name': '<302.0'},
  {'color': '#7ea7d3', 'name': '<41120.0'}])

In [None]:
# Replace original dataset on Carto w/ georefed version

# Upload layer templates



In [267]:
layerdef = {'id': 'a7dbbf23-254a-49a3-a5dd-b12378fa345b', 'type': 'layer', 'attributes': {'name': '2017 Domestic Extraction - Non-Metallic Minerals (tonnes, millions)', 'slug': '2017-Domestic-Extraction-of-Raw-Materials', 'dataset': 'c61c364b-1d68-4dd9-ae3d-76c2a0022280', 'description': 'The total amount of non-metallic minerals extracted in 2017 by country.', 'application': ['rw'], 'iso': [], 'provider': 'cartodb', 'userId': '5981e73b0c069f3c93dc5e2a', 'default': True, 'protected': False, 'env': 'production', 'layerConfig': {'body': {'layers': [{'options': {'cartocss_version': '2.3.0', 'cartocss': '#com_009_1_material_flows_main {polygon-opacity: 1; line-width: 0.5; line-color: #FFF; line-opacity: 1;} [value<100000]{polygon-fill:#f2f0f7 ;} [value>=100000][value<250000]{polygon-fill:#dadaeb ;} [value>=250000][value<500000]{polygon-fill:#bcbddc ;} [value>=500000][value<1000000]{polygon-fill:#9e9ac8 ;} [value>=1000000][value<3000000]{polygon-fill:#756bb1 ;} [value>=3000000][value<25000000]{polygon-fill:#54278f ;}', 'sql': "SELECT countries.cartodb_id, ST_Transform(countries.the_geom, 3857) as the_geom_webmercator, countries.short_name, sub.* FROM(SELECT isoalpha3, time, mfa4, SUM(value) AS value FROM com_009_1_material_flows_main WHERE flow = 'DE' AND time = 2017 AND mfa4 ILIKE 'Non-metallic minerals' GROUP BY isoalpha3, time, mfa4 ORDER BY isoalpha3, time DESC) sub LEFT OUTER JOIN wri_countries_a countries ON sub.isoalpha3 = countries.adm0_a3"}, 'type': 'mapnik'}], 'minzoom': 3, 'maxzoom': 18}, 'account': 'wri-rw'}, 'legendConfig': {'type': 'choropleth', 'items': [{'name': '<100', 'color': '#f2f0f7'}, {'name': '<250', 'color': '#dadaeb'}, {'name': '<500', 'color': '#bcbddc'}, {'name': '<1000', 'color': '#9e9ac8'}, {'name': '<3000', 'color': '#756bb1'}, {'name': '<25000', 'color': '#54278f'}]}, 'interactionConfig': {}, 'applicationConfig': {}, 'staticImageConfig': {}, 'updatedAt': '2017-12-15T15:20:57.326Z'}}
layerdef

{'attributes': {'application': ['rw'],
  'applicationConfig': {},
  'dataset': 'c61c364b-1d68-4dd9-ae3d-76c2a0022280',
  'default': True,
  'description': 'The total amount of non-metallic minerals extracted in 2017 by country.',
  'env': 'production',
  'interactionConfig': {},
  'iso': [],
  'layerConfig': {'account': 'wri-rw',
   'body': {'layers': [{'options': {'cartocss': '#com_009_1_material_flows_main {polygon-opacity: 1; line-width: 0.5; line-color: #FFF; line-opacity: 1;} [value<100000]{polygon-fill:#f2f0f7 ;} [value>=100000][value<250000]{polygon-fill:#dadaeb ;} [value>=250000][value<500000]{polygon-fill:#bcbddc ;} [value>=500000][value<1000000]{polygon-fill:#9e9ac8 ;} [value>=1000000][value<3000000]{polygon-fill:#756bb1 ;} [value>=3000000][value<25000000]{polygon-fill:#54278f ;}',
       'cartocss_version': '2.3.0',
       'sql': "SELECT countries.cartodb_id, ST_Transform(countries.the_geom, 3857) as the_geom_webmercator, countries.short_name, sub.* FROM(SELECT isoalpha3, ti

In [236]:
carto_data.loc[carto_data['name'].str.lower().str.contains('conflict')]
# aliases set in Interaction Config
carto_data.loc['ea208a8b-4559-434b-82ee-95e041596a3a', 'layers']
# aliases set in dataset
carto_data.loc['ea208a8b-4559-434b-82ee-95e041596a3a']

url = "https://api.resourcewatch.org/v1/dataset/ea208a8b-4559-434b-82ee-95e041596a3a?sort=slug,-provider,userId&status=saved&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = { "application":"rw", "page[size]": 1000}

# Request all datasets, and extract the data from the response
res = req.get(url, params=payload)
data = res.json()["data"]
data

{'attributes': {'application': ['rw'],
  'attributesPath': None,
  'blockchain': {},
  'clonedHost': {},
  'connectorType': 'rest',
  'connectorUrl': 'https://rw-nrt.carto.com/tables/soc_016_conflict_protest_events/public',
  'dataPath': '',
  'env': 'production',
  'errorMessage': '',
  'geoInfo': True,
  'layer': [{'attributes': {'application': ['rw'],
     'applicationConfig': {},
     'dataset': 'ea208a8b-4559-434b-82ee-95e041596a3a',
     'default': False,
     'description': '',
     'env': 'production',
     'interactionConfig': {'output': [{'column': 'event_date',
        'format': None,
        'prefix': '',
        'property': 'Event date',
        'suffix': '',
        'type': 'date'},
       {'column': 'event_type',
        'format': None,
        'prefix': '',
        'property': 'Event Type',
        'suffix': '',
        'type': 'string'},
       {'column': 'actor1',
        'format': None,
        'prefix': '',
        'property': 'Actor 1',
        'suffix': '',
      