# Import Libraries

In [None]:
# Data fetching library
import requests as req
# used below: 'res' stands for 'response'

# File management library
import os

# Configure logging library
import logging
import sys
logging.basicConfig(stream=sys.stderr, level=logging.INFO)

# Data manipulation libraries
import pandas as pd
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
import json

# Uploading results to Carto


# Data visualization library
## Uses Vega-Lite, which can be easily put in websites
from vega3 import Vega

# Load Data from RW API

In [None]:
# Base URL for getting dataset metadata from RW API
url = "https://api.resourcewatch.org/v1/dataset?sort=slug,-provider,userId&status=saved&published=true&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = {"application": "rw", "page[size]": 1000}

# Request all datasets. The timeout keeps the notebook from hanging forever on
# a stalled connection, and raise_for_status() reports an HTTP error as such
# instead of letting it surface as a confusing KeyError on "data" below.
res = req.get(url, params=payload, timeout=60)
res.raise_for_status()

# Extract the list of dataset records from the response body
data = res.json()["data"]

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html
# Build one summary record per dataset, keyed by the dataset id. Each record
# keeps the raw metadata / layer / widget / vocabulary lists next to their
# lengths so later cells can both count and inspect them.
datasets_on_api = {}
for dset in data:
    atts = dset["attributes"]
    metadata = atts["metadata"]
    layers = atts["layer"]
    widgets = atts["widget"]
    tags = atts["vocabulary"]
    datasets_on_api[dset["id"]] = {
        "name": atts["name"],
        "table_name": atts["tableName"],
        "provider": atts["provider"],
        "date_updated": atts["updatedAt"],
        "num_metadata": len(metadata),
        "metadata": metadata,
        "num_layers": len(layers),
        "layers": layers,
        "num_widgets": len(widgets),
        "widgets": widgets,
        "num_tags": len(tags),
        "tags": tags,
    }

# Build the DataFrame (one row per dataset id), label the index "Dataset",
# and order the rows so the most recently updated datasets come first
current_datasets_on_api = (
    pd.DataFrame.from_dict(datasets_on_api, orient='index')
    .rename_axis("Dataset")
    .sort_values(by="date_updated", ascending=False)
)

# Select all Carto datasets on the API:
provider = "cartodb"
carto_ids = (current_datasets_on_api["provider"] == provider)
# Take an explicit copy: new columns are assigned onto carto_data further
# down, and assigning into a bare selection of another DataFrame triggers
# pandas' SettingWithCopyWarning.
carto_data = current_datasets_on_api.loc[carto_ids].copy()

# Lazy %-style arguments: the message is only formatted if it is emitted
logging.info("Number of Carto datasets: %s", carto_data.shape[0])

# Find data sets to run the analysis with

In [None]:
# Select only national level data sets

def _pick_english_info(mdatas, key):
    """Return ``info[key]`` from the first English metadata record, else None.

    Records without an ``attributes``/``info`` mapping or a ``language`` entry
    are tolerated instead of raising ``KeyError``.
    """
    for mdata in mdatas or ():
        attributes = mdata.get('attributes') or {}
        if attributes.get('language') == 'en':
            # Only the first English record is consulted, even if it lacks key
            info = attributes.get('info') or {}
            return info.get(key)
    return None


def pick_spatial_resolution(mdatas):
    """Return the ``spatial_resolution`` of a dataset's English metadata.

    Args:
        mdatas: Iterable of metadata records, each a dict with an
            ``attributes`` dict holding ``language`` and ``info``.

    Returns:
        The ``info['spatial_resolution']`` value of the first record whose
        language is ``'en'``, or None if there is no such record or value.
    """
    return _pick_english_info(mdatas, 'spatial_resolution')


def pick_temporal_resolution(mdatas):
    """Return the ``frequency_of_updates`` of a dataset's English metadata.

    Args:
        mdatas: Iterable of metadata records, each a dict with an
            ``attributes`` dict holding ``language`` and ``info``.

    Returns:
        The ``info['frequency_of_updates']`` value of the first record whose
        language is ``'en'``, or None if there is no such record or value.
    """
    return _pick_english_info(mdatas, 'frequency_of_updates')
        
# Attach the spatial and temporal resolution found in each dataset's metadata
carto_data.loc[:, 'spatial_resolution'] = [pick_spatial_resolution(m) for m in carto_data['metadata']]
carto_data.loc[:, 'temporal_resolution'] = [pick_temporal_resolution(m) for m in carto_data['metadata']]

# Keep national-level data sets, then those updated annually. Each filter
# result is copied explicitly because columns are added to
# annual_national_datasets later; assigning into a bare selection of another
# DataFrame triggers pandas' SettingWithCopyWarning.
national_datasets = carto_data[carto_data['spatial_resolution'] == 'National'].copy()
annual_national_datasets = national_datasets[national_datasets['temporal_resolution'] == 'Annual'].copy()

logging.info("{} annual, national level data sets".format(annual_national_datasets.shape[0]))
annual_national_datasets.shape

## Find which data sets cover the right years and are "simple"

### Find year column

In [None]:
def inquire_columns(info):
    """Fetch the column names of one dataset's table through the RW query API.

    Args:
        info: ``(ds_id, table_name)`` pair identifying the dataset to query.

    Returns:
        The keys of the first row returned by the query (a ``dict_keys``
        view), or None when the request or the parsing of its response fails.
    """
    ds_id, table_name = info
    print(ds_id)
    # Template query string used to query RW datasets
    q = "https://api.resourcewatch.org/v1/query/{}?sql=SELECT * FROM {} LIMIT 1".format(ds_id,table_name)
    print(q)
    try:
        # The timeout stops one stalled request from blocking the whole run
        return req.get(q, timeout=60).json()['data'][0].keys()
    except Exception:
        # Best-effort probe: a dataset that cannot be queried yields None.
        # Unlike a bare `except:`, this lets KeyboardInterrupt through, and
        # the failure is logged instead of silently discarded.
        logging.warning("Could not read columns of dataset %s", ds_id, exc_info=True)
        return None
    
# Collect column names: one API lookup per (dataset id, table name) pair
annual_national_datasets.loc[:, 'column_names'] = [
    inquire_columns(id_and_table)
    for id_and_table in zip(annual_national_datasets.index, annual_national_datasets['table_name'])
]

In [None]:
def pick_year_col(column_names):
    """Ask the user which column of a dataset holds the year.

    Args:
        column_names: Column names printed for the user as a hint.

    Returns:
        The name typed by the user with surrounding whitespace removed, or
        None when nothing (or only whitespace) was entered.
    """
    print("Available column names: {}".format(column_names))
    # Strip the answer: a stray space would otherwise be kept as (part of) the
    # column name, and a whitespace-only answer would not count as "nothing".
    year_col = input("What is the year column called? Type nothing for None").strip()
    return year_col or None

# Prompt once per dataset for the name of its year column
annual_national_datasets.loc[:, 'year_col'] = [
    pick_year_col(names) for names in annual_national_datasets['column_names']
]

In [None]:
# Display the year column chosen for each dataset
annual_national_datasets['year_col']

### Find dates covered by each dataset

In [None]:
def try_datetime_conversion_to_year(possible_dt):
    """Return the year encoded in *possible_dt* as an int.

    Args:
        possible_dt: Either a timestamp string formatted as
            ``YYYY-MM-DDTHH:MM:SSZ`` or any value ``int()`` can convert
            (for example ``2010`` or ``"2010"``).

    Returns:
        The year as an int.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when the value is
            neither a matching timestamp nor convertible to an integer.
    """
    # Imported locally because no datetime import is visible at file level.
    # Without it the strptime call raised NameError, the old bare `except:`
    # hid that, and timestamp strings always fell through to int() and failed.
    from datetime import datetime

    DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
    try:
        return datetime.strptime(possible_dt, DATE_FORMAT).year
    except (TypeError, ValueError):
        # TypeError: not a string (e.g. a numeric year);
        # ValueError: a string that does not match DATE_FORMAT.
        return int(possible_dt)

def find_years_covered(info):
    """List the distinct years present in a dataset's year column.

    Args:
        info: ``(ds_id, year_col, table_name)`` triple. A falsy ``year_col``
            means no year column was identified for the dataset.

    Returns:
        A list of years as ints (one per distinct value in the column), or
        None when ``year_col`` is falsy or the query/conversion fails.
    """
    ds_id, year_col, table_name = info
    if not year_col:
        return None

    print(ds_id)
    # Template query string used to query RW datasets
    q = "https://api.resourcewatch.org/v1/query/{}?sql=SELECT {} FROM {}".format(ds_id,year_col,table_name)
    print(q)
    try:
        # The timeout stops one stalled request from blocking the whole run
        res = pd.DataFrame(req.get(q, timeout=60).json()['data'])
        unique_years = res[year_col].unique()
        return [try_datetime_conversion_to_year(year) for year in unique_years]
    except Exception:
        # Best-effort: a dataset that cannot be queried or parsed yields None.
        # Unlike a bare `except:`, this lets KeyboardInterrupt through, and
        # the failure is logged instead of silently discarded.
        logging.warning("Could not determine years covered by dataset %s", ds_id, exc_info=True)
        return None
    
    
# Look up the years covered for each (dataset id, year column, table name)
annual_national_datasets['years_covered'] = [
    find_years_covered(triple)
    for triple in zip(
        annual_national_datasets.index,
        annual_national_datasets['year_col'],
        annual_national_datasets['table_name'],
    )
]

In [None]:
# Display the years found for each dataset (None where no year column was
# given or the lookup failed)
annual_national_datasets['years_covered']

### Pick the value column to use

### Determine whether any filter on an additional column is needed (e.g. MFA data would need to be filtered by mfa13 category)

# Run the analysis

# Upload the results to Carto and RW API

# Configure the vega chart output, which can be uploaded to the RW API

In [None]:
# Vega theme configuration (colour ranges, axis styling, default mark styles)
# kept as JSON text; it is parsed with json.loads where the chart is assembled.
Config = """{
  "range": {
    "dotSize": [20, 250],
    "category": [
      "#3BB2D0",
      "#2C75B0",
      "#FAB72E",
      "#EF4848",
      "#65B60D",
      "#717171"
    ],
    "category20": [
      "#3BB2D0",
      "#2C75B0",
      "#FAB72E",
      "#EF4848",
      "#65B60D",
      "#C32D7B",
      "#F577B9",
      "#5FD2B8",
      "#F1800F",
      "#9F1C00",
      "#A5E9E3",
      "#B9D765",
      "#393F44",
      "#CACCD0",
      "#717171"
    ],
    "ordinal": { "scheme": "greens" },
    "ramp": { "scheme": "purples" }
  },
  "axis": {
    "labelFontSize": 13,
    "labelFont": "Lato",
    "labelColor": "#717171",
    "labelPadding": 10,
    "ticks": true,
    "tickSize": 5,
    "tickColor": "#A9ABAD",
    "tickOpacity": 0.5,
    "tickExtra": false
  },
  "axisX": {
    "bandPosition": 0.5,
    "domainWidth": 1.2,
    "domainColor": "#A9ABAD",
    "labelAlign": "center",
    "labelBaseline": "top"
  },
  "axisY": {
    "domain": false,
    "labelAlign": "left",
    "labelBaseline": "bottom",
    "tickOpacity": 0.5,
    "grid": true,
    "ticks": false,
    "gridColor": "#A9ABAD",
    "gridOpacity": 0.5
  },
  "mark": {
    "fill": "#3BB2D0"
  },
  "symbol": {
    "fill": "#3BB2D0",
    "stroke": "#fff"
  },
  "rect": {
    "cornerRadius": 0.3,
    "fill": "#3BB2D0"
  },
  "line": {
    "interpolate": "linear",
    "stroke": "#3BB2D0",
    "fillOpacity": 0
  }
}
"""

In [None]:
# Vega (v3) bar-chart specification: one rect mark per row of the "table"
# data source, with x on a band scale and y on a linear scale.
specV3 = json.loads("""{
  "autosize": {
    "type": "pad",
    "resize": true,
    "contains": "padding"
  },
  "data": [
    {
      "name": "table",
      "url": "https://wri-rw.carto.com/api/v2/sql?q=SELECT year as x, bleached_area as y FROM sotp_bleaching_alerts ORDER  BY year",
      "format": {
        "type": "json",
        "property": "rows"
      },
      "transform": [
        {
          "type": "window",
          "sort": {
            "field": "x",
            "order": "ascending"
          }
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "x",
      "type": "band",
      "domain": {
        "data": "table",
        "field": "x"
      },
      "range": "width",
      "padding": 0.05,
      "round": true
    },
    {"type": "linear",
      "name": "y",
      "domain": {
        "data": "table",
        "field": "y"
      },
      "nice": true,
      "range": "height"
    }
  ],
  "axes": [
    {
      "orient": "bottom",
      "scale": "x",
      "labelOverlap":"parity",
      "encode": {
        "labels": {
          "update": {
            "angle": {
              "value": 90
            },
            "align": {
              "value": "left"
            },
            "baseline": {
              "value": "middle"
            }
          }
        }
      }
    },
    {
      "orient": "left",
      "labelOverlap":"parity",
      "scale": "y",
      "format": "2s"
    }
  ],
  "marks": [
    {
      "type": "rect",
      "from": {
        "data": "table"
      },
      "encode": {
        "enter": {
          "x": {
            "scale": "x",
            "field": "x"
          },
          "width": {
            "scale": "x",
            "band": 1
          },
          "y": {
            "scale": "y",
            "field": "y"
          },
          "y2": {
            "scale": "y",
            "value": 0
          }
        },
        "update": {
          "opacity": {
            "value": 1
          }
        },
        "hover": {
          "opacity": {
            "value": 0.8
          }
        }
      }
    }
  ]
}""")
                  
# Assemble the spec to render: a shallow copy of the V3 spec extended with
# the parsed theme config and a fixed 300x200 canvas, then display it
vega4view = {
    **specV3,
    'config': json.loads(Config),
    'width': 300,
    'height': 200,
}
Vega(vega4view)