In [1]:
# Data fetching library
import requests as req
# used below: 'res' stands for 'response'

# File management library
import os

# Data manipulation libraries
import pandas as pd
# Show up to 1000 rows / columns before pandas truncates a displayed frame
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 1000)

# Data visualization library
## Uses Vega-Lite, which can be easily put in websites
from altair import *

# Find information about all datasets on the Resource Watch API and format it as a DataFrame

In [2]:
# Base URL for getting dataset metadata from the RW API
# Metadata = data that describes data
url = "https://api.resourcewatch.org/v1/dataset?sort=slug,-provider,userId&status=saved&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = {"application": "rw", "page[size]": 1000}

# Request all datasets, and extract the data from the response.
# Stop on an HTTP error status here, rather than failing on the next line
# with a confusing KeyError for the missing "data" key. The timeout keeps
# the notebook from hanging forever if the API does not answer.
res = req.get(url, params=payload, timeout=120)
res.raise_for_status()
data = res.json()["data"]

#############################################################

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html
# NOTE: rows are keyed by dataset name, so if two datasets share a name
# only the last one returned by the API is kept.
datasets_on_api = {}
for dset in data:
    atts = dset["attributes"]
    metadata = atts["metadata"]
    layers = atts["layer"]
    widgets = atts["widget"]
    tags = atts["vocabulary"]
    datasets_on_api[atts["name"]] = {
        "rw_id": dset["id"],
        "table_name": atts["tableName"],
        "provider": atts["provider"],
        "date_updated": atts["updatedAt"],
        "num_metadata": len(metadata),
        "metadata": metadata,
        "num_layers": len(layers),
        "layers": layers,
        "num_widgets": len(widgets),
        "widgets": widgets,
        "num_tags": len(tags),
        "tags": tags,
    }

# Create the DataFrame, name the index, and sort by date_updated
# (more recently updated datasets at the top). Built as one chain without
# inplace=True so re-running the cell always starts from a fresh frame.
current_datasets_on_api = (
    pd.DataFrame.from_dict(datasets_on_api, orient="index")
    .rename_axis("Dataset")
    .sort_values(by=["date_updated"], ascending=False)
)

In [3]:
# Preview the five most recently updated datasets on the Resource Watch API
current_datasets_on_api.head(n=5)

Unnamed: 0_level_0,rw_id,table_name,provider,date_updated,num_metadata,metadata,num_layers,layers,num_widgets,widgets,num_tags,tags
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
wat.036 Water Stress Country Ranking,86fff033-dd27-41be-be64-2ec8983b3db1,wat_036_water_stress_country_ranking,cartodb,2018-01-24T19:33:58.147Z,1,"[{'id': '5a5df56d1db3960012e77a74', 'type': 'm...",1,[{'id': 'c4b5b246-1dcf-492f-8352-278e737ffdf7'...,1,[{'id': 'fd6f666b-da36-497f-946d-1af871aa1535'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cli.017 Glacier Extents,e2971008-029f-441b-97cd-ee0555728182,cli_017_glacier_extent,cartodb,2018-01-24T19:13:05.952Z,1,"[{'id': '59f881e85366950011d78d59', 'type': 'm...",1,[{'id': '614b5627-b34b-41aa-8c07-baad3ab649f5'...,3,[{'id': '0174ee41-b64d-44b5-85be-8c99bf9dc436'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cli.029 Vulnerability to Climate Change Index,fa6443ff-eb95-4d0f-84d2-f0c91682efdf,cli_029_vulnerability_to_cc,cartodb,2018-01-24T14:22:57.938Z,2,"[{'id': '59a427ab7b6c000012baa707', 'type': 'm...",1,[{'id': '07b6e469-d1c4-4ab8-a8a7-cf37f344ae4c'...,2,[{'id': '8c44a854-63e7-4ce6-b864-4858c7394852'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cit.003 Air Quality Measurements (PM25),815eaa09-d626-495e-91e2-523cb07de475,%20cit_003a_air_quality_pm25,cartodb,2018-01-21T22:16:44.233Z,1,"[{'id': '5a3eaec21479c50011fcbd56', 'type': 'm...",9,[{'id': 'a0cf3282-976c-455c-9352-3cc559dc76c0'...,9,[{'id': '0871f00b-c48d-46ab-a515-ac550a2f351b'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
[delete] soc.063 Anthropogenic Biomes of the World,d8a45b34-4cc0-42f4-957d-e13b37e9182e,anthro2_a2000,cartodb,2018-01-19T21:27:12.553Z,2,"[{'id': '59d7cac1331a2f00111422e7', 'type': 'm...",1,[{'id': '60c2f13b-9522-4255-98e2-f373f9b4a1f2'...,2,[{'id': 'd81a14b0-8388-4b75-abb0-8149102426f0'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."


In [4]:
# List every distinct provider that hosts RW data
pd.unique(current_datasets_on_api["provider"])

array(['cartodb', 'gee', 'featureservice', 'nexgddp', 'csv', 'json',
       'bigquery', 'rasdaman', 'wms'], dtype=object)

In [5]:
# Keep only the datasets stored on a single provider. Options:
## cartodb, csv, gee, featureservice, bigquery, wms, json, rasdaman
provider = "cartodb"

# carto_ids is a boolean mask: True where the row's provider matches
carto_ids = current_datasets_on_api["provider"].eq(provider)
carto_data = current_datasets_on_api[carto_ids]

print("Number of Carto datasets: ", len(carto_data))

Number of Carto datasets:  213


In [6]:
# Preview the first five datasets for the selected provider
carto_data.head(n=5)

Unnamed: 0_level_0,rw_id,table_name,provider,date_updated,num_metadata,metadata,num_layers,layers,num_widgets,widgets,num_tags,tags
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
wat.036 Water Stress Country Ranking,86fff033-dd27-41be-be64-2ec8983b3db1,wat_036_water_stress_country_ranking,cartodb,2018-01-24T19:33:58.147Z,1,"[{'id': '5a5df56d1db3960012e77a74', 'type': 'm...",1,[{'id': 'c4b5b246-1dcf-492f-8352-278e737ffdf7'...,1,[{'id': 'fd6f666b-da36-497f-946d-1af871aa1535'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cli.017 Glacier Extents,e2971008-029f-441b-97cd-ee0555728182,cli_017_glacier_extent,cartodb,2018-01-24T19:13:05.952Z,1,"[{'id': '59f881e85366950011d78d59', 'type': 'm...",1,[{'id': '614b5627-b34b-41aa-8c07-baad3ab649f5'...,3,[{'id': '0174ee41-b64d-44b5-85be-8c99bf9dc436'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cli.029 Vulnerability to Climate Change Index,fa6443ff-eb95-4d0f-84d2-f0c91682efdf,cli_029_vulnerability_to_cc,cartodb,2018-01-24T14:22:57.938Z,2,"[{'id': '59a427ab7b6c000012baa707', 'type': 'm...",1,[{'id': '07b6e469-d1c4-4ab8-a8a7-cf37f344ae4c'...,2,[{'id': '8c44a854-63e7-4ce6-b864-4858c7394852'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
cit.003 Air Quality Measurements (PM25),815eaa09-d626-495e-91e2-523cb07de475,%20cit_003a_air_quality_pm25,cartodb,2018-01-21T22:16:44.233Z,1,"[{'id': '5a3eaec21479c50011fcbd56', 'type': 'm...",9,[{'id': 'a0cf3282-976c-455c-9352-3cc559dc76c0'...,9,[{'id': '0871f00b-c48d-46ab-a515-ac550a2f351b'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."
[delete] soc.063 Anthropogenic Biomes of the World,d8a45b34-4cc0-42f4-957d-e13b37e9182e,anthro2_a2000,cartodb,2018-01-19T21:27:12.553Z,2,"[{'id': '59d7cac1331a2f00111422e7', 'type': 'm...",1,[{'id': '60c2f13b-9522-4255-98e2-f373f9b4a1f2'...,2,[{'id': 'd81a14b0-8388-4b75-abb0-8149102426f0'...,1,"[{'type': 'vocabulary', 'attributes': {'resour..."


In [11]:
# Names of the Carto datasets that mention electricity (case-insensitive)
electricity_datasets = list(
    filter(lambda name: 'electricity' in name.lower(), carto_data.index)
)
electricity_datasets

['Electricity/Heat Per Capita',
 'ene.024 Electricity Production, Sources, and Access',
 'ene.012 Access to Electricity']

# Import a tab-separated file of latitude/longitude points. These points can be used to query the Resource Watch API

In [7]:
# Data files are read from a "data" folder inside the current working
# directory (normally the folder that holds this notebook)
DATA_FOLDER = "".join([os.getcwd(), "/data/"])

# src: https://developers.google.com/public-data/docs/canonical/countries_csv
# The file is tab-separated, one row per country
country_points_path = DATA_FOLDER + "country_points.tsv"
country_points = pd.read_csv(country_points_path, delimiter="\t")

country_points.head(n=5)

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


# Create queries to retrieve specific information from the Resource Watch API

In [8]:
# URL template for RW queries; filled with (dataset rw_id, SQL statement)
query_base = "https://api.resourcewatch.org/v1/query/{}?sql={}"

# SQL template for RW queries; filled with (table name, point geometry, table name)
sql = (
    "SELECT * FROM {} WHERE "
    "ST_Intersects({}, "
    "{}.the_geom)"
)

In [12]:
# Pick a dataset from carto_data
dataset = 'ene.012 Access to Electricity'

# Look up the Carto table name and the Resource Watch ID (rw_id) together;
# the rw_id is needed to query the RW API
table_name, rw_id = carto_data.loc[dataset, ["table_name", "rw_id"]]

def query_api(row, column="yr_2014"):
    """Look up one column of the selected RW dataset at a single point.

    Builds a SQL query from the notebook-level templates `sql` and
    `query_base` (using the notebook-level `table_name` and `rw_id`), sends
    it to the Resource Watch API, and reads `column` from the first record
    whose geometry intersects the point.

    Parameters
    ----------
    row : object with `longitude` and `latitude` attributes
        For example one tuple produced by `DataFrame.itertuples()`.
    column : str, default "yr_2014"
        Name of the field to read from the first matching record.

    Returns
    -------
    The value of `column` in the first matching record, or the string
    "No matching data found" when the response is not JSON, has no "data"
    entry, has no records, or the first record lacks `column`.
    """
    # Construct a Well-Known-Text (WKT) Point string
    # WKT formats points as 'POINT(x y)', i.e. 'POINT(Longitude Latitude)'
    # https://www.drupal.org/node/511370
    point = "ST_GeomFromText('POINT({} {})', 4326)".format(row.longitude, row.latitude)

    # Use the notebook-level templates to create/send a query to the RW API
    query_sql = sql.format(table_name, point, table_name)
    query = query_base.format(rw_id, query_sql)
    res = req.get(query)

    # Only the failures that mean "nothing usable came back" are mapped to
    # the placeholder. A bare `except:` would also hide typos and swallow
    # KeyboardInterrupt, making it impossible to stop a long run.
    try:
        data = res.json()["data"]
        return data[0][column]
    except (ValueError, KeyError, IndexError, TypeError):
        return "No matching data found"

# Query the API once per country. Assigning a plain list stores the results
# by position, so values cannot be misaligned (or turned into NaN) if
# country_points ever has an index other than 0..n-1.
country_points["% Urban Population with Electricity Access"] = [
    query_api(row) for row in country_points.itertuples()
]

In [14]:
# Preview the result; displaying the whole frame would print every country
country_points.head(10)

Unnamed: 0,country,latitude,longitude,name,% Urban Population with Electricity Access
0,AD,42.546245,1.601554,Andorra,No matching data found
1,AE,23.424076,53.847818,United Arab Emirates,No matching data found
2,AF,33.93911,67.709953,Afghanistan,No matching data found
3,AG,17.060816,-61.796428,Antigua and Barbuda,No matching data found
4,AI,18.220554,-63.068615,Anguilla,No matching data found
5,AL,41.153332,20.168331,Albania,No matching data found
6,AM,40.069099,45.038189,Armenia,No matching data found
7,AN,12.226079,-69.060087,Netherlands Antilles,No matching data found
8,AO,-11.202692,17.873887,Angola,No matching data found
9,AQ,-75.250973,-0.071389,Antarctica,No matching data found


In [15]:
# Histogram of electricity access across countries.
# The column can hold the placeholder string "No matching data found" next
# to real numbers, which cannot be binned. Convert it to numbers (the
# placeholder becomes NaN), drop the missing rows, and declare the field
# as quantitative so it is binned as a number.
access_col = "% Urban Population with Electricity Access"
numeric_access = pd.to_numeric(country_points[access_col], errors="coerce")
plot_data = (
    country_points
    .assign(**{access_col: numeric_access})
    .dropna(subset=[access_col])
)

Chart(plot_data).mark_bar().encode(
    x=X(access_col, type="quantitative", bin=Bin()),
    y="count(*):Q")

You can access infer_dtype as pandas.api.types.infer_dtype
  typ = pd.lib.infer_dtype(data)
