#### Purpose:

This notebook contains the code used to extract the "placenames" values from the extras of each dataset package, so that the `place` tag vocabulary could be created with a tag for every placename currently in use.

---


In [1]:
import json
import os
import requests
import ckanapi.errors
from ckanapi import RemoteCKAN

# Base URL of the production CKAN catalog that is queried below.
PROD_SITE_URL = "https://data.naturalcapitalproject.stanford.edu"

# The API key is taken from the environment so it is never stored in the
# notebook; it is None when CKAN_APIKEY is not set.
PROD_API_KEY = os.getenv('CKAN_APIKEY')

  import pkg_resources


In [2]:
def extract_extras(ckan_url, ckan_apikey, verify=True):
    """Collect the ``extras`` of every dataset listed by a CKAN site.

    Calls ``package_list`` once, then ``package_show`` once per returned
    dataset, so the number of requests grows with the size of the catalog.

    Args:
        ckan_url: Base URL of the CKAN site to query.
        ckan_apikey: API key; passed to ``RemoteCKAN`` and also sent as the
            session's ``Authorization`` header.
        verify: Assigned to the ``requests`` session's ``verify`` attribute.

    Returns:
        dict mapping each identifier returned by ``package_list`` to that
        package's ``extras`` value, or an empty list when the package has no
        ``extras`` key.
    """
    http = requests.Session()
    http.verify = verify
    http.headers.update({'Authorization': ckan_apikey})

    with RemoteCKAN(ckan_url, apikey=ckan_apikey, session=http) as ckan:
        extras_by_dataset = {
            name: ckan.action.package_show(id=name).get('extras', [])
            for name in ckan.action.package_list()
        }

    return extras_by_dataset

In [3]:
# Queries the production catalog over the network (one request per dataset).
prod_data = extract_extras(ckan_url=PROD_SITE_URL, ckan_apikey=PROD_API_KEY)

In [4]:
def placenames_by_dataset(data):
    """Map each package to the decoded value of its ``placenames`` extra.

    Args:
        data: dict mapping a package identifier to a list of extras, where
            each extra is a dict with ``key`` and ``value`` entries.

    Returns:
        dict mapping package identifier to the JSON-decoded ``value`` of the
        first extra whose ``key`` is ``'placenames'``. Packages without such
        an extra are left out of the result.
    """
    decoded = {}
    for dataset_id, dataset_extras in data.items():
        # Only the first matching extra is used; later duplicates are ignored.
        first_match = next(
            (entry for entry in dataset_extras
             if entry.get('key') == 'placenames'),
            None,
        )
        if first_match is not None:
            decoded[dataset_id] = json.loads(first_match['value'])
    return decoded

In [5]:
# Keep only the decoded "placenames" extra of each dataset that has one.
pkg_placenames = placenames_by_dataset(data=prod_data)

In [6]:
def placenames_set(pkg_placenames):
    """Return the distinct placenames found across all packages.

    Args:
        pkg_placenames: dict whose values are iterables of placenames.

    Returns:
        set containing every element of every value, without duplicates.
    """
    return {
        placename
        for names in pkg_placenames.values()
        for placename in names
    }

In [7]:
# Collapse the per-dataset placenames into one set of distinct values.
all_placenames = placenames_set(pkg_placenames=pkg_placenames)

In [8]:
# Display the distinct placenames collected from the catalog.
all_placenames

{'GLOBAL',
 'PACIFIC ISLANDS',
 'PUERTO RICO',
 'UNITED STATES',
 'US VIRGIN ISLANDS'}