In [1]:
import json
import os
import requests
import datetime
import numpy as np
import pandas as pd
import glob

In [2]:
# Target directory for the archived API JSON files; created if it does not exist.
# makedirs(exist_ok=True) is safe to re-run, avoids the check-then-create race of
# os.path.exists() + os.mkdir(), and also creates missing parent directories
# should data_dir be changed to a nested path.
data_dir = 'rw_api_archive'
os.makedirs(data_dir, exist_ok=True)

In [3]:
# Download a copy of the RW dataset, layer, and widget endpoints and archive each
# response's 'data' payload as a timestamped JSON file in data_dir.
# Note the 'env' and 'application' params in the URL string.
# NOTE(review): only a single page of up to 10000 records is requested per
# endpoint; anything beyond that would presumably be missing from the archive -- confirm.
api_list = ['dataset','layer','widget']

for endpoint in api_list:
    url = f'http://api.resourcewatch.org/v1/{endpoint}?page[size]=10000&env=production&application=rw'
    try:
        # Without a timeout a stalled connection would hang this cell indefinitely.
        r = requests.get(url, timeout=120)
    except requests.RequestException as err:
        # Report network failures the same way as HTTP errors and move on to
        # the next endpoint instead of aborting the whole loop.
        print(f"Error retrieving {endpoint} API endpoint: {err}")
        continue

    if r.status_code == 200:
        data = r.json()['data']

        # The reporting headers are optional (e.g. Content-Length is absent on
        # chunked responses), so a missing one must not prevent the file from
        # being saved. When Content-Length is missing, fall back to the size of
        # the decoded body, which may be larger than the transferred size.
        content_length = r.headers.get('content-length')
        size_bytes = float(content_length) if content_length is not None else len(r.content)
        mb = round(size_bytes/1e6, 2)

        print(f"Received {mb} MB",r.headers.get('Content-Encoding', 'identity'), "file in",
              r.headers.get('X-Response-Time', 'unknown time'),f"for {endpoint} endpoint")

        # The timestamp in the file name keeps every download as a separate snapshot.
        d8 = datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")
        with open(f'{data_dir}/{endpoint}_{d8}.json', 'w') as fp: # assumes data_dir already exists
            json.dump(data, fp)
    else:
        print(f"Error retrieving {endpoint} API endpoint")

Received 0.12 MB gzip file in 791 ms for dataset endpoint
Received 0.95 MB gzip file in 2717 ms for layer endpoint
Received 0.84 MB gzip file in 2538 ms for widget endpoint


In [4]:
# Select the most recent matching JSON file in the target directory for each
# endpoint and load it.
def _latest_download(pattern):
    """Return the path with the greatest ctime among files matching ``pattern``.

    Raises:
        FileNotFoundError: if no file matches. Without this check, max() on the
            empty match list fails with an uninformative "empty sequence" error.
    """
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(
            f"No archived file matches {pattern!r}; download the API endpoints first"
        )
    # NOTE: os.path.getctime is the creation time on Windows but the last
    # metadata-change time on Unix.
    return max(matches, key=os.path.getctime)


def _load_json(path):
    """Open ``path`` and return its parsed JSON content."""
    with open(path) as json_file:
        return json.load(json_file)


recent_dataset_dl = _latest_download(f'{data_dir}/datas*.json')
recent_layer_dl = _latest_download(f'{data_dir}/layer*.json')
recent_widget_dl = _latest_download(f'{data_dir}/widge*.json')

rw_dataset_data = _load_json(recent_dataset_dl)
rw_layer_data = _load_json(recent_layer_dl)
rw_widget_data = _load_json(recent_widget_dl)

In [5]:
# Substring to look for. An object counts as a match when the substring occurs
# anywhere in the str() rendering of that object.
subs = 'ene_028_access_clean_cooking' #<-- SET SUBSTRING HERE
datasets_using = [record for record in rw_dataset_data if subs in str(record)]
layers_using = [record for record in rw_layer_data if subs in str(record)]
widgets_using = [record for record in rw_widget_data if subs in str(record)]

In [6]:
# Print the search substring, then the ids of the matching objects grouped
# under a heading per object type.
litems = [datasets_using, layers_using, widgets_using]
print(subs)
for label, matches in zip(['datasets', 'layers', 'widgets'], litems):
    print(label)
    for obj in matches:
        print(obj['id'])

ene_028_access_clean_cooking
datasets
c665f519-eef9-4f67-a8bf-7e3e6dc8bfcd
layers
95ecdbc7-be90-4c45-9c64-522a5b2e32e6
9e9f1761-17be-472b-aaa8-3e39fbf4e41e
ed564b23-9f4d-4366-9ed0-3283c42ceb48
fb973a76-cc49-4121-b300-45c0fe67c5d6
6a8d573a-6ba4-4c3a-9734-d5755965a364
8ae3c3a9-801d-4cc0-9440-03f454c79557
dab6df2e-99f4-4121-8bff-4e69c0dc99db
9ad47949-d103-45d5-99ef-d670373f06d1
250019ca-1288-4590-ae4d-5db49ae689c0
840dfb2a-e52f-4336-a258-1e6998387959
c8d84799-e67c-47f0-bda9-27bdf990eb18
6eb06286-e6c9-486d-a341-09df62b0d6f8
594b1ecb-d747-42a9-a05d-b33109c3963a
30f3c77e-5e8a-49d3-b20c-789c30786012
2dba8a9f-89c5-4796-aeb5-6c51cbe287a5
2db13125-a82b-4c59-b29c-faa06c762236
e81f3915-aff0-4e71-864a-3241a2cfb5d4
widgets
bcbdf5db-6e27-4250-9b74-ce238ef2bde9
