In [None]:
import requests as req
import pandas as pd
from pprint import pprint
from functools import reduce
import json

# Let pandas display up to 400 rows of a DataFrame before truncating the output.
pd.set_option("display.max_rows", 400)

In [None]:
# Endpoint used to list widget metadata on the Resource Watch (RW) API.
# Metadata = data that describes data.
url = "https://api.resourcewatch.org/v1/widget"

# Query parameters sent with the request:
#   application -- presumably limits the results to the "rw" application
#   page[size]  -- tells the API the maximum number of results to send back
# Only this single page is requested, so results beyond 1000 would not be fetched.
payload = { "application":"rw", "page[size]": 1000}

# Request the widgets and extract the records from the response.
# The timeout stops the cell from hanging indefinitely, and raise_for_status()
# reports an HTTP error here rather than as a confusing JSON/KeyError below.
res = req.get(url, params=payload, timeout=60)
res.raise_for_status()
api_response = res.json()["data"]

# Peek at the first record; skipped when nothing came back, because indexing
# an empty list would raise IndexError.
if api_response:
    pprint(api_response[0], depth=2)

#############################################################

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html

def extract_metadata(wgets_on_api, wget, attributes):
    """Record one widget's selected attributes in ``wgets_on_api``.

    Args:
        wgets_on_api: dict accumulator keyed by widget name; updated in place.
        wget: one widget record holding an ``"id"`` and an ``"attributes"``
            dict that contains at least ``"name"``.
        attributes: names of the entries to copy out of ``wget["attributes"]``.

    Returns:
        The same ``wgets_on_api`` object, which lets the function serve as the
        step of a ``reduce``.

    Raises:
        KeyError: if ``wget`` has no ``"id"`` or ``"attributes"``, or the
            attributes have no ``"name"``.

    Note:
        Entries are keyed by widget name, so a widget replaces any earlier
        widget that has the same name.
    """
    atts = wget["attributes"]

    # 'wget_id' is inserted first, followed by the requested attributes in order.
    record = {'wget_id': wget['id']}
    for att in attributes:
        # A missing attribute is stored as None. dict.get() does this without
        # the bare ``except:`` that used to swallow every exception (including
        # KeyboardInterrupt) raised during the lookup.
        record[att] = atts.get(att)

    wgets_on_api[atts["name"]] = record
    return wgets_on_api

# Attributes to copy out of each widget record.
attributes = ['protected', 'published', 'queryUrl', 'dataset', 'default', 'widgetConfig']

# Fold every record of the API response into a single dict keyed by widget name.
widgets_on_api = {}
for elem in api_response:
    widgets_on_api = extract_metadata(widgets_on_api, elem, attributes)

# Create the DataFrame: each key of the dict (a widget name) becomes one row,
# and the extracted fields become the columns.
widgets_on_api = pd.DataFrame.from_dict(widgets_on_api, orient='index')

In [None]:
#widgets_on_api.to_csv('widgets_on_api_feb_27_2018.csv')

In [None]:
# Shape (rows, columns) of the widgets whose 'default' value is True.
# .eq(True) always yields a boolean mask; using the column directly fails when
# a widget has no 'default' attribute, because extract_metadata stores None.
widgets_on_api[widgets_on_api['default'].eq(True)].shape

In [None]:
# Shape (rows, columns) of the widgets whose 'published' value is True.
# .eq(True) always yields a boolean mask; using the column directly fails when
# a widget has no 'published' attribute, because extract_metadata stores None.
widgets_on_api[widgets_on_api['published'].eq(True)].shape

In [None]:
#widgets_on_api[~widgets_on_api['published']].to_csv('datasets_to_drop_feb_27_2018.csv')

In [None]:
import os
from configparser import ConfigParser

# Location of the INI-style credentials file. The default is the path this
# notebook was written with; set the RW_CRED_FILE environment variable to point
# at a different file without editing the notebook.
cred_path = os.environ.get(
    "RW_CRED_FILE",
    "/Users/nathansuberi/Desktop/WRI_Programming/cred/.env",
)

config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed, so an empty result means no credentials were loaded.
# Failing here gives a clearer error than the NoSectionError that would follow.
if not config.read(cred_path):
    raise FileNotFoundError("Could not read credentials file: {}".format(cred_path))

# The token is stored as the rw_api_token option of the [auth] section.
api_token = config.get("auth", "rw_api_token")

auth_token = api_token # <Insert Auth Token Here>

In [None]:
# Preview the widgets that are explicitly unpublished (published == False).
# .eq(False) is used instead of ~ because ~ raises TypeError when the column
# contains None, which extract_metadata stores for a missing attribute; rows
# with a missing value are therefore not listed.
widgets_on_api[widgets_on_api['published'].eq(False)].reset_index()

In [None]:
# Delete every widget that is explicitly unpublished (published == False).
# .eq(False) is used instead of ~ so that widgets whose 'published' value is
# missing (None) are left alone rather than making the mask raise TypeError.
unpublished = widgets_on_api['published'].eq(False)
# reset_index() moves the widget name into an 'index' column; the frame is then
# re-keyed by widget id.
widgets_to_delete = widgets_on_api[unpublished].reset_index().set_index('wget_id')

# The headers are identical for every request, so they are built once.
headers = {
    'content-type': "application/json",
    'authorization': "Bearer " + auth_token,
}

# iterrows() yields (wget_id, row) pairs; row['dataset'] and row['index'] are
# the dataset id and widget name extracted earlier.
for rw_id, data in widgets_to_delete.iterrows():
    url = "https://api.resourcewatch.org/v1/dataset/{dataset_id}/widget/{widget_id}"
    url = url.format(dataset_id=data['dataset'], widget_id=rw_id)

    try:
        # The timeout keeps one unresponsive request from stalling the batch.
        res = req.request("DELETE", url, headers = headers, timeout=60)
        print(res.text)
        if res.ok:
            print('Deleted widget {}'.format(data['index']))
        else:
            print('Wasn\'t able to delete widget {}'.format(data['index']))
    except (TypeError, req.exceptions.RequestException) as e:
        # Report the failure and continue with the next widget; network errors
        # are caught too, so a single failed request no longer aborts the loop.
        print(e.args)
        print(data)