In [1]:
import pandas as pd
import numpy as np
import requests as req
import json
from pprint import pprint
from datetime import datetime, timedelta

pd.options.display.max_rows = 400


In [4]:
# Base URL for getting dataset metadata from RW API
# Metadata = Data that describes Data
url = "https://api.resourcewatch.org/v1/dataset?sort=slug,-provider,userId&status=saved&includes=metadata,vocabulary,widget,layer"

# page[size] tells the API the maximum number of results to send back
# There are currently between 200 and 300 datasets on the RW API
payload = { "application":"rw", "page[size]": 1000}

# Request all datasets, and extract the data from the response
res = req.get(url, params=payload)
data = res.json()["data"]

In [5]:

### Convert the json object returned by the API into a pandas DataFrame
# Another option: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.io.json.json_normalize.html
datasets_on_api = {}
for ix, dset in enumerate(data):
   atts = dset["attributes"]
   metadata = atts["metadata"]
   layers = atts["layer"]
   widgets = atts["widget"]
   tags = atts["vocabulary"]
   datasets_on_api[atts["name"]] = {
       "rw_id":dset["id"],
       "table_name":atts["tableName"],
       "provider":atts["provider"],
       "date_updated":atts["updatedAt"],
       "num_metadata":len(metadata),
       "metadata": metadata,
       "num_layers":len(layers),
       "layers": layers,
       "num_widgets":len(widgets),
       "widgets": widgets,
       "num_tags":len(tags),
       "tags":tags
   }

In [9]:
# Create the DataFrame, name the index, and sort by date_updated
# More recently updated datasets at the top
current_datasets_on_api = pd.DataFrame.from_dict(datasets_on_api, orient="index")
current_datasets_on_api.index.rename("Dataset", inplace=True)
current_datasets_on_api.sort_values(by=["date_updated"], inplace=True, ascending = False)
full_df = pd.read_csv("Refugees_CSV.csv", encoding="ISO-8859-1")
# This csv 2017_to_and_from is just the same download of refugees all origins all destinations 2017, I cut
# off the first two rows manually because they just say what the dataset is.

In [25]:

full_df["Refugees"]= pd.to_numeric(full_df["Refugees"])
origins = full_df.groupby("Country")
origins = origins["Refugees"].sum()
origins = pd.DataFrame(origins)
origins["Refugees"] = (origins["Refugees"]).astype(int)

In [40]:
# This is so you can see the country names you need to enter manually
bounds = req.get("https://raw.githubusercontent.com/wri/wri-bounds/master/dist/all_primary_countries.geojson").json()
refugee_csv = pd.read_excel("Refugee_CSV.xlsx").set_index("country2")
refugee_csv["refugees2"] = refugee_csv["refugees2"].astype(np.float64)
refugee_csv

Unnamed: 0_level_0,Country,Country Code,Refugees_USA,Refugees_World,Country_Pop,% Displaced,refugees2
country2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,Afghanistan,AFG,13969.0,30027956.0,35530080.0,0.845142,2624225.0
Albania,Albania,ALB,41333.0,140845.0,2873457.0,0.049016,12163.0
Algeria,Algeria,DZA,2509.0,64738.0,41318140.0,0.001567,3991.0
American Samoa,Andorra,AND,38.0,59.0,76965.0,0.000767,0.0
Andorra,Angola,AGO,1663.0,829659.0,29784190.0,0.027856,0.0
Angola,Antigua and Barbuda,ATG,19.0,521.0,102012.0,0.005107,8267.0
Antigua and Barbuda,Argentina,ARG,1082.0,5487.0,44271040.0,0.000124,94.0
Argentina,Armenia,ARM,43432.0,156286.0,2930450.0,0.053332,111.0
Armenia,Australia,AUS,9.0,345.0,24598930.0,1.4e-05,10766.0
Australia,Austria,AUT,37.0,112.0,8809212.0,1.3e-05,0.0


In [41]:
for ix, cntry in enumerate(bounds["features"]):
    name = cntry["properties"]["name"]
    if name not in refugee_csv.index:
        print(name)
     #  new_name = input(‘What should new name be?’)
      # if new_name:
       #    bounds[‘features’][ix][‘properties’][‘name’] = new_name
        #   bounds[‘features’][ix][‘properties’][‘refugee_count’] = origins.loc[new_name, ‘Refugees’]
       #else:
        #   print(‘no data’)
        bounds["features"][ix]["properties"]["refugees2"] = None
    else:
        bounds["features"][ix]["properties"]["refugee_count"] = refugee_csv.loc[name, "refugees2"]
        print(type(refugee_csv.loc[name, "refugees2"]))
        print(type(refugee_csv.loc[name, "Refugees_World"]))

<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
The Bahamas
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<cla

In [42]:
json.dump(bounds, open("bounds_with_refugee_counts_test.geojson","w"))