In [None]:
import requests as req
import pandas as pd
import numpy as np
from rw_api_tools.rw_api_tools import rw_api_tools as rw
# Instantiate the API helper. This rebinds `rw` from the imported callable to the
# object it returns, so the original import is no longer reachable as `rw`.
rw = rw()

In [None]:
# Raise the display limits so frames shown below are rendered without truncation
# (up to 4000 rows / 4000 columns).
pd.set_option("display.max_rows", 4000)
pd.set_option("display.max_columns", 4000)

In [None]:
# Ask the API helper for the datasets whose provider is "cartodb".
# NOTE(review): the result is used below as a pandas DataFrame with "table_name"
# and "rw_id" columns — confirm against rw_api_tools.
carto_data = rw.get_rw_datasets(provider="cartodb")

In [None]:
# Keep only the datasets whose table name contains the substring "wb".
# A vectorized mask replaces the row-by-row rebuild: column dtypes are preserved,
# the columns survive even when nothing matches, and a missing table name (NaN)
# counts as "no match" instead of raising TypeError.
is_wb_table = carto_data["table_name"].str.contains("wb", regex=False, na=False)
# .copy() yields an independent frame, so the column assignments made in later
# cells do not write into a slice of carto_data.
wb_datasets = carto_data.loc[is_wb_table].copy()

In [None]:
# Display the filtered datasets for a visual check.
wb_datasets

In [None]:
def create_query(rw_id, table_name):
    """Build the Resource Watch query URL that selects every row of a table.

    Args:
        rw_id: Dataset identifier placed in the URL path.
        table_name: Table named in the ``FROM`` clause of the SQL statement.

    Returns:
        The query URL as a string. The SQL text is inserted as-is (it is not
        URL-encoded here).
    """
    endpoint = "https://api.resourcewatch.org/v1/query/{}?sql={}"
    statement = "SELECT * FROM {}".format(table_name)
    return endpoint.format(rw_id, statement)

# One query URL per dataset row, built from that row's rw_id and table_name.
wb_datasets["query"] = [
    create_query(rw_id, table_name)
    for rw_id, table_name in zip(wb_datasets["rw_id"], wb_datasets["table_name"])
]

In [None]:
def retrieve_data(query):
    """Fetch a query URL and return the content of the response's "data" key.

    Prints the URL before the request and the number of returned items after it.

    Args:
        query: Full URL to request with HTTP GET.

    Returns:
        The value stored under ``"data"`` in the decoded JSON body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    print("RW API query:", query)
    res = req.get(query)
    # Surface HTTP failures directly; otherwise an error response only shows up
    # later as a confusing KeyError or JSON decoding error.
    res.raise_for_status()
    data = res.json()["data"]
    print("Num results: ", len(data))
    return data

# Download the rows for every dataset, in row order, and keep them next to the URL.
wb_datasets["data"] = [retrieve_data(url) for url in wb_datasets["query"]]

In [None]:
# Seed table: the rows of the first dataset, ordered by time and country code and
# indexed by a (year, country_code) MultiIndex built from those two columns.
starter = pd.DataFrame(wb_datasets["data"].iloc[0])
starter = starter.sort_values(by=["time", "country_code"])
tuples = list(zip(starter["time"], starter["country_code"]))
multi_index = pd.MultiIndex.from_tuples(tuples, names=["year", "country_code"])
starter.index = multi_index
# Drop the geometry/bookkeeping columns and the two columns now held in the index.
# "country_name" is deliberately left in place here.
starter = starter.drop(
    [
        "the_geom",
        "the_geom_webmercator",
        "cartodb_id",
        "time_code",
        "time",
        "country_code",
    ],
    axis=1,
)

In [None]:
def add_wb_col(old_data, new_data):
    """Join the value column(s) of one more dataset onto the combined table.

    Args:
        old_data: DataFrame indexed by (year, country_code) to extend.
        new_data: Records accepted by ``pd.DataFrame`` that contain at least the
            columns "time", "country_code", "country_name", "the_geom",
            "the_geom_webmercator", "cartodb_id" and "time_code".

    Returns:
        ``old_data`` left-joined (on its index) with the remaining columns of
        ``new_data``, re-indexed by (year, country_code).
    """
    indicator = pd.DataFrame(new_data)
    indicator = indicator.sort_values(by=["time", "country_code"])

    # Index by (time, country_code) pairs; the first level is exposed as "year".
    index_pairs = list(zip(indicator["time"], indicator["country_code"]))
    indicator.index = pd.MultiIndex.from_tuples(
        index_pairs, names=["year", "country_code"]
    )

    # Remove everything except the dataset's own value column(s).
    discard = [
        "the_geom",
        "the_geom_webmercator",
        "cartodb_id",
        "time_code",
        "time",
        "country_code",
        "country_name",
    ]
    indicator = indicator.drop(discard, axis=1)

    return old_data.join(indicator)

# Start from the first dataset's table and join every remaining dataset onto it,
# in row order.
wb_data = starter

for new_rows in wb_datasets["data"].iloc[1:]:
    wb_data = add_wb_col(wb_data, new_rows)

In [None]:
# Move the index levels of wb_data back into ordinary columns.
data_for_nate = wb_data.reset_index()

In [None]:
# Replace cells equal to ".." with NaN.
# NOTE(review): ".." is presumably the source's missing-value marker — confirm.
data_for_nate = data_for_nate.replace("..", np.nan)

In [None]:
# Country code of every row as an array: one entry per row, so the same code
# can appear many times.
data_for_nate_iso3 = data_for_nate["country_code"].values

In [None]:
# Import the country boundary file to make sure its unique ID matches the data.
# NOTE(review): absolute, machine-specific path — this cell only runs where the
# file exists at exactly this location.
country_boundaries = pd.read_json("/Users/nathansuberi/Desktop/RW_Data/all_primary_countries.json")["features"]
# pd.io.json.json_normalize is deprecated since pandas 1.0 in favour of the
# top-level pd.json_normalize; prefer that and fall back only on older pandas.
try:
    _json_normalize = pd.json_normalize
except AttributeError:
    _json_normalize = pd.io.json.json_normalize
# Flatten each feature's nested dict into dotted columns (e.g. "properties.iso_a3").
country_boundaries = _json_normalize(country_boundaries)

In [None]:
# Peek at the first rows of the flattened boundary features.
country_boundaries.head()

In [None]:
# Values of the "properties.iso_a3" column, one per boundary feature.
shapefile_iso3 = country_boundaries["properties.iso_a3"].values

In [None]:
# Display the boundary-file codes for inspection.
shapefile_iso3

In [None]:
# Upper bounds for the comparison: number of entries (not distinct codes) on
# each side.
data_for_nate_iso3_total_pos_matches = len(data_for_nate_iso3)
shapefile_iso3_total_pos_matches = len(shapefile_iso3)

print(
    data_for_nate_iso3_total_pos_matches,
    shapefile_iso3_total_pos_matches,
    sep="\n",
)

In [None]:
# Distinct codes present on both sides, computed once scanning the data codes and
# once scanning the boundary-file codes.
data_shapefile_matches = np.unique(
    [code for code in data_for_nate_iso3 if code in shapefile_iso3]
)
shapefile_data_matches = np.unique(
    [code for code in shapefile_iso3 if code in data_for_nate_iso3]
)

print(len(data_shapefile_matches))
print(len(shapefile_data_matches))

# Number of distinct codes the two results above have in common.
len(np.unique([code for code in data_shapefile_matches if code in shapefile_data_matches]))

In [None]:
## Load the CDIAC data and make sure its codes match here too

# NOTE(review): absolute, machine-specific folder — this cell only runs where it
# exists. The trailing "/" matters because file names are appended by plain
# string concatenation.
root_folder = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Data Packs/Materials for Nate/"
file_name = root_folder + "wb_names_only_with_iso_territory_gcb.csv"

cdiac_data = pd.read_csv(file_name)
# Values of the "ISO" column, one per CSV row.
cdiac_data_iso3 = cdiac_data["ISO"].values
# Number of entries (not distinct codes) available for matching.
cdiac_data_iso3_total_pos_matches = len(cdiac_data_iso3)

In [None]:
print(cdiac_data_iso3_total_pos_matches)

# Sort every CDIAC code into "found" / "not found" lists, once against the
# boundary-file codes and once against the data_for_nate codes. Original order
# and repeated codes are preserved in all four lists.
cdiac_shapefile_matches, cdiac_shapefile_misses = [], []
cdiac_wb_matches, cdiac_wb_misses = [], []
for iso in cdiac_data_iso3:
    if iso in shapefile_iso3:
        cdiac_shapefile_matches.append(iso)
    else:
        cdiac_shapefile_misses.append(iso)

    if iso in data_for_nate_iso3:
        cdiac_wb_matches.append(iso)
    else:
        cdiac_wb_misses.append(iso)

# Distinct matched codes for each comparison.
print(len(np.unique(cdiac_shapefile_matches)))
print(len(np.unique(cdiac_wb_matches)))

In [None]:
# Display the distinct codes shared by the boundary file and the data.
shapefile_data_matches

In [None]:
# Keep only the rows whose country code is one of the codes shared with the
# boundary file. The mask is computed from data_for_nate's own column with a
# vectorized isin, rather than from the separately stored data_for_nate_iso3
# array, so it cannot fall out of alignment with the frame if that array is stale.
keep = data_for_nate["country_code"].isin(shapefile_data_matches)
data_for_nate_keep = data_for_nate.loc[keep]
# Number of distinct country codes that survive the filter.
len(np.unique(data_for_nate_keep["country_code"]))

In [None]:
# CDIAC codes that are absent from the boundary file.
# NOTE(review): the names below are presumably the entries seen on a previous
# run — they are not derived by this cell.
cdiac_shapefile_misses
# Zaire
# Bermuda
# Greenland
# Hong Kong
# Macau
# Romania
# World

In [None]:
# CDIAC codes that are absent from data_for_nate.
# NOTE(review): the names below are presumably the entries seen on a previous
# run — they are not derived by this cell.
cdiac_wb_misses 
# Zaire
# Romania

In [None]:
# Write the filtered table to "wb_data.csv" inside root_folder. No `index`
# argument is passed, so to_csv's default applies and the row index is written
# as the first column.
data_for_nate_keep.to_csv(root_folder + "wb_data.csv")

In [None]:
# Display the final filtered table.
data_for_nate_keep