# Provide geometries from CARTO with geostore IDs
This script reads a CARTO table provides each feature with a geostore ID and writes back the table to CARTO.
## Tables to read:
- [River basins - level 3](https://resourcewatch.carto.com/u/wri-rw/tables/wat_068_rw0_watersheds_edit/public?redirected=true) `SELECT * FROM "wri-rw".wat_068_rw0_watersheds_edit WHERE level = 3`

- [Country geometries - coastal](https://resourcewatch.carto.com/u/wri-rw/dataset/gadm36_0) `SELECT * FROM "wri-rw".gadm36_0 WHERE coastal = True`

- [EEZ geometries](https://resourcewatch.carto.com/u/wri-rw/tables/com_011_1_maritime_boundaries_territorial_waters/public?redirected=true): WRI still working out a few discrepancies with the country iso codes used in the two datasets. We want to ensure consistency between the code columns so a country geometry can be matched with its EEZ geometry. We plan to make some edits to the gid_0 and iso_ter1 fields by next week (May31st week), but the underlying geometries should remain the same.

## Methodology
1. Create a geojson from the CARTO table in new row
2. Get ID for geojson from Geostore in new row
3. Publish to CARTO


In [30]:
import os
import re
import json
import pandas as pd
import geopandas as gpd
import cartoframes as cf
import shapely
import time

# Cartoframes docs --> https://carto.com/developers/cartoframes/reference/

In [2]:
os.listdir('./')

['storageScript.ipynb',
 'countries_geojson_dict.json',
 'watersheds_df_id_v20210528.csv',
 'watershed_g_id_dict.json',
 '.DS_Store',
 'Script_geostore_IDs_OW-79.ipynb',
 'valid_eez_subset.csv',
 'concep-Convert raster to geometries.ipynb',
 'data_management',
 'example_migrate_LMIPy.ipynb',
 'example_migrate_script.ipynb',
 'countries_df_v20212805.csv',
 'production_backup',
 'explore_page_metrics.ipynb',
 'countries_g_id_dict.json',
 'com_011_1_maritime_boundaries_territorial_waters_v20212705.csv',
 'dashboards',
 'eez_g_id_dict.json',
 'Greta_playground.ipynb',
 'Api_definition',
 'gee_image_management.ipynb',
 '.ipynb_checkpoints',
 'eez_geojson_dict.json',
 'RW_prod_staging_match_20210520-164448.json',
 'GEE_task.ipynb',
 'RW_prod_staging_match_20210520-172552.json']

In [4]:
## env file for gcs upload
env_path = "../.env"
with open(env_path) as f:
    env = {}
    for line in f:
        env_key, _val = line.split("=", 1)
        env_value = _val.split("\n")[0]
        env[env_key] = env_value
        
list(env.keys())

['RW_CARTO_KEY', 'RW_CARTO_ACCOUNT']

## Functions

In [5]:
import geojson, json, requests

def shpToGeojson(s):
    """Using a Shapely object, we should build a Geometry object."""
    if s.geom_type in ['Polygon', 'Point', 'MultiPoint','MultiPolygon']:
        atts={'geojson': {'type': 'FeatureCollection',
                        'features': [{'type': 'Feature',
                            'properties': {},
                            'geometry': geojson.Feature(geometry=s, properties={}).get('geometry')
                                    }]}}
        return atts
    else:
        raise ValueError('shape object was not of suitable geometry type')
    
def registerGeostore(geojson, url = 'http://api.resourcewatch.org/v1/geostore'):
        """Register valid geojson to the geostore service. Return the geostore id.
        """
        try:
            body = json.loads(json.dumps(geojson))
        except:
            print('Failed to create body')
            return ''
        header= {
                'Content-Type':'application/json'
                }
        
        r = requests.post(url, headers=header, json=body)
        if r.status_code == 200:
            return r.json().get('data', {}).get('id','')
        else:
            print(f'Failed to register geostore. Error {r.status_code}')
            return f'Error {r.status_code}'

## Authentication

In [6]:
creds = cf.auth.Credentials(username=env['RW_CARTO_ACCOUNT'], api_key=env['RW_CARTO_KEY'])

## Processing

### Accessing the tables
#### Countries geometry

In [17]:
countries_df = cf.io.carto.read_carto('SELECT * FROM "wri-rw".gadm36_0 WHERE coastal = True', credentials=creds)
countries_df.head()

KeyboardInterrupt: 

#### Watersheds

In [None]:
watersheds_df = cf.io.carto.read_carto('SELECT * FROM "wri-rw".wat_068_rw0_watersheds_edit WHERE level = 3', credentials=creds)
watersheds_df.head()

#### EEZ 

In [18]:
eez_table = 'com_011_1_maritime_boundaries_territorial_waters'
eez_df = cf.io.carto.read_carto(eez_table, credentials=creds)

eez_df.head()

Unnamed: 0,cartodb_id,the_geom,mrgid,geoname,pol_type,mrgid_ter1,territory1,mrgid_sov1,sovereign1,iso_ter1,x_1,y_1,mrgid_eez,area_km2
0,160,"MULTIPOLYGON (((-13.70488 9.52191, -13.70493 9...",49186,Guinean 12 NM,12NM,2122,Guinea,2122,Guinea,GIN,-14.185269,9.94725,8472,9306.0
1,161,"MULTIPOLYGON (((-3.16821 4.89856, -3.17263 4.8...",49187,Ivory Coast 12 NM,12NM,2161,Ivory Coast,2161,Ivory Coast,CIV,-5.271691,4.895276,8473,12375.0
2,162,"MULTIPOLYGON (((8.45250 4.61667, 8.44333 4.600...",49188,Nigerian 12 NM,12NM,2253,Nigeria,2253,Nigeria,NGA,5.607847,5.174516,8474,20404.0
3,166,"MULTIPOLYGON (((12.39538 -6.32436, 12.39728 -6...",49192,Angolan 12 NM,12NM,2150,Angola,2150,Angola,AGO,12.684313,-11.326831,8478,34318.0
4,167,"MULTIPOLYGON (((39.72996 -4.68341, 39.77037 -4...",49193,Tanzanian 12 NM,12NM,2205,Tanzania,2205,Tanzania,TZA,39.926449,-7.641209,8479,16008.0


### Creating geojson and getting geostore id

#### Countries geometries

In [62]:
countries_g_id_dict = {}
countries_geojson_dict = {}
for index, row in countries_df.iterrows():
    g = shpToGeojson(row.the_geom)
    g_id = registerGeostore(g)
    if re.search('^Error ', g_id)!=None: #if there is an error with geostore
        countries_geojson_dict[row.cartodb_id] = g
    else: 
        countries_g_id_dict[row.cartodb_id] = g_id

Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 413
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 413
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geostore. Error 500
Failed to register geosto

In [61]:
print(len(countries_g_id_dict))
print(len(countries_geojson_dict))
len(countries_g_id_dict)+len(countries_geojson_dict)

208

In [None]:
countries_df.shape

In [86]:
# save dicts locally
with open('countries_g_id_dict.json', 'w') as fp:
    json.dump(countries_g_id_dict, fp)
with open('countries_geojson_dict.json', 'w') as fp:
    json.dump(countries_geojson_dict, fp)

In [34]:
countries_df.to_csv(f'./countries_df_v20210528.csv')

#### Watersheds

In [46]:
watershed_g_id_dict = {}
watershed_geojson_dict = {}
for index, row in watersheds_df.iterrows():
    g = shpToGeojson(row.the_geom)
    g_id = registerGeostore(g)
    if re.search('^Error ', g_id)!=None: #if there is an error with geostore
        watershed_geojson_dict[row.cartodb_id] = g
    else: 
        watershed_g_id_dict[row.cartodb_id] = g_id

In [59]:
print(len(watershed_g_id_dict))
print(len(watershed_geojson_dict))
len(watershed_g_id_dict)+len(watershed_geojson_dict)

292
0


292

In [60]:
watersheds_df.shape

(292, 16)

In [85]:
# save dicts locally
with open('watershed_g_id_dict.json', 'w') as fp:
    json.dump(watershed_g_id_dict, fp)


In [81]:
# transform dictionary to table with two columns and merge
watershed_id_df = pd.DataFrame.from_dict(watershed_g_id_dict, orient = 'index', columns = ["geo_id"])
watershed_id_df.reset_index(inplace=True)
watershed_id_df = watershed_id_df.rename(columns = {'index':'cartodb_id'})
watershed_id_df.merge(watersheds_df, left_on='cartodb_id', right_on='cartodb_id')
watershed_id_df.head()

Unnamed: 0,cartodb_id,geo_id
0,7,5cc9febfda14763c2c4cfdea65eb3b18
1,8,e03ae30a0b3fa9d3bef7b68ec04f6977
2,9,ffd4ce51f1407a2bb64a6d719dc00106
3,10,4291c2aa881be2d025582cba57e9ce42
4,11,d1bd595eaa7ba2dbc65e09a4eab103f9


In [82]:
#save locally
watershed_id_df.to_csv(f'./watersheds_df_id_v20210528.csv')

In [33]:
watershed_id_df = pd.read_csv(f'./watersheds_df_id_v20210528.csv')

In [34]:
watershed_id_df.head()

Unnamed: 0.1,Unnamed: 0,cartodb_id,geo_id
0,0,7,5cc9febfda14763c2c4cfdea65eb3b18
1,1,8,e03ae30a0b3fa9d3bef7b68ec04f6977
2,2,9,ffd4ce51f1407a2bb64a6d719dc00106
3,3,10,4291c2aa881be2d025582cba57e9ce42
4,4,11,d1bd595eaa7ba2dbc65e09a4eab103f9


#### EEZ

In [50]:
eez_g_id_dict = {}
eez_geojson_dict = {}
for index, row in eez_df.iterrows():
    g = shpToGeojson(row.the_geom)
    g_id = registerGeostore(g)
    if re.search('^Error ', g_id)!=None: #if there is an error with geostore
        eez_geojson_dict[row.cartodb_id] = g
    else: 
        eez_g_id_dict[row.cartodb_id] = g_id

Failed to register geostore. Error 500
Failed to register geostore. Error 504
Failed to register geostore. Error 500
Failed to register geostore. Error 500


In [57]:
eez_df.shape

(233, 14)

In [58]:
print(len(eez_g_id_dict))
print(len(eez_geojson_dict))
len(eez_g_id_dict)+len(eez_geojson_dict)

229
4


233

In [84]:
# save dicts locally
with open('eez_g_id_dict.json', 'w') as fp:
    json.dump(eez_g_id_dict, fp)
with open('eez_geojson_dict.json', 'w') as fp:
    json.dump(eez_geojson_dict, fp)

In [23]:
## save locally
eez_df.to_csv(f'./{eez_table}_v20210528.csv')

## check failed registration, those elements in geojson_dict
- for countries
- for eez

### EEZ
Process to validate the geometry. Use PostGIS and `ST_MakeValid` ([documentation](https://postgis.net/docs/ST_MakeValid.html)). The geometries are returned as hex and transformed into wkb. The valid geometry can be sent to geostore and added to the dictionary that contains the cartodb_id and the geostore id. 

In [7]:
with open('eez_geojson_dict.json') as json_file:
    eez_geojson_dict = json.load(json_file)

In [8]:
', '.join(eez_geojson_dict.keys())

'223, 145, 151, 218'

In [40]:
eez_subset = cf.io.carto.read_carto(f'SELECT the_geom AS before_geom, ST_MakeValid(the_geom) AS after_geom FROM com_011_1_maritime_boundaries_territorial_waters WHERE cartodb_id in ({", ".join(eez_geojson_dict.keys())})', 
                                    credentials=creds)

In [9]:
#eez_subset.to_csv("valid_eez_subset.csv")
eez_subset = gpd.read_file('valid_eez_subset.csv')

In [10]:
eez_subset

Unnamed: 0,field_1,before_geom,after_geom,geometry
0,0,0106000020E61000002D00000001030000000100000006...,0106000020E61000002D000000010300000004000000BE...,
1,1,0106000020E610000003000000010300000009000000A9...,0106000020E61000000300000001030000006601000050...,
2,2,0106000020E6100000100000000103000000010000007D...,0106000020E6100000100000000103000000010000007D...,
3,3,0106000020E61000000B000000010300000010000000D9...,0106000020E61000000B000000010300000010000000D9...,


In [59]:
eez_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   cartodb_id  233 non-null    int64   
 1   the_geom    233 non-null    geometry
 2   mrgid       233 non-null    int64   
 3   geoname     233 non-null    object  
 4   pol_type    233 non-null    object  
 5   mrgid_ter1  233 non-null    int64   
 6   territory1  233 non-null    object  
 7   mrgid_sov1  233 non-null    int64   
 8   sovereign1  233 non-null    object  
 9   iso_ter1    233 non-null    object  
 10  x_1         233 non-null    float64 
 11  y_1         233 non-null    float64 
 12  mrgid_eez   233 non-null    int64   
 13  area_km2    233 non-null    float64 
dtypes: float64(3), geometry(1), int64(5), object(5)
memory usage: 25.6+ KB


In [133]:
type(eez_df.the_geom[0])

shapely.geometry.multipolygon.MultiPolygon

In [12]:
eez_subset['before_geom']

0    0106000020E61000002D00000001030000000100000006...
1    0106000020E610000003000000010300000009000000A9...
2    0106000020E6100000100000000103000000010000007D...
3    0106000020E61000000B000000010300000010000000D9...
Name: before_geom, dtype: object

In [26]:
for g in eez_subset.index:
    try: 
        initial_string2 = eez_subset.loc[[g]].after_geom        
        initial_byte2 = bytes(bytearray.fromhex(initial_string2.values[0]))
        geom_shape = shapely.wkb.loads(initial_byte2)
        if geom_shape.is_valid:
            print("is valid")
            gjson = shpToGeojson(geom_shape)
            print("geojson")           
            g_id = registerGeostore(gjson)
            print(g_id)

            
    except:
        print(len(initial_string2.values))

is valid
geojson
Failed to register geostore. Error 500
Error 500
is valid
geojson
4bbd776e1df026c470a7a54b1b0a73a2
is valid
geojson
Failed to register geostore. Error 500
Error 500
is valid
geojson
Failed to register geostore. Error 500
Error 500


In [31]:
for g in eez_subset.index:
    try: 
        initial_string2 = eez_subset.loc[[g]].after_geom        
        initial_byte2 = bytes(bytearray.fromhex(initial_string2.values[0]))
        geom_shape = shapely.wkb.loads(initial_byte2)
        if geom_shape.is_valid:
            print("is valid")
            gjson = shpToGeojson(geom_shape)
            print("geojson")           
            g_id = registerGeostore(gjson)
            print(g_id)
            time.sleep(10)
    except:
        print(len(initial_string2.values))

is valid
geojson
Failed to register geostore. Error 500
Error 500
is valid
geojson
4bbd776e1df026c470a7a54b1b0a73a2
is valid
geojson
Failed to register geostore. Error 500
Error 500
is valid
geojson
Failed to register geostore. Error 500
Error 500


In [29]:
time.sleep(10)

NameError: name 'time' is not defined

In [21]:
xx = gpd.GeoSeries([geom_shape]).to_json()

In [22]:
body = xx
header = {
    'Content-Type':'application/json'
                }
r = requests.post('http://api.resourcewatch.org/v1/geostore', headers=header, json=body)


In [23]:
r

<Response [400]>

In [25]:
g_id = registerGeostore(gjson)

Failed to register geostore. Error 500


In [98]:
for g in eez_geojson_dict:
    rr = eez_df.loc[eez_df['cartodb_id']== int(g)]
    
    #s = rr.the_geom
    print(rr.sovereign1)
    #s2 = s.buffer(0)
    #print(s2.is_valid)
    #g.is_valid
    #break

13    Australia
Name: sovereign1, dtype: object
78    United States
Name: sovereign1, dtype: object
84    United States
Name: sovereign1, dtype: object
119    New Zealand
Name: sovereign1, dtype: object


In [33]:
len(eez_geojson_dict)

4

In [63]:
## check failed registration, geostore_id  == ''
## s.is_valid, s.make_valid / buffer(0)

### Countries

In [99]:
with open('countries_geojson_dict.json') as json_file:
    countries_geojson_dict = json.load(json_file)

In [100]:
', '.join(countries_geojson_dict.keys())

'148, 204, 211, 254, 152, 164, 168, 171, 190, 12, 15, 26, 33, 92, 86, 90, 105, 40, 103, 44, 43, 54, 183, 112, 116, 123, 126, 76, 80, 70, 71, 199, 143, 176, 179, 177, 227, 235'

In [101]:
countries_subset = cf.io.carto.read_carto(f'SELECT * FROM "wri-rw".gadm36_0 WHERE cartodb_id in ({", ".join(countries_geojson_dict.keys())})', 
                                    credentials=creds)

In [102]:
countries_subset.head()

Unnamed: 0,cartodb_id,the_geom,gid_0,name_0,coastal
0,148,"MULTIPOLYGON (((97.79915 8.83028, 97.79944 8.8...",MMR,Myanmar,True
1,204,"MULTIPOLYGON (((-56.13278 46.78750, -56.13306 ...",SPM,Saint Pierre and Miquelon,True
2,211,"MULTIPOLYGON (((12.94139 55.46375, 12.94139 55...",SWE,Sweden,True
3,254,"MULTIPOLYGON (((19.42097 -34.67903, 19.42097 -...",ZAF,South Africa,True
4,152,"MULTIPOLYGON (((32.88820 -26.28681, 32.88820 -...",MOZ,Mozambique,True


In [None]:
for g in countries_geojson_dict:
    rr = countries_subset.loc[countries_subset['cartodb_id']==int(g)]   
    s = rr.the_geom
    if s.is_valid:
        print(rr)
    else:
        print(g)
    #g.is_valid
    #break

In [64]:
## check failed registration, geostore_id  == ''
## s.is_valid, s.make_valid / buffer(0)

## write carto table

### Watersheds

In [36]:
watershed_id_df = pd.read_csv(f'./watersheds_df_id_v20210528.csv')

In [37]:
watershed_id_df.head()

Unnamed: 0.1,Unnamed: 0,cartodb_id,geo_id
0,0,7,5cc9febfda14763c2c4cfdea65eb3b18
1,1,8,e03ae30a0b3fa9d3bef7b68ec04f6977
2,2,9,ffd4ce51f1407a2bb64a6d719dc00106
3,3,10,4291c2aa881be2d025582cba57e9ce42
4,4,11,d1bd595eaa7ba2dbc65e09a4eab103f9


In [84]:
watershed_id_df.rename(columns = {"geo_id":"geostore_prod"}, inplace=True)

In [85]:
watershed_id_df.head()

Unnamed: 0.1,Unnamed: 0,cartodb_id,geostore_prod
0,0,7,5cc9febfda14763c2c4cfdea65eb3b18
1,1,8,e03ae30a0b3fa9d3bef7b68ec04f6977
2,2,9,ffd4ce51f1407a2bb64a6d719dc00106
3,3,10,4291c2aa881be2d025582cba57e9ce42
4,4,11,d1bd595eaa7ba2dbc65e09a4eab103f9


#### Using requests code

In [88]:
sql = 'SELECT * FROM wat_068_rw0_watersheds_edit WHERE level = 3'

In [89]:
def get_query_carto(sql, creds=None, account='wri-rw'):
    urlCarto = f"https://{account}.carto.com/api/v2/sql"
    params = {"q": sql}
    if creds: params["api_key"] = creds
    r = requests.get(urlCarto, params=params)
    if r.status_code==200:
        return r.json().get('rows', None)
    return f'Error {r.status_code}'

In [90]:
%%time
watersheds_df = get_query_carto(sql=sql)

CPU times: user 2.82 s, sys: 583 ms, total: 3.4 s
Wall time: 13 s


In [91]:
print(watersheds_df[0].keys())

dict_keys(['cartodb_id', 'the_geom', 'the_geom_webmercator', 'hybas_id', 'next_down', 'next_sink', 'main_bas', 'dist_sink', 'dist_main', 'sub_area', 'up_area', 'pfaf_id', 'endo', 'coast', '_order', 'sort', 'level', 'geostore_prod', 'geostore_staging'])


In [76]:
sql_alter = '''
    ALTER TABLE wat_068_rw0_watersheds_edit
    ADD COLUMN geostore_prod VARCHAR,
    ADD COLUMN geostore_staging VARCHAR;
'''

In [77]:
%%time
get_query_carto(sql_alter, creds=creds.api_key)

CPU times: user 21.6 ms, sys: 3.78 ms, total: 25.4 ms
Wall time: 697 ms


'Error 400'

In [101]:
watershed_update_list = watershed_id_df[["cartodb_id", "geostore_prod"]].to_dict(orient = "records")
print(watershed_update_list[0])

{'cartodb_id': 7, 'geostore_prod': '5cc9febfda14763c2c4cfdea65eb3b18'}


In [107]:
%%time
records = []
for d in watershed_update_list:
    geo_id = d['geostore_prod']
    cartodb_id = d['cartodb_id']
    update_sql = f"""
        UPDATE wat_068_rw0_watersheds_edit 
           SET geostore_prod = '{geo_id}'
         WHERE level = 3 
           AND cartodb_id = {cartodb_id}
        """
    response = get_query_carto(update_sql, creds=creds.api_key)
    records += [{
        **d,
        'response':response
    }]

CPU times: user 5.71 s, sys: 465 ms, total: 6.18 s
Wall time: 4min 4s


In [108]:
len([r for r in records if r.get("response", None)])

0

#### Using cartoframes

In [103]:
# cf.io.carto.to_carto(df, <tablename>, if_exists='replace', credentials=creds)
# cf.update_privacy_table(<tablename>, privacy='public', credentials=creds)
# api sql by CARTO, alter table: two new fields (geostore_prod, and geostore_staging), 
# https://carto.com/developers/sql-api/reference/#operation/postSQLStatement
# update row
# bulk update: insert table and then update table using cartodb_id

#### Countries

In [None]:
# cf.io.carto.to_carto(df, <tablename>, if_exists='replace', credentials=creds)
# cf.update_privacy_table(<tablename>, privacy='public', credentials=creds)

#### EEZ

In [None]:
# cf.io.carto.to_carto(df, <tablename>, if_exists='replace', credentials=creds)
# cf.update_privacy_table(<tablename>, privacy='public', credentials=creds)

In [None]:
#simplify, reduce number of the points or reduce the precision of the points (decimals in the lat long). 