In [1]:
from cartoframes.auth import set_default_credentials
from cartoframes import read_carto, to_carto
import geopandas as gpd
import pandas as pd
import os
import logging 
from shapely import geometry
import requests
import re
from bs4 import BeautifulSoup
import glob
from zipfile import ZipFile
import shutil

NumExpr defaulting to 8 threads.


In [2]:
# get top-level logger object
logger = logging.getLogger()
for handler in logger.handlers: logger.removeHandler(handler)
# manually set level 
logger.setLevel(logging.INFO)
# print to console
console = logging.StreamHandler()
logger.addHandler(console)
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

In [9]:
data_dir = "data"

In [10]:
logger.info('Pull EBSAs from OneDrive')
# copy the csv with the urls for the EBSA jsons
raw_data_file = os.path.join(os.getenv("OCEANWATCH_DATA_DIR"),'EBSA_url_list.csv')
dest_dir = os.path.join(data_dir, os.path.basename(raw_data_file))
shutil.copy(raw_data_file, dest_dir)

Pull EBSAs from OneDrive


'data/EBSA_url_list.csv'

In [11]:
# read in the csv with the urls for the EBSA jsons
url_df = pd.read_csv(raw_data_file,encoding='latin-1')
url_list = url_df['CHM Url']

In [12]:
# regex pattern for the date (year)
match_st = re.compile(r'geojson') #[/w,/W]*ng-href="[/w,/W]*.geojson
for url in url_list:
    # download the data from the source
    page_file = os.path.join(data_dir, os.path.basename(url))
    r = requests.get(url)  
    c = r.content 
    soup = BeautifulSoup(c)
    for link in soup.findAll('a', attrs={'href': re.compile("geojson$")}):
        href  = link.get('href')
        url = 'https://chm.cbd.int' + href
        raw_data_file = os.path.join(data_dir, os.path.basename(url))
        r = requests.get(url)
    with open(raw_data_file, 'wb') as f:
        f.write(r.content)

In [18]:
ebsa_files = glob.glob(os.path.join(data_dir, '*geojson'))
gdf_list = []

In [16]:
print(ebsa_files)

[['data/ARC_10_EBSA.geojson', 'data/BSCS_6_EBSA.geojson', 'data/NWIO_29_EBSA.geojson', 'data/EA_35_EBSA.geojson', 'data/NWIO_16_EBSA.geojson', 'data/SIO_8_EBSA.geojson', 'data/SIO_21_EBSA.geojson', 'data/EA_19_EBSA.geojson', 'data/NWIO_13_EBSA.geojson', 'data/SEA_12_EBSA.geojson', 'data/EA_13_EBSA.geojson', 'data/BSCS_14_EBSA.geojson', 'data/EA_28_EBSA.geojson', 'data/EA_26_EBSA.geojson', 'data/BSCS_3_EBSA.geojson', 'data/ETTP_20_EBSA.geojson', 'data/EA_15_EBSA.geojson', 'data/NWA_3_EBSA.geojson', 'data/ETTP_4_EBSA.geojson', 'data/ETTP_13_EBSA.geojson', 'data/SIO_34_EBSA.geojson', 'data/EA_2_EBSA.geojson', 'data/SIO_11_EBSA.geojson', 'data/NP_3_EBSA.geojson', 'data/SEA_42_EBSA.geojson', 'data/MED_9_EBSA.geojson', 'data/SEA_31_EBSA.geojson', 'data/SIO_32_EBSA.geojson', 'data/EA_8_EBSA.geojson', 'data/SEA_32_EBSA.geojson', 'data/MED_16_EBSA.geojson', 'data/NWIO_25_EBSA.geojson', 'data/BSCS_11_EBSA.geojson', 'data/NEIO_3_EBSA.geojson', 'data/NP_20_EBSA.geojson', 'data/NP_16_EBSA.geojson',

In [20]:
for file in ebsa_files:
    try:
        gdf = gpd.read_file(file)
        gdf_list.append(gdf)
    except Exception:
        print("Could not read" + file)
     

`data/SIO_3_EBSA.geojson' not recognized as a supported file format.


Could not readdata/SIO_3_EBSA.geojson


In [21]:
# merge geopandas dataframes 
gdf_ebsa = gpd.GeoDataFrame(pd.concat(gdf_list))
print ("Length EBSA = " + str(len(gdf_ebsa.index)))

Length EBSA = 479
                                         NAME  \
0    Wrangel-Herald Shallow and Ratmanov Gyre   
0                 The Small Phyllophora Field   
0                            Oman Arabian Sea   
0                         Northeastern Honshu   
0  Sandspit/Hawks Bay and Adjacent Backwaters   

                                          Workshop  EBSA_ID GLOBAL_ID  \
0                                           Arctic       10    ARC_10   
0                        Black Sea and Caspian Sea        6    BSCS_6   
0  North-West Indian Ocean and Adjacent Gulf Areas       29   NWIO_29   
0                                Seas of East Asia       35     EA_35   
0  North-West Indian Ocean and Adjacent Gulf Areas       16   NWIO_16   

                                            geometry  
0  MULTIPOLYGON (((180.00000 70.14896, 179.99272 ...  
0  POLYGON ((33.16825 45.80095, 33.15300 45.90800...  
0  POLYGON ((59.45319 22.62132, 59.47155 22.63917...  
0  MULTIPOLYGON (((141.73138

In [25]:
# save processed dataset to shapefile
gdf_ebsa.to_file('merged_ebsa.shp',driver='ESRI Shapefile')




In [26]:
# dissolve
gdf_ebsa = gdf_ebsa.dissolve('NAME')
print ("Length EBSA = " + str(len(gdf_ebsa.index)))

# save processed dataset to shapefile
gdf_ebsa.to_file('dissolved_ebsa.shp',driver='ESRI Shapefile')


Length EBSA = 271
                                                                                             geometry  \
NAME                                                                                                    
"Ilha s Tinhosa" (Ilha Principe)                    POLYGON ((7.26433 1.44636, 7.28321 1.44270, 7....   
2les des Sept Frbres et Godorya (Seven Brothers...  MULTIPOLYGON (((43.32699 12.47501, 43.45137 12...   
Agulhas Bank Nursery Area                           POLYGON ((22.22186 -34.26398, 22.22942 -35.104...   
Agulhas Front                                       POLYGON ((77.12534 -30.98200, 78.27117 -32.083...   
Agulhas slope and seamounts                         POLYGON ((23.10876 -37.35889, 21.36935 -37.395...   

                                                                                           Workshop  \
NAME                                                                                                  
"Ilha s Tinhosa" (Ilha Principe)        

