In [2]:
import os
import pandas as pd
import geopandas as gpd
import requests
import json
from datetime import datetime
from shapely.geometry import Point
from shapely.geometry import Polygon
import h3

In [11]:
def today():
    """
    Build the current local date as a string for use in later processing steps

    Parameters:
    None

    Returns:
    String holding the current date in YYYY-mm-dd form
    """

    return datetime.now().strftime('%Y-%m-%d')


def csb_extents(shp):
    """
    Create a formatted string representing the bounding extents to constrain CSB order

    When the layer declares a coordinate reference system other than EPSG:4326 it is
    reprojected first, so the bounds are expressed in the same lon/lat degrees used
    for the soundings elsewhere in this notebook. A layer with no declared CRS is
    used as-is.

    Parameters:
    shp (str): path to geospatial file (e.g. shapefile) defining spatial extents of search window

    Returns:
    String representing bounding coordinates (min x, min y, max x, max y), separated by ', '

    Raises:
    ValueError: if the file contains no features (its bounds would be NaN)

    See: https://github.com/CI-CMG/pointstore-api-docs/blob/main/pointstore_api.md for more info
    """

    gdf = gpd.read_file(shp)

    # An empty layer has NaN bounds; fail here instead of sending "nan, nan, nan, nan"
    if gdf.empty:
        raise ValueError(f"No features found in {shp!r}; cannot derive bounding extents")

    # Projected extents (metres/feet) would otherwise be submitted as if they were degrees
    if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    return ', '.join(map(str, gdf.total_bounds))


def csb_api_call(api_url, email, extents, startdate, stopdate, timeout=60):
    """
    Make crowdsourced bathy API call to create DCDB data order

    Parameters:
    api_url (str): crowdsourced bathy point store URL
    email (str): email address submitted with the order
    extents (str): coordinates defining spatial extent of search window (min x, min y, max x, max y)
    startdate (str): beginning of temporal range to consider
    stopdate (str): end of temporal range to consider
    timeout (float): seconds to wait for the server before requests raises; defaults to 60

    Returns:
    Parsed JSON response when the order is created (HTTP 201); None for any other
    status code, in which case the status and response text are printed

    See: https://github.com/CI-CMG/pointstore-api-docs/blob/main/pointstore_api.md for more info
    """

    payload = {
        "email": email,
        "bbox": extents,
        "datasets": [
            {
                "label": "csb",
                "collection_date": {"start": startdate, "end": stopdate},
            }
        ]
    }

    # Without a timeout requests waits indefinitely on an unresponsive server
    response = requests.post(api_url, json=payload, timeout=timeout)

    # Anything other than 201 Created is reported and signalled to the caller with None
    if response.status_code != 201:
        print("Error:", response.status_code, response.text)
        return None

    # Parse the body once and reuse it for both the message and the return value
    order = response.json()
    print("Success:", order)
    return order


def api_call_status(order_id, timeout=60):
    """
    Query status of DCDB CSB data order and extract path for data pickup

    Parameters:
    order_id (str): URL that is requested to obtain the order status (the notebook
        passes the 'url' value returned when the order was created)
    timeout (float): seconds to wait for the server before requests raises; defaults to 60

    Returns:
    String holding the last path component of the order's 'output_location' (the csv
    file name) when the order status is 'complete'; None when the request fails, the
    order is not complete, or no output location is reported

    See: https://github.com/CI-CMG/pointstore-api-docs/blob/main/pointstore_api.md for more info
    """

    # Without a timeout requests waits indefinitely on an unresponsive server
    response = requests.get(order_id, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: Received status code {response.status_code}")
        return None

    # Parse the JSON response
    json_data = response.json()

    # Only a status of "complete" has data ready for pickup
    if json_data.get('status') != 'complete':
        print("The response is not complete.")
        return None

    print("The response is complete.")

    # Guard against a completed order with no location instead of failing on None.split
    output_location = json_data.get('output_location')
    if not output_location:
        print("Error: completed order did not report an output location.")
        return None

    return output_location.split("/")[-1]
    

def create_csb_gdf(csb_file, meta_dict):
    """
    Creates a geopandas dataframe containing h3 hex representations of csb sounding data and associated metadata

    Parameters:
    csb_file (str): path or URL to csv file containing csb data; the columns 'time',
        'lat', 'lon' and 'provider' are read by this function
    meta_dict (dict): metadata embedded in every row; the keys 'survey', 'surveytype'
        and 'url' are read

    Returns:
    geopandas dataframe (EPSG:4326) with one row per sounding, holding the columns
    survey, surveyOrg, surveyDate (YYYYmm), surveyType, URL, h3_index and the
    level 11 h3 hexagon geometry
    """
    # Read csv file and create pandas dataframe
    df = pd.read_csv(csb_file).rename(columns={'provider': 'surveyOrg'})

    # Embed defined metadata
    df['survey'] = meta_dict['survey']
    df['surveyType'] = meta_dict['surveytype']
    df['URL'] = meta_dict['url']
    df['time'] = pd.to_datetime(df['time'])
    df['surveyDate'] = df['time'].dt.strftime('%Y%m')

    # Index each sounding to its level 11 h3 cell; iterating the two coordinate
    # columns avoids building a Series for every row as apply(axis=1) does
    df['h3_index'] = [h3.geo_to_h3(lat, lon, 11) for lat, lon in zip(df['lat'], df['lon'])]

    # Soundings in the same cell share a hexagon, so build each polygon once
    cell_polygons = {
        h3_index: Polygon(h3.h3_to_geo_boundary(h3_index, geo_json=True))
        for h3_index in df['h3_index'].unique()
    }
    df['geometry'] = df['h3_index'].map(cell_polygons.get)

    # Convert pandas df to geopandas df
    keep_cols = ['survey', 'surveyOrg', 'surveyDate', 'surveyType', 'URL', 'h3_index', 'geometry']
    gdf = gpd.GeoDataFrame(df[keep_cols], geometry='geometry', crs="EPSG:4326")

    return gdf

In [12]:
# Define input variables

# Spatial search window and order request settings
shp = r"/mnt/c/Users/mike/glos_data/test_extent.shp"
csb_api_url = "https://q81rej0j12.execute-api.us-east-1.amazonaws.com/order"
email = "glbathymetry@gmail.com"
bbox = csb_extents(shp)

# Temporal search window: fixed start date through the current date
beg_d = "2014-01-01"
end_d = today()

# Base URL that the csv file name is appended to when the order is picked up
csb_pickup_url = "https://order-pickup.s3.amazonaws.com/"

# Attribution embedded in every row of the geopandas dataframe
metadata = {
    "survey": "Crowdsourced Bathymetry",
    "surveytype": "SBES",
    "url": "https://www.ncei.noaa.gov/maps/iho_dcdb/",
}

In [13]:
# Submit the DCDB order for CSB data inside the search window and date range;
# the result is the parsed API response, or None when the order was not created
csb = csb_api_call(
    api_url=csb_api_url,
    email=email,
    extents=bbox,
    startdate=beg_d,
    stopdate=end_d,
)

Success: {'message': 'extract request 58289065-84ea-4154-900e-e8cf98a0a978 created.', 'url': 'https://q81rej0j12.execute-api.us-east-1.amazonaws.com/order/58289065-84ea-4154-900e-e8cf98a0a978'}


In [9]:
# Query the status URL returned with the order. The result is the csv file name
# once the order is complete; otherwise api_call_status prints a message and
# returns None, so re-run this cell until it reports completion.
csb_csv = api_call_status(csb["url"])

The response is complete.


In [10]:
# api_call_status returns None until the order is complete; stop with a clear
# message instead of failing while the download URL is being built
if csb_csv is None:
    raise RuntimeError("CSB order is not complete yet; re-run the status cell before loading the data.")

# Build the download URL with plain string joining: os.path.join is a filesystem
# path helper and its separator depends on the operating system
csb_gdf = create_csb_gdf(csb_pickup_url.rstrip("/") + "/" + csb_csv, metadata)

In [None]:
# Group geopandas df by date (year-month).
# A scalar key (rather than a one-element list) makes each group label a plain
# string on every pandas version; with a list key the label is a 1-tuple only on
# pandas >= 2.0, so indexing it with [0] is version-dependent.
grouped = csb_gdf.groupby('surveyDate')

# Iterate through the groups, remove duplicate records and write output geojson to disk
for date, group in grouped:
    # Remove duplicate rows
    group = group.drop_duplicates().reset_index(drop=True)

    # Create a filename
    date_str = date
    filename = f"/mnt/c/Users/mike/glos_data/ncei_csb/api_calls/testing/TEST_csb_output_{date_str}_level11.geojson"

    # Make sure the output folder exists before writing
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Write the group to a GeoJSON file
    group.to_file(filename)

    print(f"Saved {filename} with {len(group)} records.")

In [None]:
# Median sounding depth per level 11 h3 cell.
# No notebook-level `df` exists (it is local to create_csb_gdf) and csb_gdf does not
# keep the depth column, so the raw soundings are re-read and indexed here.
# Assumes the csv has a 'depth' column, as the original cell did - TODO confirm.
soundings = pd.read_csv(csb_pickup_url.rstrip("/") + "/" + csb_csv)
soundings['h3_index'] = [
    h3.geo_to_h3(lat, lon, 11) for lat, lon in zip(soundings['lat'], soundings['lon'])
]

grouped_df = soundings.groupby('h3_index').agg({
    'depth': 'median'          # median depth of the soundings in each cell
}).reset_index()
grouped_df