In [1]:
# Prerequisites: python 3.6 or later
import pandas as pd
import requests
import json
import uuid
import pprint
import datetime
pp = pprint.PrettyPrinter(indent=2)

### The following code is from the nb provided by Dan Feldman; helper functions to assist in registering datasets and resources

In [2]:
# This is a convenience method to handle api responses. The main portion of the notebook starts in the the next cell
def handle_api_response(response, print_response=False):
    parsed_response = response.json()

    if print_response:
        pp.pprint({"API Response": parsed_response})
    
    if response.status_code == 200:
        return parsed_response
    elif response.status_code == 400:
        raise Exception("Bad request ^")
    elif response.status_code == 403:
        msg = "Please make sure your request headers include X-Api-Key and that you are using correct url"
        raise Exception(msg)
    else:
        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        msg = f"""\n\n
        ------------------------------------- BEGIN ERROR MESSAGE -----------------------------------------
        It seems our server encountered an error which it doesn't know how to handle yet. 
        This sometimes happens with unexpected input(s). In order to help us diagnose and resolve the issue, 
        could you please fill out the following information and email the entire message between ----- to
        danf@usc.edu:
        1) URL of notebook (of using the one from https://hub.mybinder.org/...): [*****PLEASE INSERT ONE HERE*****]
        2) Snapshot/picture of the cell that resulted in this error: [*****PLEASE INSERT ONE HERE*****]
        
        Thank you and we apologize for any inconvenience. We'll get back to you as soon as possible!
        
        Sincerely, 
        Dan Feldman
        
        Automatically generated summary:
        - Time of occurrence: {now}
        - Request method + url: {response.request.method} - {response.request.url}
        - Request headers: {response.request.headers}
        - Request body: {response.request.body}
        - Response: {parsed_response}

        --------------------------------------- END ERROR MESSAGE ------------------------------------------
        \n\n
        """

        raise Exception(msg)

In [3]:
# For real interactions with the data catalog, use api.mint-data-catalog.org
url = "https://sandbox.mint-data-catalog.org"

In [4]:
# When you register datasets or resources, we require you to pass a "provenance_id". This a unique id associated
# with your account so that we can keep track of who is adding things to the data catalog. For sandboxed interactions
# with the data catalog api, please use this provenance_id:
provenance_id = "e8287ea4-e6f2-47aa-8bfc-0c22852735c8"

In [5]:
# Step 1: Get session token to use the API
resp = requests.get(f"{url}/get_session_token").json()
print(resp)
api_key = resp['X-Api-Key']

request_headers = {
    'Content-Type': "application/json",
    'X-Api-Key': api_key
}

{'X-Api-Key': 'mint-data-catalog:96c4d87b-3551-4d95-bf84-96dd86f07a8e:1ec0107c-295c-4404-8254-2355411e0264'}


Use Dan's example to check for the streamflow variable from svo:

In [6]:
# If you need to check if specific standard variables have already been registered in the data catalog, 
# you can search by name and data catalog will return existing records.
nonexistent_name = str(uuid.uuid4())
print(f"This name does not exist: {nonexistent_name}")

search_query = {
    "name__in": ["Time_Standard_Variable", "channel_water_x-section__volume_flow_rate", nonexistent_name]
}

resp = requests.post(f"{url}/knowledge_graph/find_standard_variables", 
                                        headers=request_headers,
                                        json=search_query)
parsed_response = handle_api_response(resp, print_response=True)

# Below is how you'd extract standard_variables from the response if you need to reference them (their record_ids)
# later:
# 
# existing_standard_variables = parsed_response["standard_variables"]
# print(existing_standard_variables)

This name does not exist: bd6d7051-4788-427a-9c91-f892ff4c5aa0
{ 'API Response': { 'result': 'success',
                    'standard_variables': [ { 'description': '',
                                              'id': '28f016f3-60fc-524c-a888-b40bc694bfb6',
                                              'name': 'channel_water_x-section__volume_flow_rate',
                                              'ontology': 'SVOv1.0',
                                              'uri': 'http://geoscienceontology.org/svo/svl/variable#channel%40context%257Ein_%28water_flowing_x-section%29__volume_flow_rate'},
                                            { 'description': '',
                                              'id': 'eeaa3017-a87f-5887-a6cd-a4e196aa8df4',
                                              'name': 'Time_Standard_Variable',
                                              'ontology': 'MyOntology',
                                              'uri': 'http://my_ontology_uri.org/stan

Variables are not present so need to register them:

In [7]:
# @param[name] standard variable name (aka label)
# @param[ontology] name of the ontology where standard variables are defined
# @param[uri] uri of standard variable name (note that this is full uri, which includes the ontology)
standard_variable_defs = {
    "standard_variables": [
        {
            "name": "gage__observation_time",
            "ontology": "SVOv1.0",
            "uri": "http://geoscienceontology.org/svo/svl/variable#gage%40context%257Eat_observation__observation_time",
        },
        {
            "name": "channel_water_x-section__volume_flow_rate",
            "ontology": "SVOv1.0",
            "uri": "http://geoscienceontology.org/svo/svl/variable#channel%40context%257Ein_%28water_flowing_x-section%29__volume_flow_rate"
        },
        {
            "name": "gage_water_surface__height",
            "ontology": "SVOv1.0",
            "uri": "http://geoscienceontology.org/svo/svl/variable#gage%40context%257Eat_%28water_surface%29__height"
        }
    ]
}

resp = requests.post(f"{url}/knowledge_graph/register_standard_variables", 
                    headers=request_headers, 
                    json=standard_variable_defs)


# If request is successful, it will return 'result': 'success' along with a list of registered standard variables
# and their record_ids. Those record_ids are unique identifiers (UUID) and you will need them down the road to 
# register variables
parsed_response = handle_api_response(resp, print_response=True)
records = parsed_response['standard_variables']

# iterate through the list of returned standard variable objects and save
# the ones whose names match the one that we want and store them in python variables
time_standard_variable = next(record for record in records if record["name"] == "gage__observation_time")
streamflow_standard_variable = next(record for record in records if record["name"] == "channel_water_x-section__volume_flow_rate")
gageheight_standard_variable = next(record for record in records if record["name"] == "gage_water_surface__height")

## Uncomment below to see the structure of a specific variable:
# pp.pprint({"Time Standard Variable": time_standard_variable})
# pp.pprint({"Temperature Standard Variable": temperature_standard_variable})

{ 'API Response': { 'result': 'success',
                    'standard_variables': [ { 'description': '',
                                              'name': 'gage__observation_time',
                                              'ontology': 'SVOv1.0',
                                              'record_id': 'c8b8a9f6-9288-551c-97dc-9be72e1c6eba',
                                              'uri': 'http://geoscienceontology.org/svo/svl/variable#gage%40context%257Eat_observation__observation_time'},
                                            { 'description': '',
                                              'name': 'channel_water_x-section__volume_flow_rate',
                                              'ontology': 'SVOv1.0',
                                              'record_id': '28f016f3-60fc-524c-a888-b40bc694bfb6',
                                              'uri': 'http://geoscienceontology.org/svo/svl/variable#channel%40context%257Ein_%28water_flowing_x-section%29__v

Start dataset & resource registration:

In [8]:
dataset_id = "4e8ade31-7729-4891-a462-2dac66158512" # This is optional; if not given, it will be auto-generated

## An example of how to generate a random uuid yourself (will be different every time method is run)
# print(str(uuid.uuid4()))
# print(str(uuid.uuid4()))
#
## This will generate the same record_id as long as the input string remains the same
#
# input_string = "some string 34_"
# print(str(uuid.uuid5(uuid.NAMESPACE_URL, str(input_string))))
# print(str(uuid.uuid5(uuid.NAMESPACE_URL, str(input_string))))

In [9]:
dataset_defs = {
    "datasets": [
        {
            "record_id": dataset_id, # Remove this line if you want to create a new dataset
            "provenance_id": provenance_id,
            ##"metadata": {
            ##    "any_additional_metadata": "content"
            ##},
            "description": "USGS Streamflow data at gages in the Boulder Creek Watershed",
            "name": "USGS Streamflow Boulder"
        }
    ]
}

resp = requests.post(f"{url}/datasets/register_datasets", 
                                        headers=request_headers,
                                        json=dataset_defs)


parsed_response = handle_api_response(resp, print_response=True)

datasets = parsed_response["datasets"]

# Iterate through the list of returned datasets objects and save the one whose name matches our name 
# to a Python variable
dataset_record = next(record for record in datasets if record["name"] == "USGS Streamflow Boulder")
# Extract dataset record_id and store it in a variable
dataset_record_id = dataset_record["record_id"]

{ 'API Response': { 'datasets': [ { 'description': 'USGS Streamflow data at '
                                                   'gages in the Boulder Creek '
                                                   'Watershed',
                                    'json_metadata': {},
                                    'name': 'USGS Streamflow Boulder',
                                    'provenance_id': 'e8287ea4-e6f2-47aa-8bfc-0c22852735c8',
                                    'record_id': '4e8ade31-7729-4891-a462-2dac66158512'}],
                    'result': 'success'}}


In [10]:
#one time uuid generator
#print(str(uuid.uuid4()))

Choose the correct units for each variable associated with the dataset. The "Name" attribute is the name of the variable in the file. The "standard variable id" is the id in SVO.

In [11]:
# Again, these ids are optional and will be auto-generated if not given. They are included here in order
# to make requests indempotent (so that new records aren't beeing generated every time this code block is run)

time_variable_record_id = '9358af57-192f-4cc3-9bee-837e76819674'
streamflow_variable_record_id = 'c22deb3b-ebda-48cb-950a-2f4f00498197'
gageheight_variable_record_id = '499c54ed-3769-41ec-adac-30cf513c64e7'

variable_defs = {
    "variables": [
        {
            "record_id": time_variable_record_id, # If you remove this line, record_id will be auto-generated
            "dataset_id": dataset_record_id,
            "name": "Time",
            "metadata": {
                "units": "ISO8601_datetime"
                # Can include any other metadata that you want to associate with the variable
            },
            "standard_variable_ids": [
                # Recall that we created "time_standard_variable" python object after
                # registering our standard variables. We just need its unique identifier - 
                # record_id - in order to associate it with our "Time" variable. Also, note 
                # that "standard_variable_ids" is an array, so you can associate multiple
                # standard variables with our "local" variable (and it does not have
                # to be done all at once). That is how we can semantically link multiple
                # standard names and ontologies later on
                time_standard_variable["record_id"]
            ]
        },
        {
            "record_id": streamflow_variable_record_id, # If you remove this line, record_id will be auto-generated
            "dataset_id": dataset_record_id, # from register_datasets() call
            "name": "Streamflow",
            "metadata": {
                "units": "ft^3/s"
            },
            "standard_variable_ids": [
                streamflow_standard_variable["record_id"]
            ]
        },
        {
            "record_id": gageheight_variable_record_id, # If you remove this line, record_id will be auto-generated
            "dataset_id": dataset_record_id, # from register_datasets() call
            "name": "GageHeight",
            "metadata": {
                "units": "ft"
            },
            "standard_variable_ids": [
                gageheight_standard_variable["record_id"]
            ]
        }
    ]
}

resp = requests.post(f"{url}/datasets/register_variables", 
                                        headers=request_headers,
                                        json=variable_defs)

parsed_response = handle_api_response(resp, print_response=True)
variables = parsed_response["variables"]

time_variable = next(record for record in variables if record["name"] == "Time")
streamflow_variable = next(record for record in variables if record["name"] == "Streamflow")
gageheight_variable = next(record for record in variables if record["name"] == "GageHeight")
## Uncomment below to print individual records
# print(f"Time Variable: {time_variable}")
# print(f"Temperature Variable: {temperature_variable}")

{ 'API Response': { 'result': 'success',
                    'variables': [ { 'dataset_id': '4e8ade31-7729-4891-a462-2dac66158512',
                                     'json_metadata': { 'units': 'ISO8601_datetime'},
                                     'name': 'Time',
                                     'record_id': '9358af57-192f-4cc3-9bee-837e76819674'},
                                   { 'dataset_id': '4e8ade31-7729-4891-a462-2dac66158512',
                                     'json_metadata': {'units': 'ft^3/s'},
                                     'name': 'Streamflow',
                                     'record_id': 'c22deb3b-ebda-48cb-950a-2f4f00498197'},
                                   { 'dataset_id': '4e8ade31-7729-4891-a462-2dac66158512',
                                     'json_metadata': {'units': 'ft'},
                                     'name': 'GageHeight',
                                     'record_id': '499c54ed-3769-41ec-adac-30cf513c64e7'}]}}


### Dataset specific: Clean up and split up data into files by location.
The data files need to be put in csv format, and should be split up for better parsability and metadata annotation.
The date format needs to be changed to ISO-8601 standard.

In [12]:
# The header separator separates general information at the top of the file (which is discarded) from the 
# information for each individual file concatenated in the overall file
header_end = '# -----------------------------------------------------------------------------------\n'
# The first two lines of each site's file are a blank comment line followed by a line that starts with site_sep
site_sep = '# Data provided for site '
# Relative location of the input file
ext = 'HAND-Data/streamgages/'

# read in file and place in a list of lines:
with open(ext+'USGS-Streamgages-HUC10190005.tsv') as f:
    lines = f.readlines()

# determine where the concatenated files start, discard the top header information
start_index = lines.index(header_end) + 1
# define the comment indicator
comment = '#'
# define the column names by a non-coded (human readable) name
columns = ['Time','Streamflow','GageHeight']
# first entry on the line with the column labels
column_labels = 'agency_cd'
# indicator for the start of a line of data
dataline = 'USGS'

# is a file currently open for writing?
fopen = False
# currently in a comment section?
comment_section = True
# translate line to current output file?
writeline = False
# skip the current line from writing to file?
skip = False
# for debugging: detected first data line after the column labels
#one_data_line = False

#iterate over the lines in the input file, starting past the main header
for line in lines[start_index:]:
    
    # if this line should be skipped, set writeline to False and toggle skip
    if skip:
        writeline = False
        skip = False
        
    # if current line is a coment line and not in a comment section at the moment
    # then close the current write file (if open), and don't write the line to file
    # (this is the empty comment line between concatenated files)
    if line.startswith(comment) and not comment_section:
        comment_section = True
        if fopen:
            f.close()
            fopen = False
        writeline = False
    
    # if current line is the first line in a new file, grab the name of the site
    # and open a new file with the name of the site in the filename
    if line.startswith(site_sep):
        site_name = 'USGS' + line.split(site_sep)[1].strip()
        print('Site found: ',site_name)
        f = open(ext+site_name+'.csv','w')
        fopen = True
        writeline = True
        
    # if the current line is a comment line and currently in the comment section and
    # a file is currently open, then write the comment to file
    if line.startswith(comment) and comment_section and fopen:
        writeline = True
    
    # if the current line does not start with a comment and we are in the comment section, 
    # then we have reached the column labels line; toggle comment_section
    if not line.startswith(comment) and comment_section and fopen:
        comment_section = False
        writeline = True
        
    # If the current line starts with the column label indicator, then determine which columns are present
    # and compile the appropriate column labels line to write to output file
    if line.startswith(column_labels):
        # for debigging -- toggle the indicator for the first line of data
        #one_data_line = True
        
        # tab delimited data, so split along tabs
        labels = line.split('\t')
        
        # helper function to determine the index of a column in the list of column names
        def find_index(labels, category):
            index = -1
            i = 0
            for l in labels:
                if l.endswith(category):
                    index = i
                    break
                i += 1
            return index
        discharge_index = find_index(labels, '_00060')
        gage_height_index = find_index(labels, '_00065')
        
        #print(discharge_index,gage_height_index)
        # create the column labels line
        if (discharge_index>=0) and (gage_height_index>=0):
            line = ','.join(columns) + '\n'
        elif (discharge_index>=0):
            line = ','.join(columns[0:2]) + '\n'
        else:
            line = ','.join([columns[0],columns[1]]) + '\n'
            
        writeline = True
        # the line after the column labels line contains extra information that will not
        # be written to output
        skip = True
        
    # if the current line is a dataline, write it to file
    if line.startswith(dataline):
        writeline = True
        #if one_data_line:
        #    print(line)
        #    one_data_line = False
        
    # if the current line is to be written to file, then write it
    if writeline:
        # if the line is a dataline, extract the date, streamflow and gageheight (as applicable); 
        # reformat date as ISO8601 YYYY-MM-DDThh:mm-MDT/MST offset (+)
        if line.startswith(dataline):
            components = line.split('\t')
            line = components[2].replace(' ','T')+'-'+components[3].replace('MDT','06').replace('MST','07')
            if (discharge_index>=0):
                line += ',' + components[discharge_index] 
            if (gage_height_index>=0):
                line += ',' + components[gage_height_index] 
            line += '\n'
        f.write(line)

Site found:  USGS06721500
Site found:  USGS06724970
Site found:  USGS06725450
Site found:  USGS06726900
Site found:  USGS06727000
Site found:  USGS06727410
Site found:  USGS06727500
Site found:  USGS06730160
Site found:  USGS06730200
Site found:  USGS06730400
Site found:  USGS06730500
Site found:  USGS06730525
Site found:  USGS395331105134400
Site found:  USGS395452105113800


In [13]:
#declare data storage url and file names -- !! CURRENTLY NOT AVAILABLE SINCE MARIA DOES NOT HAVE ACCESS TO DATAX
data_storage_url = "www.my_domain.com/storage"
file_1_name = "USGS06721500.csv"
file_2_name = "USGS06724970.csv"
file_3_name = "USGS06725450.csv"
file_4_name = "USGS06726900.csv"
file_5_name = "USGS06727000.csv"
file_6_name = "USGS06727410.csv"
file_7_name = "USGS06727500.csv"
file_8_name = "USGS06730160.csv"
file_9_name = "USGS06730200.csv"
file_10_name = "USGS06730400.csv"
file_11_name = "USGS06730500.csv"
file_12_name = "USGS06730525.csv"
file_13_name = "USGS395331105134400.csv"
file_14_name = "USGS395452105113800.csv"

In [14]:
#declare file urls
file_1_data_url = f"{data_storage_url}/{file_1_name}"
file_2_data_url = f"{data_storage_url}/{file_2_name}"
file_3_data_url = f"{data_storage_url}/{file_3_name}"
file_4_data_url = f"{data_storage_url}/{file_4_name}"
file_5_data_url = f"{data_storage_url}/{file_5_name}"
file_6_data_url = f"{data_storage_url}/{file_6_name}"
file_7_data_url = f"{data_storage_url}/{file_7_name}"
file_8_data_url = f"{data_storage_url}/{file_8_name}"
file_9_data_url = f"{data_storage_url}/{file_9_name}"
file_10_data_url = f"{data_storage_url}/{file_10_name}"
file_11_data_url = f"{data_storage_url}/{file_11_name}"
file_12_data_url = f"{data_storage_url}/{file_12_name}"
file_13_data_url = f"{data_storage_url}/{file_13_name}"
file_14_data_url = f"{data_storage_url}/{file_14_name}"

In [15]:
# Similar to dataset and variable registrations, we are going to generate unique resource record_ids to 
# make these requests repeatable without creating new records. But remember, these will be auto-generated
# if not given

#one time uuid generator
#for i in range(2):
#    print(str(uuid.uuid4()))
    
file_1_record_id = "dd52e66b-3149-4d46-8f8e-a18e46136e55"
file_2_record_id = "25916ccf-d108-4187-b243-2b257ce67fa5"
file_3_record_id = "f72e52a0-5d80-474a-a35a-93a9836e72fa"
file_4_record_id = "3bec0225-c63c-435b-b117-3a19dc86578e"
file_5_record_id = "14c4d46c-c2c4-4aac-8683-343205eb8e7"
file_6_record_id = "eabfa797-6eaf-4f8c-b11b-975a39fca51c"
file_7_record_id = "479803fc-ffcb-4deb-9848-247a4bbf8d73"
file_8_record_id = "97d97437-8b9c-462b-b0d6-0648228df842"
file_9_record_id = "3471873a-74c3-45f7-9d91-c8038ee94ebe"
file_10_record_id = "4795320b-37a5-4eb1-8c42-b83808862b04"
file_11_record_id = "1be6de1a-653a-4b52-b02c-6f0ea1d3f487"
file_12_record_id = "9ca76777-8839-459f-97bc-5eba9519522a"
file_13_record_id = "7cfd0978-11f5-4da3-aad9-df8d98f677c6"
file_14_record_id = "b60c897a-9b28-45d2-aa5c-467def5a318a"

In [16]:
# define the temporal coverage in ISO format, UTC value conversion
file_1_temporal_coverage = {
    "start_time": "1986-10-01T07:00:00",
    "end_time": "2007-09-30T06:00:00"
}
file_2_temporal_coverage = {
    "start_time": "2014-03-06T07:00:00",
    "end_time": "2019-06-18T20:10:00"
}
file_3_temporal_coverage = {
    "start_time": "1988-10-01T07:29:00",
    "end_time": "2013-09-12T05:45:00"
}
file_4_temporal_coverage = {
    "start_time": "1986-10-01T06:15:00",
    "end_time": "1995-04-01T06:45:00"
}
file_5_temporal_coverage = {
    "start_time": "1988-12-21T07:00:00",
    "end_time": "1993-08-18T05:45:00"
}
file_6_temporal_coverage = {
    "start_time": "2011-04-01T07:00:00",
    "end_time": "2013-09-12T05:55:00"
}
file_7_temporal_coverage = {
    "start_time": "1986-10-01T06:15:00",
    "end_time": "2019-06-18T20:50:00"
}
file_8_temporal_coverage = {
    "start_time": "2013-04-01T06:55:00",
    "end_time": "2019-06-18T20:50:00"
}
file_9_temporal_coverage = {
    "start_time": "1987-09-17T06:00:00",
    "end_time": "2019-06-18T20:15:00"
}
file_10_temporal_coverage = {
    "start_time": "1997-07-02T06:00:00",
    "end_time": "2005-10-01T05:45:00"
}
file_11_temporal_coverage = {
    "start_time": "1986-10-01T06:30:00",
    "end_time": "2019-06-18T21:00:00"
}
file_12_temporal_coverage = {
    "start_time": "2013-09-19T06:00:00",
    "end_time": "2019-06-18T20:00:00"
}
file_13_temporal_coverage = {
    "start_time": "1994-05-05T06:00:00",
    "end_time": "1996-10-01T05:45:00"
}
file_14_temporal_coverage = {
    "start_time": "1995-10-17T06:00:00",
    "end_time": "1996-09-29T23:00:00"
}

Add location coordinates pertaining to each file. The data catalog only accepts BoundingBox format at the moment, so setting a very small area with xmin, ymin = the coordinates provided by USGS. Coordinates converted to decimal and presented in WGS 84 (standard used in data catalog).

In [17]:
# helper function to convert deg-min-sec to decimal
#def(degminsec):
    

file_1_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.218889, 
        "ymin": -105.527778,
        "xmax": 40.218890,
        "ymax": -105.527779,
    }
}
file_2_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.134278, 
        "ymin": -105.130819,
        "xmax": 40.134279, 
        "ymax": -105.130820,
    }
}
file_3_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.157422, 
        "ymin": -105.015394,
        "xmax": 40.157423,
        "ymax": -105.015395
    }
}
file_4_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.011667, 
        "ymin": -105.348456,
        "xmax": 40.011668,
        "ymax": -105.348457
    }
}
file_5_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.006389, 
        "ymin": -105.330278,
        "xmax": 40.006390,
        "ymax": -105.330279
    }
}
file_6_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.042028, 
        "ymin": -105.364917,
        "xmax": 40.042029,
        "ymax": -105.364918
    }
}
file_7_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.018667, 
        "ymin": -105.32625,
        "xmax": 40.018668,
        "ymax": -105.32626
    }
}
file_8_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.057611, 
        "ymin": -105.348778,
        "xmax": 40.057612,
        "ymax": -105.348779
    }
}
file_9_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.051667, 
        "ymin": -105.178333,
        "xmax": 40.051668,
        "ymax": -105.178334
    }
}
file_10_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 39.976111, 
        "ymin": -105.116667,
        "xmax": 39.976112,
        "ymax": -105.116668
    }
}
file_11_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.138778, 
        "ymin": -105.020222,
        "xmax": 40.138779,
        "ymax": -105.020223
    }
}
file_12_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 40.160467, 
        "ymin": -105.007936,
        "xmax": 40.160468,
        "ymax": -105.007937
    }
}
file_13_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 39.891944, 
        "ymin": -105.228889,
        "xmax": 39.891945,
        "ymax": -105.228890
    }
}
file_14_spatial_coverage = {
    "type": "BoundingBox",
    "value": {
        "xmin": 39.915833, 
        "ymin": -105.193611,
        "xmax": 39.915834,
        "ymax": -105.193612
    }
}

### This is the actual registration request. Inspect all information before official submission!! Need a way to set header char to '#'
Data files need to have a URL, so below yields an error.

In [19]:
# submit register request
resource_defs = {
    "resources": [
        {
            "record_id": file_1_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_1_name,
            "resource_type": "csv",
            "data_url": file_1_data_url,
            "metadata": {
                "spatial_coverage": file_1_spatial_coverage,
                "temporal_coverage": file_1_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_2_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_2_name,
            "resource_type": "csv",
            "data_url": file_2_data_url,
            "metadata": {
                "spatial_coverage": file_2_spatial_coverage,
                "temporal_coverage": file_2_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_3_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_3_name,
            "resource_type": "csv",
            "data_url": file_3_data_url,
            "metadata": {
                "spatial_coverage": file_3_spatial_coverage,
                "temporal_coverage": file_3_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_4_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_4_name,
            "resource_type": "csv",
            "data_url": file_4_data_url,
            "metadata": {
                "spatial_coverage": file_4_spatial_coverage,
                "temporal_coverage": file_4_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_5_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_5_name,
            "resource_type": "csv",
            "data_url": file_5_data_url,
            "metadata": {
                "spatial_coverage": file_5_spatial_coverage,
                "temporal_coverage": file_5_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_6_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_6_name,
            "resource_type": "csv",
            "data_url": file_6_data_url,
            "metadata": {
                "spatial_coverage": file_6_spatial_coverage,
                "temporal_coverage": file_6_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_7_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_7_name,
            "resource_type": "csv",
            "data_url": file_7_data_url,
            "metadata": {
                "spatial_coverage": file_7_spatial_coverage,
                "temporal_coverage": file_7_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_8_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_8_name,
            "resource_type": "csv",
            "data_url": file_8_data_url,
            "metadata": {
                "spatial_coverage": file_8_spatial_coverage,
                "temporal_coverage": file_8_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_9_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_9_name,
            "resource_type": "csv",
            "data_url": file_9_data_url,
            "metadata": {
                "spatial_coverage": file_9_spatial_coverage,
                "temporal_coverage": file_9_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_10_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_10_name,
            "resource_type": "csv",
            "data_url": file_10_data_url,
            "metadata": {
                "spatial_coverage": file_10_spatial_coverage,
                "temporal_coverage": file_10_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_11_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_11_name,
            "resource_type": "csv",
            "data_url": file_11_data_url,
            "metadata": {
                "spatial_coverage": file_11_spatial_coverage,
                "temporal_coverage": file_11_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_12_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"],
                gageheight_variable["record_id"]
            ],
            "name": file_12_name,
            "resource_type": "csv",
            "data_url": file_12_data_url,
            "metadata": {
                "spatial_coverage": file_12_spatial_coverage,
                "temporal_coverage": file_12_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4269" #NAD83 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_13_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_13_name,
            "resource_type": "csv",
            "data_url": file_13_data_url,
            "metadata": {
                "spatial_coverage": file_13_spatial_coverage,
                "temporal_coverage": file_13_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        },
        {
            "record_id": file_14_record_id,
            "dataset_id": dataset_record_id,
            "provenance_id": provenance_id,
            "variable_ids": [
                time_variable["record_id"],
                streamflow_variable["record_id"]
            ],
            "name": file_14_name,
            "resource_type": "csv",
            "data_url": file_14_data_url,
            "metadata": {
                "spatial_coverage": file_14_spatial_coverage,
                "temporal_coverage": file_14_temporal_coverage,
                "geospatial_metadata": {
                    "srs": {
                        "srid": "EPSG:4267" #NAD27 projection
                    },
                },
                "delimiter": ",",
                "has_header": True,
                #header_char? ... '#'
            },
            "layout": {}
        }
    ]
}

# ... and register them in bulk
resp = requests.post(f"{url}/datasets/register_resources", 
                                        headers=request_headers,
                                        json=resource_defs)


parsed_response = handle_api_response(resp, print_response=True)


resources = parsed_response["resources"]
    
resource_1 = next(record for record in resources if record["name"] == file_1_name)
resource_2 = next(record for record in resources if record["name"] == file_2_name)
resource_3 = next(record for record in resources if record["name"] == file_3_name)
resource_4 = next(record for record in resources if record["name"] == file_4_name)
resource_5 = next(record for record in resources if record["name"] == file_5_name)
resource_6 = next(record for record in resources if record["name"] == file_6_name)
resource_7 = next(record for record in resources if record["name"] == file_7_name)
resource_8 = next(record for record in resources if record["name"] == file_8_name)
resource_9 = next(record for record in resources if record["name"] == file_9_name)
resource_10 = next(record for record in resources if record["name"] == file_10_name)
resource_11 = next(record for record in resources if record["name"] == file_11_name)
resource_12 = next(record for record in resources if record["name"] == file_12_name)
resource_13 = next(record for record in resources if record["name"] == file_13_name)
resource_14 = next(record for record in resources if record["name"] == file_14_name)

## Uncomment below to print individual records    
# print(f"{file_1_name}: {resource_1}")
# print(f"{file_2_name}: {resource_2}")

{ 'API Response': { 'error': "{'VariableSchemaValidationError': [{'record': "
                             "{'dataset_id': "
                             "'4e8ade31-7729-4891-a462-2dac66158512', "
                             "'provenance_id': "
                             "'e8287ea4-e6f2-47aa-8bfc-0c22852735c8', 'name': "
                             "'USGS06727000.csv', 'record_id': "
                             "'14c4d46c-c2c4-4aac-8683-343205eb8e7', "
                             "'variable_ids': "
                             "['9358af57-192f-4cc3-9bee-837e76819674', "
                             "'c22deb3b-ebda-48cb-950a-2f4f00498197'], "
                             "'resource_type': 'csv', 'data_url': "
                             "'www.my_domain.com/storage/USGS06727000.csv', "
                             "'json_metadata': {'spatial_coverage': {'type': "
                             "'BoundingBox', 'value': {'xmin': 40.006389, "
                             "'ymin': -105.

Exception: Bad request ^