# Hydrocron API to pull SWOT RiverSP data


In [1]:
import folium
import requests
from io import StringIO
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import os
print(os.getcwd())

/Users/camryn/Documents/Duke/courses/geospatial_data_analysis


# Time series pulls for multiple nodes


### Pull many nodes by loading a csv with nodes of interest:

In [16]:
# Load the CSV file with column 'Node_ID' with nodes of interest
df = pd.read_csv('/Users/camryn/Documents/Duke/courses/geospatial_data_analysis/SWOTson_River_Camryn/Watson_domain_v16.csv')

# Remove duplicate Node_ID values
df_unique = df.drop_duplicates(subset=['Node_ID'])

# Extract the unique Node_IDs as a list
node_ids = df_unique['Node_ID'].astype(str).tolist()

print(node_ids)

['91270800030011', '91270800030021', '91270800030031', '91270800030041', '91270800030051', '91270800030061', '91270800030071', '91270800030081', '91270800030091', '91270800030101', '91270800030111', '91270800030121', '91270800030131', '91270800030141', '91270800030151', '91270800030161', '91270800030171', '91270800030181', '91270800030191', '91270800030201', '91270800030211', '91270800030221', '91270800030231', '91270800030241', '91270800030251', '91270800030261', '91270800030271', '91270800030281', '91270800030291', '91270800030301', '91270800030311', '91270800030321', '91270800030331', '91270800030341', '91270800030351', '91270800030361', '91270800030371', '91270800030381', '91270800030391', '91270800030401', '91270800030411', '91270800030421', '91270800040011', '91270800040021', '91270800040031', '91270800040041', '91270800040051', '91270800040061', '91270800040071', '91270800040081', '91270800040091', '91270800040101', '91270800040111', '91270800040121', '91270800040131', '91270800

All possible fields to be pulled in node riverSP:

'reach_id', 'node_id', 'time', 'time_tai', 'time_str',
'lat', 'lon', 'lat_u', 'lon_u', 'river_name',
'wse', 'wse_u', 'wse_r_u',
'width', 'width_u',
'area_total', 'area_tot_u', 'area_detct', 'area_det_u', 'area_wse',
'layovr_val', 'node_dist', 'xtrk_dist',
'flow_angle', 'node_q', 'node_q_b',
'dark_frac', 'ice_clim_f', 'ice_dyn_f', 'partial_f', 'n_good_pix',
'xovr_cal_q', 'rdr_sig0', 'rdr_sig0_u', 'rdr_pol',
'geoid_hght', 'solid_tide', 'load_tidef', 'load_tideg', 'pole_tide',
'dry_trop_c', 'wet_trop_c', 'iono_c', 'xovr_cal_c',
'p_wse', 'p_wse_var', 'p_width', 'p_wid_var', 'p_dist_out', 
'p_dam_id', 'p_n_ch_max', 'p_n_ch_mod',
'cycle_id', 'pass_id', 'continent_id', 'range_start_time', 'range_end_time',
'crid', 'geometry', 'sword_version', 'collection_shortname', 'collection_version',
'granuleUR', 'ingest_time'

Field versions currently selected to pull:
reach_id,node_id,time,time_tai,time_str,lat,lon,wse,wse_u,wse_r_u,width,width_u,area_total,area_tot_u,area_detct,
area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,node_q,node_q_b,dark_frac,n_good_pix,rdr_sig0,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out, p_length

In [17]:
import time
chunk_size = 100  # Start with 50; adjust based on data size per request
results = []

def fetch_data_for_chunk(chunk):
    # get data for each node_id in the chunk
    for fid in chunk:
        try:
            response = requests.get(
                # specifiy datetime range below & add/remove fields of interest
                f"https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=Node&feature_id={fid}&start_time=2024-05-01T00:00:00Z&end_time=2024-10-01T00:00:00Z&output=csv&fields=reach_id,node_id,time,time_str,time_tai,lat,lon,wse,wse_u,wse_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,node_q,node_q_b,dark_frac,n_good_pix,rdr_sig0,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,p_length"
            ).json()
            results.append(response)
            
            # Print the response for the first 3 node_ids for debugging
            
            print(f"Response for node_id {fid}: {response}")
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data for feature_id {fid}: {e}")

# Process in chunks
for i in range(0, len(node_ids), chunk_size):
    chunk = node_ids[i:i + chunk_size]
    fetch_data_for_chunk(chunk)
    time.sleep(1)  # Pause between chunks to reduce server load

# Further processing on the collected `results` can be done here
print(f"Total responses collected: {len(results)}")

Response for node_id 91270800030011: {'status': '200 OK', 'time': 930.444, 'hits': 49, 'results': {'csv': 'reach_id,node_id,time,time_str,time_tai,lat,lon,wse,wse_u,wse_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,node_q,node_q_b,dark_frac,n_good_pix,rdr_sig0,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,p_length,time_units,time_tai_units,lat_units,lon_units,wse_units,wse_u_units,wse_r_u_units,width_units,width_u_units,area_total_units,area_tot_u_units,area_detct_units,area_det_u_units,area_wse_units,layovr_val_units,node_dist_units,xtrk_dist_units,dark_frac_units,n_good_pix_units,rdr_sig0_units,p_dist_out_units,p_length_units\n91270800031,91270800030011,-999999999999.0,no_data,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-99999999

In [18]:
results

[{'status': '200 OK',
  'time': 930.444,
  'hits': 49,
  'results': {'csv': 'reach_id,node_id,time,time_str,time_tai,lat,lon,wse,wse_u,wse_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,node_q,node_q_b,dark_frac,n_good_pix,rdr_sig0,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,p_length,time_units,time_tai_units,lat_units,lon_units,wse_units,wse_u_units,wse_r_u_units,width_units,width_u_units,area_total_units,area_tot_u_units,area_detct_units,area_det_u_units,area_wse_units,layovr_val_units,node_dist_units,xtrk_dist_units,dark_frac_units,n_good_pix_units,rdr_sig0_units,p_dist_out_units,p_length_units\n91270800031,91270800030011,-999999999999.0,no_data,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,3,520093696,-9999999999

In [19]:
all_data = []

for response in results:
    # Proceed only if 'results' and 'csv' keys exist
    if response.get('results') and 'csv' in response['results']:
        csv_data = response['results']['csv']
        df = pd.read_csv(StringIO(csv_data))
        all_data.append(df)

# Concatenate all dataframes and export to CSV for all data collected
if all_data:
    merged_df = pd.concat(all_data, ignore_index=True)
    merged_df.to_csv("merged_results.csv", index=False)
    print("Data has been merged and saved to 'merged_results.csv'.")
else:
    print("No data to merge; 'results' may be empty.")

Data has been merged and saved to 'merged_results.csv'.


# Time series pulls for multiple reaches

## Pull multiple reaches with a csv

In [11]:
# Load the CSV file with column 'Reach_ID' with reaches of interest
df = pd.read_csv('/Users/camryn/Documents/Duke/courses/geospatial_data_analysis/SWOTson_River_Camryn/Watson_domain_v16.csv')

# Remove duplicate Reach_ID values
df_unique = df.drop_duplicates(subset=['Reach_ID'])

# Extract the unique Reach_IDs as a list
reach_ids = df_unique['Reach_ID'].astype(str).tolist()

print(reach_ids)

['91270800031', '91270800041', '91270800051', '91270800136']


All possible fields to be pulled in reach riverSP:

'reach_id', 'time', 'time_tai', 'time_str', 'p_lat', 'p_lon', 'river_name',
'wse', 'wse_u', 'wse_r_u', 'wse_c', 'wse_c_u',
'slope', 'slope_u', 'slope_r_u', 'slope2', 'slope2_u', 'slope2_r_u',
'width', 'width_u', 'width_c', 'width_c_u',
'area_total', 'area_tot_u', 'area_detct', 'area_det_u', 'area_wse',
'd_x_area', 'd_x_area_u',
'layovr_val', 'node_dist', 'loc_offset', 'xtrk_dist',
'dschg_c', 'dschg_c_u', 'dschg_csf', 'dschg_c_q',
'dschg_gc', 'dschg_gc_u', 'dschg_gcsf', 'dschg_gc_q',
'dschg_m', 'dschg_m_u', 'dschg_msf', 'dschg_m_q',
'dschg_gm', 'dschg_gm_u', 'dschg_gmsf', 'dschg_gm_q',
'dschg_b', 'dschg_b_u', 'dschg_bsf', 'dschg_b_q',
'dschg_gb', 'dschg_gb_u', 'dschg_gbsf', 'dschg_gb_q',
'dschg_h', 'dschg_h_u', 'dschg_hsf', 'dschg_h_q',
'dschg_gh', 'dschg_gh_u', 'dschg_ghsf', 'dschg_gh_q',
'dschg_o', 'dschg_o_u', 'dschg_osf', 'dschg_o_q',
'dschg_go', 'dschg_go_u', 'dschg_gosf', 'dschg_go_q',
'dschg_s', 'dschg_s_u', 'dschg_ssf', 'dschg_s_q',
'dschg_gs', 'dschg_gs_u', 'dschg_gssf', 'dschg_gs_q',
'dschg_i', 'dschg_i_u', 'dschg_isf', 'dschg_i_q',
'dschg_gi', 'dschg_gi_u', 'dschg_gisf', 'dschg_gi_q',
'dschg_q_b', 'dschg_gq_b',
'reach_q', 'reach_q_b',
'dark_frac', 'ice_clim_f', 'ice_dyn_f', 'partial_f', 'n_good_nod',
'obs_frac_n', 'xovr_cal_q', 'geoid_hght', 'geoid_slop',
'solid_tide', 'load_tidef', 'load_tideg', 'pole_tide',
'dry_trop_c', 'wet_trop_c', 'iono_c', 'xovr_cal_c',
'n_reach_up', 'n_reach_dn', 'rch_id_up', 'rch_id_dn',
'p_wse', 'p_wse_var', 'p_width', 'p_wid_var', 'p_n_nodes', 'p_dist_out',
'p_length', 'p_maf', 'p_dam_id', 'p_n_ch_max', 'p_n_ch_mod', 'p_low_slp',
'cycle_id', 'pass_id', 'continent_id', 'range_start_time', 'range_end_time',
'crid', 'geometry', 'sword_version', 'collection_shortname', 'collection_version',
'granuleUR', 'ingest_time'

Field versions currently selected to pull:
reach_id,time,time_tai,time_str,p_lat,p_lon,wse,wse_u,wse_r_u,slope,slope_u,slope_r_u,slope2,slope2_u,slope2_r_u,width,width_u,area_total,area_tot_u,area_detct,
area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,reach_q,reach_q_b,dark_frac,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,partial_f,n_good_nod

In [12]:
import time
chunk_size = 100  # Start with 50; adjust based on data size per request
results = []

def fetch_data_for_chunk(chunk):
    # get data for each reach_id in the chunk
    for fid in chunk:
        try:
            response = requests.get(
                # specifiy datetime range below & add/remove fields of interest
                f"https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=Reach&feature_id={fid}&start_time=2024-05-01T00:00:00Z&end_time=2024-10-01T00:00:00Z&output=csv&fields=reach_id,time,time_tai,time_str,p_lat,p_lon,wse,wse_u,wse_r_u,slope,slope_u,slope_r_u,slope2,slope2_u,slope2_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,reach_q,reach_q_b,dark_frac,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,partial_f,n_good_nod"
            ).json()
            results.append(response)
            
            # Print the response for the first 3 node_ids for debugging
            
            print(f"Response for reach_id {fid}: {response}")
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data for feature_id {fid}: {e}")

# Process in chunks
for i in range(0, len(reach_ids), chunk_size):
    chunk = reach_ids[i:i + chunk_size]
    fetch_data_for_chunk(chunk)
    time.sleep(1)  # Pause between chunks to reduce server load

# Further processing on the collected `results` can be done here
print(f"Total responses collected: {len(results)}")

Response for reach_id 91270800031: {'status': '200 OK', 'time': 1914.385, 'hits': 49, 'results': {'csv': 'reach_id,time,time_tai,time_str,p_lat,p_lon,wse,wse_u,wse_r_u,slope,slope_u,slope_r_u,slope2,slope2_u,slope2_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,reach_q,reach_q_b,dark_frac,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,partial_f,n_good_nod,time_units,time_tai_units,p_lat_units,p_lon_units,wse_units,wse_u_units,wse_r_u_units,slope_units,slope_u_units,slope_r_u_units,slope2_units,slope2_u_units,slope2_r_u_units,width_units,width_u_units,area_total_units,area_tot_u_units,area_detct_units,area_det_u_units,area_wse_units,layovr_val_units,node_dist_units,xtrk_dist_units,dark_frac_units,p_dist_out_units,n_good_nod_units\n91270800031,-999999999999.0,-999999999999.0,no_data,67.02622929,-50.5854346,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-9999999999

In [13]:
results

[{'status': '200 OK',
  'time': 1914.385,
  'hits': 49,
  'results': {'csv': 'reach_id,time,time_tai,time_str,p_lat,p_lon,wse,wse_u,wse_r_u,slope,slope_u,slope_r_u,slope2,slope2_u,slope2_r_u,width,width_u,area_total,area_tot_u,area_detct,area_det_u,area_wse,layovr_val,node_dist,xtrk_dist,reach_q,reach_q_b,dark_frac,xovr_cal_q,cycle_id,pass_id,sword_version,p_dist_out,partial_f,n_good_nod,time_units,time_tai_units,p_lat_units,p_lon_units,wse_units,wse_u_units,wse_r_u_units,slope_units,slope_u_units,slope_r_u_units,slope2_units,slope2_u_units,slope2_r_u_units,width_units,width_u_units,area_total_units,area_tot_u_units,area_detct_units,area_det_u_units,area_wse_units,layovr_val_units,node_dist_units,xtrk_dist_units,dark_frac_units,p_dist_out_units,n_good_nod_units\n91270800031,-999999999999.0,-999999999999.0,no_data,67.02622929,-50.5854346,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999999999.0,-999999

### save output as a csv

In [14]:
all_data = []

for response in results:
    # Proceed only if 'results' and 'csv' keys exist
    if response.get('results') and 'csv' in response['results']:
        csv_data = response['results']['csv']
        df = pd.read_csv(StringIO(csv_data))
        all_data.append(df)

# Concatenate all dataframes and export to CSV for all data collected
if all_data:
    merged_df = pd.concat(all_data, ignore_index=True)
    merged_df.to_csv("merged_results.csv", index=False)
    print("Data has been merged and saved to 'merged_results.csv'.")
else:
    print("No data to merge; 'results' may be empty.")

Data has been merged and saved to 'merged_results.csv'.
