In [1]:
import pandas as pd
import numpy as np
import requests
import os
import json
import time

In [2]:
# Read the API token from the environment (None if the variable is unset).
coastal_api = os.getenv('coastal_api')

# Ask httpbin to echo back the User-Agent header this client sends; the value
# is reused in the request headers built later in the notebook.
# NOTE(review): requests.utils.default_user_agent() presumably yields the same
# string without a network round trip — confirm before switching.
r = requests.get('http://httpbin.org/user-agent', timeout=30)
# Fail here with a clear HTTP error instead of an opaque JSON/KeyError below.
r.raise_for_status()
useragent = r.json()['user-agent']

In [10]:
# https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/381624075071901/forecasts

In [3]:
# Headers attached to every request sent to the USGS API:
# the client's user agent, a contact address, and the API token.
headers = {
    'User-Agent': useragent,
    'From': 'zrc3hc@virginia.edu',
    'token': coastal_api,
}

In [5]:
# Fetch the list of regions; the bare `r` on the last line displays the
# response status as the cell output.
root = 'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api'
endpoint = '/regions'
r = requests.get(f'{root}{endpoint}', headers=headers)
r

<Response [200]>

In [6]:
# Stop with an explicit HTTP error if the regions request did not succeed,
# rather than parsing an error body and failing later in the forecast loop.
r.raise_for_status()
# Decode the JSON payload of the regions response.
regions = r.json()

In [10]:
# Download the forecast list of every region, tagging each forecast with the
# region it belongs to. Requests are sent in batches with a short pause between
# batches.
all_forecasts = []
root = 'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{id}/forecasts'
pause_duration = 1
request_counter = 0
requests_per_batch = 5

for region in regions:
    response = requests.get(root.format(id=region['id']), headers=headers)

    if response.status_code != 200:
        print(f"Failed to retrieve data for region ID: {region['id']}")
    else:
        # Assumes the payload is a list of forecast dicts; each one is copied
        # with the region's id, abbreviation and full name added.
        all_forecasts.extend(
            {
                **forecast,
                'region_id': region['id'],
                'region_abbreviation': region['abbreviation'],
                'region_fullName': region['fullName'],
            }
            for forecast in response.json()
        )

    # Count every request, successful or not, towards the current batch.
    request_counter += 1
    if request_counter < requests_per_batch:
        continue

    # Batch complete: pause, then start counting the next batch.
    print(f"Pausing for {pause_duration} seconds after {requests_per_batch} requests.")
    time.sleep(pause_duration)
    request_counter = 0


Pausing for 1 seconds after 5 requests.
Failed to retrieve data for region ID: 8
Failed to retrieve data for region ID: 9
Failed to retrieve data for region ID: 10
Pausing for 1 seconds after 5 requests.
Failed to retrieve data for region ID: 20
Failed to retrieve data for region ID: 21
Pausing for 1 seconds after 5 requests.


In [11]:
# Turn the collected forecast records into a table, one row per forecast,
# and display it as the cell output.
forecasts_df = pd.DataFrame(data=all_forecasts)
forecasts_df

Unnamed: 0,id,date,time,duration,maxPredictedImpact,maxPredictedImpactCode,region_id,region_abbreviation,region_fullName
0,72261,2023-12-02,12:00:00,144,Collision,1,1,BOX,"Boston, MA"
1,72249,2023-12-02,06:00:00,144,Collision,1,1,BOX,"Boston, MA"
2,72234,2023-12-02,00:00:00,144,Collision,1,1,BOX,"Boston, MA"
3,72212,2023-12-01,12:00:00,144,Collision,1,1,BOX,"Boston, MA"
4,72206,2023-12-01,06:00:00,144,Collision,1,1,BOX,"Boston, MA"
...,...,...,...,...,...,...,...,...,...
270,71983,2023-11-26,06:00:00,144,Overwash,2,22,GYX,"Gray, ME"
271,71963,2023-11-25,18:00:00,144,Collision,1,22,GYX,"Gray, ME"
272,71955,2023-11-25,12:00:00,144,Collision,1,22,GYX,"Gray, ME"
273,71941,2023-11-25,06:00:00,144,Collision,1,22,GYX,"Gray, ME"


In [12]:
# One (region_id, forecast id) pair per row of forecasts_df.
# Zipping the two columns avoids building a Series for every row, which is
# what iterrows() does. `.get(..., [])` keeps the result an empty list when
# the frame has no such columns (e.g. every forecast request failed).
id_pairs = list(zip(forecasts_df.get('region_id', []), forecasts_df.get('id', [])))

In [13]:
# Download the site list of every (region, forecast) pair and tag each site
# with the ids it was requested under.
headers = {'User-Agent': useragent,
           'From': 'zrc3hc@virginia.edu',
           'token': coastal_api}

pause_duration = 1
request_counter = 0
requests_per_batch = 10

all_sites_data = []

for region_id, forecast_id in id_pairs:
    # Format the API URL with the current region_id and forecast_id
    root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'

    # Make the API request
    r = requests.get(root, headers=headers)

    # Check if the request was successful
    if r.status_code == 200:
        site_data = r.json()
        for site in site_data:
            site['region_id'] = region_id
            site['forecast_id'] = forecast_id
        all_sites_data.extend(site_data)
    else:
        print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")

    # Increment the request counter
    request_counter += 1

    # Check if it's time to pause
    if request_counter >= requests_per_batch:
        print(f"Pausing for {pause_duration} seconds after {requests_per_batch} requests.")
        time.sleep(pause_duration)
        # Reset the counter after the pause. Without this the condition stays
        # true and the loop sleeps after every single request from then on.
        request_counter = 0

Failed to retrieve data for region ID 1, forecast ID 72249
Failed to retrieve data for region ID 1, forecast ID 72234
Failed to retrieve data for region ID 1, forecast ID 72212
Failed to retrieve data for region ID 1, forecast ID 72206
Failed to retrieve data for region ID 1, forecast ID 72191
Failed to retrieve data for region ID 1, forecast ID 72175
Failed to retrieve data for region ID 1, forecast ID 72166
Failed to retrieve data for region ID 1, forecast ID 72153
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 requests.
Pausing for 1 seconds after 10 reque

In [14]:
# Turn the collected site records into a table, one row per site and forecast,
# and display it as the cell output.
sites_df = pd.DataFrame(data=all_sites_data)
sites_df

Unnamed: 0,id,siteLatitude,siteLongitude,location,city,stateOrTerritory,siteFeatureType,toeHeight,crestHeight,dateElevationMeasured,...,forecastLongitude,distanceBetweenSiteAndForecast,beachSlope,maxPredictedImpact,maxPredictedImpactCode,predictedDurationOfCollision,predictedDurationOfOverwash,predictedDurationOfInundation,region_id,forecast_id
0,1450,41.3285,-70.8190,Gay Head Town Beach,Chilmark,MA,Dune,2.99,7.00,2013-11-01,...,-70.8120,896,0.1560,,0,0,0,0,1,72261
1,1451,41.3224,-70.8120,Long Beach,Chilmark,MA,Cliff,3.10,7.31,2013-11-01,...,-70.8120,0,0.1560,,0,0,0,0,1,72261
2,1452,41.3195,-70.8070,Long Beach,Chilmark,MA,Dune,3.49,7.14,2013-11-01,...,-70.8070,0,0.0950,,0,0,0,0,1,72261
3,1453,41.3114,-70.7940,Squibnocket Marshes,Chilmark,MA,Dune,3.34,7.42,2013-11-01,...,-70.7940,0,0.0874,,0,0,0,0,1,72261
4,1454,41.3021,-70.7780,Squibnocket Point,Chilmark,MA,Dune,2.70,7.29,2013-11-01,...,-70.7780,0,0.1102,,0,0,0,0,1,72261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6370,5381,43.4190,-70.3854,Horseshoe Cove,Kennebunkport,ME,Dune,2.20,3.07,2014-11-01,...,-70.3850,32,0.0689,Collision,1,6,0,0,22,71922
6371,5382,43.4362,-70.3684,Fortunes Rocks Beach,Old Orchard Beach,ME,Dune,3.10,5.03,2014-11-01,...,-70.3680,32,0.0773,,0,0,0,0,22,71922
6372,5383,43.4388,-70.3634,Fletcher Neck,Old Orchard Beach,ME,Dune,2.91,5.21,2014-11-01,...,-70.3630,32,0.1032,Collision,1,1,0,0,22,71922
6373,5384,43.4420,-70.3520,Fletcher Neck,Old Orchard Beach,ME,Dune,3.30,4.45,2014-11-01,...,-70.3520,0,0.0880,,0,0,0,0,22,71922


Failed to retrieve data for region ID 1, forecast ID 72249
Failed to retrieve data for region ID 1, forecast ID 72234
Failed to retrieve data for region ID 1, forecast ID 72212
Failed to retrieve data for region ID 1, forecast ID 72206
Failed to retrieve data for region ID 1, forecast ID 72191
Failed to retrieve data for region ID 1, forecast ID 72175
Failed to retrieve data for region ID 1, forecast ID 72166
Failed to retrieve data for region ID 1, forecast ID 72153

In [31]:
# (region_id, forecast_id) pairs reported as failed by the bulk site download;
# every one of them belongs to region 1.
failed_id_pairs = [
    (1, forecast_id)
    for forecast_id in (72249, 72234, 72212, 72206, 72191, 72175, 72166, 72153)
]

In [43]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns.
region_id, forecast_id = 1, 72249
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


Unnamed: 0,id,siteLatitude,siteLongitude,location,city,stateOrTerritory,siteFeatureType,toeHeight,crestHeight,dateElevationMeasured,...,forecastLongitude,distanceBetweenSiteAndForecast,beachSlope,maxPredictedImpact,maxPredictedImpactCode,predictedDurationOfCollision,predictedDurationOfOverwash,predictedDurationOfInundation,region_id,forecast_id
0,1450,41.3285,-70.8190,Gay Head Town Beach,Chilmark,MA,Dune,2.99,7.00,2013-11-01,...,-70.8120,896,0.1560,,0,0,0,0,1,72261
1,1451,41.3224,-70.8120,Long Beach,Chilmark,MA,Cliff,3.10,7.31,2013-11-01,...,-70.8120,0,0.1560,,0,0,0,0,1,72261
2,1452,41.3195,-70.8070,Long Beach,Chilmark,MA,Dune,3.49,7.14,2013-11-01,...,-70.8070,0,0.0950,,0,0,0,0,1,72261
3,1453,41.3114,-70.7940,Squibnocket Marshes,Chilmark,MA,Dune,3.34,7.42,2013-11-01,...,-70.7940,0,0.0874,,0,0,0,0,1,72261
4,1454,41.3021,-70.7780,Squibnocket Point,Chilmark,MA,Dune,2.70,7.29,2013-11-01,...,-70.7780,0,0.1102,,0,0,0,0,1,72261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6395,1470,41.3489,-70.6300,Long Cove,West Tisbury,MA,Dune,2.07,3.23,2013-11-01,...,-70.6300,0,0.1114,,0,0,0,0,1,72249
6396,1472,41.3499,-70.6060,Ripley Cove,West Tisbury,MA,Dune,2.47,4.26,2013-11-01,...,-70.6060,0,0.1211,,0,0,0,0,1,72249
6397,1473,41.3504,-70.5950,Long Point Wildlife Refuge,West Tisbury,MA,Dune,2.37,4.04,2013-11-01,...,-70.5950,0,0.1168,,0,0,0,0,1,72249
6398,1474,41.3504,-70.5890,Long Point Wildlife Refuge,West Tisbury,MA,Dune,2.30,3.98,2013-11-01,...,-70.5890,0,0.1134,,0,0,0,0,1,72249


In [44]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72234 was requested).
region_id, forecast_id = 1, 72234
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [45]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72212 was requested).
region_id, forecast_id = 1, 72212
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [46]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72206 was requested).
region_id, forecast_id = 1, 72206
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [48]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72191 was requested).
region_id, forecast_id = 1, 72191
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [49]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72175 was requested).
region_id, forecast_id = 1, 72175
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [50]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72166 was requested).
region_id, forecast_id = 1, 72166
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [51]:
# Retry the /sites request for a forecast that failed in the bulk download.
# The ids are kept as integers, matching the rows already in sites_df, and the
# same values are used for both the URL and the added columns (the rows were
# previously labelled with forecast 72249 although 72153 was requested).
region_id, forecast_id = 1, 72153
root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites'
response = requests.get(root, headers=headers)

if response.status_code == 200:
    # Convert the JSON response to a DataFrame and tag it with its ids
    site_data = response.json()
    df = pd.DataFrame(site_data).assign(region_id=region_id, forecast_id=forecast_id)

    # Append only on success, so a failed request can never re-append a
    # stale `df` left over from an earlier cell.
    sites_df = pd.concat([sites_df, df], ignore_index=True)
else:
    print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}")


In [53]:
# Persist the combined site table (including its row index) to disk.
sites_df.to_csv(path_or_buf="sites_data.csv", index=True)

In [46]:
# One (site id, region_id, forecast_id) triple per row of sites_df.
# Zipping the three columns avoids building a Series for every row, which is
# what iterrows() does. `.get(..., [])` keeps the result an empty list when
# the frame has no such columns.
id_pairs2 = list(zip(
    sites_df.get('id', []),
    sites_df.get('region_id', []),
    sites_df.get('forecast_id', []),
))

In [None]:
# Download the water-level records of every (site, region, forecast) triple
# and tag each record with the ids it was requested under. Requests are sent
# in batches with a short pause between batches.
headers = {
    'User-Agent': useragent,
    'From': 'zrc3hc@virginia.edu',
    'token': coastal_api,
}

pause_duration = 1
request_counter = 0
requests_per_batch = 10

water_levels_info = []

for site_id, region_id, forecast_id in id_pairs2:
    root = f'https://coastal.er.usgs.gov/hurricanes/research/twlviewer/api/regions/{region_id}/forecasts/{forecast_id}/sites/{site_id}/waterlevels'
    r = requests.get(root, headers=headers)

    if r.status_code != 200:
        print(f"Failed to retrieve data for region ID {region_id}, forecast ID {forecast_id}, site ID {site_id}")
    else:
        # Copy each returned record with the three ids added.
        water_levels_info.extend(
            {
                **record,
                'region_id': region_id,
                'forecast_id': forecast_id,
                'site_id': site_id,
            }
            for record in r.json()
        )

    # Count every request, successful or not, towards the current batch.
    request_counter += 1
    if request_counter < requests_per_batch:
        continue

    # Batch complete: pause, then start counting the next batch.
    print(f"Pausing for {pause_duration} seconds after {requests_per_batch} requests.")
    time.sleep(pause_duration)
    request_counter = 0