In [None]:
!pip install noaa_coops

The tidal data will be retrieved from the National Oceanic and Atmospheric Admistration Center for Operational Oceanographic Products and Services (NOAA CO-OPS) API. This API allows querying by tidal observation station ID. For the purpose of this project, the tidal observation stations in Atlantic City, Cumberland, Cape May, and Sandy Hook will be used. Data is retrieved using the 'get_data' function and is returned the pandas dataframe format. The pandas dataframes are converted into namedtuples below to allow flexible manipulation. Attributes of the dataset include high tide time, high tide water level, low tide time, low tide water level, and date. Fields releant to tide times and water levels will be joined with eBird data on the date.

NOTE ON LIMITATIONS OF APIS: The current eBird API is only capable of querying data from the past 30 days. Thus, todays date minus 30 days must be used for these queries. Additionally, the NOAA COOPS data is only updated weekly, thus today's date minus 7 days must be used. Therefore this program can only acquire data for 3 consecutive weeks, not counting the current week.

ADDITIONAL NOTE ON LIMITATIONS: Atlantic City tidal data was not updated in February. NOAA staff has been contacted regarding the issue. Instead, Ship John Shoals tidal station data will be used instead (Cumberland County). Long-term for this data-set, Atlantic city data should be used as well so the code has been copied out.

In [2]:
# get tidal data from noaa coops api
import noaa_coops as nc # noaa_coops api
from datetime import date, timedelta, datetime
atlantic_city = nc.Station(8534720)
cape_may = nc.Station(8536110)
sandy_hook = nc.Station(8531680)
ship_john_shoal = nc.Station(8537121)

today = date.today().strftime('%Y-%m-%d')
today = str(today.replace('-', ''))

end_date = datetime.today() - timedelta(days=0)
end_date = end_date.strftime('%Y-%m-%d')
end_date = str(end_date.replace('-', ''))

start_date = datetime.today() - timedelta(days=30)
start_date = start_date.strftime('%Y-%m-%d')
start_date = str(start_date.replace('-', ''))

'''atlantic_city_tides = atlantic_city.get_data(
     begin_date= "20210101",
     end_date= "20210301",
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")'''

ship_john_shoal_tides = ship_john_shoal.get_data(
     begin_date= start_date,
     end_date= end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

cape_may_tides = cape_may.get_data(
     begin_date=start_date,
     end_date=end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

sandy_hook_tides = sandy_hook.get_data(
     begin_date=start_date,
     end_date=end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")


In [3]:
#ac_tides = list(atlantic_city_tides.itertuples(index=False, name='Atlantic_City'))
sjs_tides = list(ship_john_shoal_tides.itertuples(index=False, name='Cumberland_County'))
cm_tides = list(cape_may_tides.itertuples(index=False, name='Cape_May'))
sh_tides = list(sandy_hook_tides.itertuples(index=False, name='Sandy_Hook'))
#all_tides = ac_tides + cm_tides + sh_tides

tides_all = {'Cumberland': sjs_tides, 'Cape May': cm_tides, 'Monmouth': sh_tides}
for key, value in tides_all.items():
  current_list = value
  for i in range(len(current_list)):
    print(current_list[i])

Cumberland_County(date_time_HH=Timestamp('2021-02-05 08:48:00'), HH_water_level=7.695, date_time_H=Timestamp('2021-02-05 20:54:00'), H_water_level=7.291, date_time_L=Timestamp('2021-02-05 15:48:00'), L_water_level=5.901, date_time_LL=NaT, LL_water_level=nan)
Cumberland_County(date_time_HH=Timestamp('2021-02-06 10:06:00'), HH_water_level=7.607, date_time_H=Timestamp('2021-02-06 22:18:00'), H_water_level=7.016, date_time_L=Timestamp('2021-02-06 17:12:00'), L_water_level=5.822, date_time_LL=Timestamp('2021-02-06 03:48:00'), LL_water_level=5.715)
Cumberland_County(date_time_HH=Timestamp('2021-02-07 11:06:00'), HH_water_level=7.53, date_time_H=NaT, H_water_level=nan, date_time_L=Timestamp('2021-02-07 18:06:00'), L_water_level=5.796, date_time_LL=Timestamp('2021-02-07 04:48:00'), LL_water_level=5.507000000000001)
Cumberland_County(date_time_HH=Timestamp('2021-02-08 12:12:00'), HH_water_level=7.428, date_time_H=Timestamp('2021-02-08 00:06:00'), H_water_level=7.252999999999999, date_time_L=Tim

FIPS codes will be needed to query the eBird database for sightings in a given county. FIPS are numbers used to uniquely identify geographic regions, such as counties. FIPS codes are five-digit integers, with the first two digits indicating the state and the last three digits being county identifers. To acquire a list of full five-digit FIPS codes, the fcc website will be scraped using beautifulsoup. This project scope is New Jersey only, while the website contains FIPS codes for all 50 states. The New Jersey state code is 34 - so a regex ('34\d{3}\s*.*') was used to parse out five-digit numbers followed by a string containign the county name. FIPS and county codes were then extracted individually. The eBird API accepts strings in the US-{two-digit state code}-{county FIPS ID} format. So for new Jersey, we would use something like US-NJ-001. The five-digit FIPS codes and county names are manipulated into the desired format below, then saved into a dictionary with county names as keys.  



In [4]:
# importing beautifulsoup to scrape fcc.gov for fips codes (county data)
from bs4 import BeautifulSoup
import urllib.request
import re

urlpage =  'https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt'
page = urllib.request.urlopen(urlpage)
soup = BeautifulSoup(page, 'html.parser')

# get all matches for FIPS -whitespace- county name (34 is NJ state code)
matches = re.findall( r'34\d{3}\s*.*', str(soup))
# the first match is the new jersey state fips code - we only need county fips codes. omitting first match
matches = matches[1:]

counties = {}
for i in range(len(matches)):
    fips_codes = re.findall(r'34\d{3}', matches[i])
    fips_code = fips_codes[0]
    county_names = re.findall(r'\s(.*County)', matches[i])
    county_name = county_names[0]
    county_name = county_name.lstrip()
    counties[county_name] = fips_code

# county fips codes are the last 3 digits of the whole code
county_fips = {}
for key, value in counties.items():
    corrected_value = value[2:5]
    corrected_value = 'US-NJ-' + corrected_value
    corrected_key = key[:-7]
    county_fips[corrected_key] = corrected_value
print(county_fips)

{'Atlantic': 'US-NJ-001', 'Bergen': 'US-NJ-003', 'Burlington': 'US-NJ-005', 'Camden': 'US-NJ-007', 'Cape May': 'US-NJ-009', 'Cumberland': 'US-NJ-011', 'Essex': 'US-NJ-013', 'Gloucester': 'US-NJ-015', 'Hudson': 'US-NJ-017', 'Hunterdon': 'US-NJ-019', 'Mercer': 'US-NJ-021', 'Middlesex': 'US-NJ-023', 'Monmouth': 'US-NJ-025', 'Morris': 'US-NJ-027', 'Ocean': 'US-NJ-029', 'Passaic': 'US-NJ-031', 'Salem': 'US-NJ-033', 'Somerset': 'US-NJ-035', 'Sussex': 'US-NJ-037', 'Union': 'US-NJ-039', 'Warren': 'US-NJ-041'}


eBird query:

In [5]:
!pip install ebird-api



In [6]:
# import ebird API package and assign API key
from ebird.api import get_observations
from ebird.api import get_species_observations
from ebird.api import get_nearby_observations
from ebird.api import get_visits
from ebird.api import get_checklist
from ebird.api import get_taxonomy, get_taxonomy_forms, get_taxonomy_versions
from ebird.api import get_notable_observations
api_key = 'aape5hn8f10a' # api key obtained by request from eBird (personal use only - PLEASE DO NOT USE OR SHARE!)

In [7]:
taxonomy = get_taxonomy(api_key) # get scientific name, common name, species Code, category, taxonomic order, etc.

# we are interested in Charadriiformes* (Shorebirds, Gulls, Terns, Jaegers and Alcids)
# extracting only the data we need for this project into a new dictionary
shorebirds = [] # list of dictionaries from taxonomy for shorebirds
for d in taxonomy:
    for key, value in d.items():
        if key == 'order':
            if d[key] == 'Charadriiformes':
                shorebirds.append(d)

common_names = []
species_codes = []
for i in range(len(shorebirds)):
    current_species = shorebirds[i]
    for key, value in current_species.items():
        if key == 'comName':
            common_names.append(current_species['comName'])
        if key == 'speciesCode':
            species_codes.append(current_species['speciesCode'])

shorebirds_dict_pre = {}
for i in range(len(common_names)):
    current_name = common_names[i]
    shorebirds_dict_pre[current_name] = species_codes[i]

# cutting down the scope here to semi-common shorebirds (not gulls, terns, jaegers)
# need to decrease API request volume and tighten the scope of the project
# 18 semi-common shorebird abbreviations x 3 counties = 36 individual requests
narrowed_scope_abbv = ['killde', 'sander', 'dunlin', 'pursan', 'ameoys',
                       'bkbplo', 'greyel', 'semplo', 'lobdow', 'sposan',
                       'lesyel', 'leasan', 'margod', 'willet1', 'shbdow',
                       'wessan', 'pecsan', 'amgplo', 'solsan', 'stisan',
                       'hudgod', 'pipplo', 'uplsan']
shorebirds_dict = {}
for key, value in shorebirds_dict_pre.items():
  if value in narrowed_scope_abbv:
    shorebirds_dict[key] = value

for key, value in shorebirds_dict.items():
  print(key, value)

American Oystercatcher ameoys
Black-bellied Plover bkbplo
American Golden-Plover amgplo
Semipalmated Plover semplo
Piping Plover pipplo
Killdeer killde
Upland Sandpiper uplsan
Hudsonian Godwit hudgod
Marbled Godwit margod
Stilt Sandpiper stisan
Sanderling sander
Dunlin dunlin
Purple Sandpiper pursan
Least Sandpiper leasan
Pectoral Sandpiper pecsan
Western Sandpiper wessan
Short-billed Dowitcher shbdow
Long-billed Dowitcher lobdow
Spotted Sandpiper sposan
Solitary Sandpiper solsan
Greater Yellowlegs greyel
Willet willet1
Lesser Yellowlegs lesyel


In [8]:
# relevant counties are Atlantic, Monmouth, and Cape May for query
# these queries pull a lot of data - so data will only be queried once from the API
# the data will be written into a csv file for later use, and the code in this cell will be commented out

relevant_county_codes = []
for key, value in county_fips.items():
    if key in 'Cumberland' or key in 'Monmouth' or key in 'Cape May': # Atlantic was removed due to NOAA limitations
        relevant_county_codes.append(value)

# list of shorebird species codes for query
relevant_species_codes = []
for key, value in shorebirds_dict.items():
    relevant_species_codes.append(value)

def append_sightings(start, stop, county, specie): # use this sparingly, data is expensive
  try: # ignore bad requests (404 errors)
      get_obs = get_species_observations(api_key, specie, county, back=30) 
      for i in range(len(get_obs)):
        current_dict = get_obs[i]
        if current_dict:
          current_dict['county'] = county
          records.append(current_dict)
  except:
    print('error')

records = []

i = 0 # the limit of records per query is 30, so we need to break it down into intervals of 30
j = 30

for c in range(len(relevant_county_codes)):
  current_county = relevant_county_codes[c]
  for n in range(len(relevant_species_codes)):
    current_specie = relevant_species_codes[n]
    append_sightings(i, j, current_county, current_specie)
    i += 30
    j += 30

# some checklists were empty (as they did not contain target species) - let's remove these
valid_records = []
for i in range(len(records)):
    if not records[i]:
        continue
    else:
        valid_records.append(records[i])

In [9]:
for i in range(len(valid_records)):
  current_record = valid_records[i]
  valid_records[i]['sightingId'] = current_record['locId'] + '-' + current_record['obsDt'] + current_record['subId']
  print(valid_records[i])

{'speciesCode': 'ameoys', 'comName': 'American Oystercatcher', 'sciName': 'Haematopus palliatus', 'locId': 'L592872', 'locName': 'Ocean Drive--Two Mile Landing', 'obsDt': '2021-03-06 13:34', 'lat': 38.9595423, 'lng': -74.866333, 'obsValid': True, 'obsReviewed': False, 'locationPrivate': False, 'subId': 'S82880135', 'county': 'US-NJ-009', 'sightingId': 'L592872-2021-03-06 13:34S82880135'}
{'speciesCode': 'ameoys', 'comName': 'American Oystercatcher', 'sciName': 'Haematopus palliatus', 'locId': 'L13935009', 'locName': 'Stone Harbor Bridge, Cape May US-NJ 39.03941, -74.78226', 'obsDt': '2021-03-06 11:15', 'howMany': 5, 'lat': 39.039408, 'lng': -74.782258, 'obsValid': True, 'obsReviewed': False, 'locationPrivate': True, 'subId': 'S82866329', 'county': 'US-NJ-009', 'sightingId': 'L13935009-2021-03-06 11:15S82866329'}
{'speciesCode': 'ameoys', 'comName': 'American Oystercatcher', 'sciName': 'Haematopus palliatus', 'locId': 'L491378', 'locName': 'Champagne Island', 'obsDt': '2021-03-06 10:39'

In [10]:
from typing import NamedTuple
test_list = []

class eBird_Tidal_Join(NamedTuple):
    sightingId: str
    date: datetime.date
    obsTime: datetime.time
    speciesName: str
    speciesCode: str
    locName: str
    locId: str
    lat: str
    lon: str
    county: str
    howMany: int
    tidal_station: str
    hh_time: datetime
    hh_water_level: float
    h_time: datetime
    h_water_level: float
    l_time: datetime
    l_water_level: float
    ll_time: datetime
    ll_water_level: float

list_of_sightings = []
test_list = []

# for all valid eBird records
for i in range(len(valid_records)):
  # get current record
  current_record = valid_records[i]
  # get the eBird observation date
  obs_date_str = str(current_record['obsDt'])
  # then convert to a datetime object
  obs_date_obj = datetime.strptime(obs_date_str, '%Y-%m-%d %H:%M')


  test_dict = {}

  # for all items in the county tide dictionary
  for county, tides in tides_all.items():
    current_county = county
    # get the current county's tides
    current_county_tides = tides
    # for all records in the county tide list
    for i in range(len(current_county_tides)):
      # get the current day's tides
      current_tide = current_county_tides[i]
      # get the date
      tide_date_str = str(current_tide[0])
      # if that date is not 'NaT'...
      if tide_date_str != 'NaT':
        # convert it to a datetime to match the eBird datetime so they can be compared
        tide_time_obj = datetime.strptime(tide_date_str, '%Y-%m-%d %H:%M:%S')
        # if the dates match...
        if str(obs_date_obj.date()) == str(tide_time_obj.date()):
          # get all these variables
          sID = current_record['sightingId']
          oDate = obs_date_obj.date()
          oTime = obs_date_obj.time()
          sName = current_record['comName']
          sCode = current_record['speciesCode']
          lName = current_record['locName']
          lID = current_record['locId']
          lat = current_record['lat']
          lng = current_record['lng']
          cnty = current_record['county']
          test_dict['sightingID'] = sID
          test_dict['observationDate'] = oDate
          test_dict['observationTime'] = oTime
          test_dict['county'] = cnty
          test_dict['speciesName'] = sName
          test_dict['speciesCode'] = sCode
          test_dict['locationName'] = lName
          test_dict['locationID'] = lID
          test_dict['lat'] = lat
          test_dict['lng'] = lng
        
          hMany = 0
          try:
            howMany = current_record['howMany']
            test_dict['howMany'] = howMany
          except:
            test_dict['howMany'] = 1

          cnty_name = next(key for key, value in county_fips.items() if value == cnty)
          cnty_name = cnty_name.strip()
          currenty_county = current_county + " County"

          if current_county == cnty_name:  
            test_dict['tideStationName'] = county
            test_dict['highhighTime'] = current_tide[0]
            test_dict['highhighWaterLevel'] = current_tide[1]
            test_dict['highTime'] = current_tide[2]
            test_dict['highWaterLevel'] = current_tide[3]
            test_dict['lowTime'] = current_tide[4]
            test_dict['lowWaterLevel'] = current_tide[5]
            test_dict['lowlowTime'] = current_tide[6]
            test_dict['lowlowWaterLevel'] = current_tide[7]
            
            test_list.append(test_dict)

for i in range(len(test_list)):
  print(test_list[i])


{'sightingID': 'L109136-2021-02-28 10:42S82501262', 'observationDate': datetime.date(2021, 2, 28), 'observationTime': datetime.time(10, 42), 'county': 'US-NJ-009', 'speciesName': 'American Oystercatcher', 'speciesCode': 'ameoys', 'locationName': 'Cape Island--South Cape May Meadows (SCMM)', 'locationID': 'L109136', 'lat': 38.9359287, 'lng': -74.9434519, 'howMany': 1, 'tideStationName': 'Cape May', 'highhighTime': Timestamp('2021-02-28 14:12:00'), 'highhighWaterLevel': 2.595, 'highTime': Timestamp('2021-02-28 01:54:00'), 'highWaterLevel': 2.379, 'lowTime': Timestamp('2021-02-28 20:06:00'), 'lowWaterLevel': 0.685, 'lowlowTime': Timestamp('2021-02-28 07:36:00'), 'lowlowWaterLevel': 0.5820000000000001}
{'sightingID': 'L3797793-2021-02-26 10:34S82368120', 'observationDate': datetime.date(2021, 2, 26), 'observationTime': datetime.time(10, 34), 'county': 'US-NJ-009', 'speciesName': 'American Oystercatcher', 'speciesCode': 'ameoys', 'locationName': 'Ocean City Welcome Center', 'locationID': 'L

In [15]:
import pandas as pd
all_data_df = pd.DataFrame(test_list)
all_data_df.head(20)

Unnamed: 0,sightingID,observationDate,observationTime,county,speciesName,speciesCode,locationName,locationID,lat,lng,howMany,tideStationName,highhighTime,highhighWaterLevel,highTime,highWaterLevel,lowTime,lowWaterLevel,lowlowTime,lowlowWaterLevel
0,L109136-2021-02-28 10:42S82501262,2021-02-28,10:42:00,US-NJ-009,American Oystercatcher,ameoys,Cape Island--South Cape May Meadows (SCMM),L109136,38.935929,-74.943452,1,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
1,L3797793-2021-02-26 10:34S82368120,2021-02-26,10:34:00,US-NJ-009,American Oystercatcher,ameoys,Ocean City Welcome Center,L3797793,39.288177,-74.582459,4,Cape May,2021-02-26 12:42:00,2.414,2021-02-26 00:00:00,2.128,2021-02-26 18:48:00,0.72,2021-02-26 06:00:00,0.576
2,L1636680-2021-02-24 08:14S82257699,2021-02-24,08:14:00,US-NJ-009,American Oystercatcher,ameoys,Cape May Point SP,L1636680,38.9325,-74.959167,2,Cape May,2021-02-24 11:12:00,2.407,2021-02-24 23:42:00,2.053,2021-02-24 17:24:00,0.886,2021-02-24 04:24:00,0.881
3,L211826-2021-02-27 17:20S82475086,2021-02-27,17:20:00,US-NJ-009,Black-bellied Plover,bkbplo,Jake's Landing,L211826,39.179565,-74.851024,20,Cape May,2021-02-27 13:30:00,2.69,2021-02-27 00:54:00,2.195,2021-02-27 19:30:00,0.722,2021-02-27 06:48:00,0.617
4,L302921-2021-02-25 13:29S82327487,2021-02-25,13:29:00,US-NJ-009,Black-bellied Plover,bkbplo,Norbury's Landing,L302921,39.050163,-74.927361,2,Cape May,2021-02-25 11:54:00,2.415,NaT,,2021-02-25 18:00:00,0.842,2021-02-25 05:18:00,0.804
5,L2826655-2021-02-25 10:50S82330921,2021-02-25,10:50:00,US-NJ-009,Black-bellied Plover,bkbplo,Dennis Creek WMA--Conswell Road,L2826655,39.069649,-74.9066,3,Cape May,2021-02-25 11:54:00,2.415,NaT,,2021-02-25 18:00:00,0.842,2021-02-25 05:18:00,0.804
6,L211914-2021-02-20 13:21S82028509,2021-02-20,13:21:00,US-NJ-009,Black-bellied Plover,bkbplo,Nummy Island,L211914,39.036589,-74.791274,3,Cape May,2021-02-20 07:12:00,2.428,2021-02-20 19:36:00,2.013,2021-02-20 01:00:00,1.448,2021-02-20 13:06:00,1.316
7,L274556-2021-02-14 12:22S81425988,2021-02-14,12:22:00,US-NJ-009,Black-bellied Plover,bkbplo,Stone Harbor Blvd--The Wetlands Institute,L274556,39.060817,-74.772613,2,Cape May,2021-02-14 15:12:00,2.608,2021-02-14 03:00:00,2.534,2021-02-14 08:42:00,1.104,2021-02-14 21:30:00,0.993
8,L109134-2021-02-09 17:15S80732937,2021-02-09,17:15:00,US-NJ-009,Black-bellied Plover,bkbplo,North Wildwood--Hereford Inlet/Seawall,L109134,39.011944,-74.7925,100,Cape May,2021-02-09 11:24:00,2.576,2021-02-09 23:36:00,2.169,2021-02-09 17:54:00,0.841,2021-02-09 04:48:00,0.828
9,L13516032-2021-02-07 09:31S80576895,2021-02-07,09:31:00,US-NJ-009,Black-bellied Plover,bkbplo,"Millman Blvd, Cape May US-NJ 39.05036, -74.92732",L13516032,39.050362,-74.927323,4,Cape May,2021-02-07 09:30:00,2.468,2021-02-07 22:30:00,2.25,2021-02-07 15:30:00,1.032,2021-02-07 02:42:00,0.641


In [16]:
# there are not always two high/low tides per day -- hence NaN values in the tidal columns are acceptable
all_data_df['observationDate'] =  pd.to_datetime(all_data_df['observationDate'], format='%Y-%M-%d')
all_data_df['observationTime'] =  pd.to_datetime(all_data_df['observationDate'], format='%H:%M:%S')
all_data_df.dtypes

sightingID                    object
observationDate       datetime64[ns]
observationTime       datetime64[ns]
county                        object
speciesName                   object
speciesCode                   object
locationName                  object
locationID                    object
lat                          float64
lng                          float64
howMany                        int64
tideStationName               object
highhighTime          datetime64[ns]
highhighWaterLevel           float64
highTime              datetime64[ns]
highWaterLevel               float64
lowTime               datetime64[ns]
lowWaterLevel                float64
lowlowTime            datetime64[ns]
lowlowWaterLevel             float64
dtype: object

In [13]:
from google.colab import drive
drive.mount('/content/gdrive')
nbdir = "/content/gdrive/My Drive/DSCI511/Colab/data/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [17]:
filepath = '/content/gdrive/My Drive/DSCI511/Colab/data/project/' + str(start_date) + '_' + str(end_date) + '.csv'
all_data_df.to_csv(filepath, index = False)