In [1]:
!pip install noaa_coops



The tidal data will be retrieved from the National Oceanic and Atmospheric Admistration Center for Operational Oceanographic Products and Services (NOAA CO-OPS) API. This API allows querying by tidal observation station ID. For the purpose of this project, the tidal observation stations in Atlantic City, Ship John Shoal, Cape May, and Sandy Hook will be used. Data is retrieved using the 'get_data' function and is returned in the pandas dataframe format. The pandas dataframes are converted into namedtuples below to allow flexible manipulation. Attributes of the dataset include high tide time, high tide water level, low tide time, low tide water level, and date.

In [2]:
# get tidal data from noaa coops api
import noaa_coops as nc # noaa_coops api
from datetime import date, timedelta, datetime
atlantic_city = nc.Station(8534720)
cape_may = nc.Station(8536110)
sandy_hook = nc.Station(8531680)
ship_john_shoal = nc.Station(8537121)

today = date.today().strftime('%Y-%m-%d')
today = str(today.replace('-', ''))

end_date = datetime.today() - timedelta(days=0)
end_date = end_date.strftime('%Y-%m-%d')
end_date = str(end_date.replace('-', ''))

start_date = '20200101'

atlantic_city_tides = atlantic_city.get_data(
     begin_date= start_date,
     end_date= end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

ship_john_shoal_tides = ship_john_shoal.get_data(
     begin_date= start_date,
     end_date= end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

cape_may_tides = cape_may.get_data(
     begin_date=start_date,
     end_date=end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

sandy_hook_tides = sandy_hook.get_data(
     begin_date=start_date,
     end_date=end_date,
     product="high_low",
     datum="STND",
     units="metric",
     time_zone="gmt")

In [3]:
ac_tides = list(atlantic_city_tides.itertuples(index=False, name='Atlantic_City'))
sjs_tides = list(ship_john_shoal_tides.itertuples(index=False, name='Cumberland_County'))
cm_tides = list(cape_may_tides.itertuples(index=False, name='Cape_May'))
sh_tides = list(sandy_hook_tides.itertuples(index=False, name='Sandy_Hook'))

tides_all = {'Cumberland': sjs_tides, 'Cape May': cm_tides, 'Monmouth': sh_tides, 'Atlantic': ac_tides}

for key, value in tides_all.items():
  current_list = value
  for i in range(len(current_list)):
    print(current_list[i])

Cumberland_County(date_time_HH=Timestamp('2020-01-01 19:12:00'), HH_water_level=7.376, date_time_H=Timestamp('2020-01-01 06:42:00'), H_water_level=7.325, date_time_L=Timestamp('2020-01-01 01:12:00'), L_water_level=6.12, date_time_LL=Timestamp('2020-01-01 13:06:00'), LL_water_level=5.8420000000000005)
Cumberland_County(date_time_HH=Timestamp('2020-01-02 19:54:00'), HH_water_level=7.3229999999999995, date_time_H=Timestamp('2020-01-02 07:42:00'), H_water_level=7.228, date_time_L=Timestamp('2020-01-02 14:00:00'), L_water_level=5.886, date_time_LL=Timestamp('2020-01-02 01:48:00'), LL_water_level=5.803999999999999)
Cumberland_County(date_time_HH=Timestamp('2020-01-03 20:48:00'), HH_water_level=7.294, date_time_H=Timestamp('2020-01-03 08:36:00'), H_water_level=7.254, date_time_L=Timestamp('2020-01-03 14:42:00'), L_water_level=5.978, date_time_LL=Timestamp('2020-01-03 02:36:00'), LL_water_level=5.775)
Cumberland_County(date_time_HH=Timestamp('2020-01-04 09:24:00'), HH_water_level=7.496, date_t

FIPS codes will be needed to query the eBird database for sightings in a given county. FIPS are numbers used to uniquely identify geographic regions, such as counties. FIPS codes are five-digit integers, with the first two digits indicating the state and the last three digits being county identifers. To acquire a list of full five-digit FIPS codes, the fcc website will be scraped using beautifulsoup. This project scope is New Jersey only, while the website contains FIPS codes for all 50 states. The New Jersey state code is 34 - so a regex ('34\d{3}\s*.*') was used to parse out five-digit numbers followed by a string containign the county name. FIPS and county codes were then extracted individually. The eBird API accepts strings in the US-{two-digit state code}-{county FIPS ID} format. So for new Jersey, we would use something like US-NJ-001. The five-digit FIPS codes and county names are manipulated into the desired format below, then saved into a dictionary with county names as keys.  



In [4]:
# importing beautifulsoup to scrape fcc.gov for fips codes (county data)
from bs4 import BeautifulSoup
import urllib.request
import re

urlpage =  'https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt'
page = urllib.request.urlopen(urlpage)
soup = BeautifulSoup(page, 'html.parser')

# get all matches for FIPS -whitespace- county name (34 is NJ state code)
matches = re.findall( r'34\d{3}\s*.*', str(soup))
# the first match is the new jersey state fips code - we only need county fips codes. omitting first match
matches = matches[1:]

counties = {}
for i in range(len(matches)):
    fips_codes = re.findall(r'34\d{3}', matches[i])
    fips_code = fips_codes[0]
    county_names = re.findall(r'\s(.*County)', matches[i])
    county_name = county_names[0]
    county_name = county_name.lstrip()
    counties[county_name] = fips_code

# county fips codes are the last 3 digits of the whole code
county_fips = {}
for key, value in counties.items():
    corrected_value = value[2:5]
    corrected_value = 'US-NJ-' + corrected_value
    corrected_key = key[:-7]
    county_fips[corrected_key] = corrected_value
print(county_fips)

{'Atlantic': 'US-NJ-001', 'Bergen': 'US-NJ-003', 'Burlington': 'US-NJ-005', 'Camden': 'US-NJ-007', 'Cape May': 'US-NJ-009', 'Cumberland': 'US-NJ-011', 'Essex': 'US-NJ-013', 'Gloucester': 'US-NJ-015', 'Hudson': 'US-NJ-017', 'Hunterdon': 'US-NJ-019', 'Mercer': 'US-NJ-021', 'Middlesex': 'US-NJ-023', 'Monmouth': 'US-NJ-025', 'Morris': 'US-NJ-027', 'Ocean': 'US-NJ-029', 'Passaic': 'US-NJ-031', 'Salem': 'US-NJ-033', 'Somerset': 'US-NJ-035', 'Sussex': 'US-NJ-037', 'Union': 'US-NJ-039', 'Warren': 'US-NJ-041'}


eBird query:

eBird data will be acquisitioned using the eBird API. First, the 'get_taxonomy' function is used to get species codes and associated metadata for respective species. 23 species of shorebird have been selected objectively based on relative abundance in the selected geographical region. The taxonomic data will be stored in a dictionary and leveraged to request data for the selected species using the 'get_species_observations' function, which returns fields including observation date, specie observed, observation count, observatoin time, latitude, longitude, location name, location ID, and county, among several others that will not be included for this project. 

eBird legacy data:

Note, the eBird API is only capable of retrieving data from the past 30 days. To remedy this and meet the date range objective of the project (Jan 2020 - forward), a data request was made directly on the eBird website. Access was granted to my user account, and all observation data for New Jersey from Jan 2020 to present. The .txt file was received via email, and was filtered using Excel Power Query for respective species. In this script, the file will be read into a pandas dataframe and merged with the data requisitioned via the eBird API. 

In [5]:
!pip install ebird-api



In [6]:
# import ebird API package and assign API key
from ebird.api import get_observations
from ebird.api import get_species_observations
from ebird.api import get_nearby_observations
from ebird.api import get_visits
from ebird.api import get_checklist
from ebird.api import get_taxonomy, get_taxonomy_forms, get_taxonomy_versions
from ebird.api import get_notable_observations
api_key = 'aape5hn8f10a' # api key obtained by request from eBird (personal use only - PLEASE DO NOT USE OR SHARE!)

In [7]:
taxonomy = get_taxonomy(api_key) # get scientific name, common name, species Code, category, taxonomic order, etc.

# extracting only the data we need for this project into a new dictionary
shorebirds = [] # list of dictionaries from taxonomy for shorebirds
for d in taxonomy:
    for key, value in d.items():
        if key == 'order':
            if d[key] == 'Charadriiformes':
                shorebirds.append(d)

common_names = []
species_codes = []
for i in range(len(shorebirds)):
    current_species = shorebirds[i]
    for key, value in current_species.items():
        if key == 'comName':
            common_names.append(current_species['comName'])
        if key == 'speciesCode':
            species_codes.append(current_species['speciesCode'])

shorebirds_dict_pre = {}
for i in range(len(common_names)):
    current_name = common_names[i]
    shorebirds_dict_pre[current_name] = species_codes[i]

# scope is predefined list of shorebirds (not gulls, terns, jaegers)
# need to decrease API request volume and tighten the scope of the project
narrowed_scope_abbv = ['killde', 'sander', 'dunlin', 'pursan', 'ameoys',
                       'bkbplo', 'greyel', 'semplo', 'lobdow', 'sposan',
                       'lesyel', 'leasan', 'margod', 'willet1', 'shbdow',
                       'wessan', 'pecsan', 'amgplo', 'solsan', 'stisan',
                       'hudgod', 'pipplo', 'uplsan']
shorebirds_dict = {}
for key, value in shorebirds_dict_pre.items():
  if value in narrowed_scope_abbv:
    shorebirds_dict[key] = value

for key, value in shorebirds_dict.items():
  print(key, value)

American Oystercatcher ameoys
Black-bellied Plover bkbplo
American Golden-Plover amgplo
Semipalmated Plover semplo
Piping Plover pipplo
Killdeer killde
Upland Sandpiper uplsan
Hudsonian Godwit hudgod
Marbled Godwit margod
Stilt Sandpiper stisan
Sanderling sander
Dunlin dunlin
Purple Sandpiper pursan
Least Sandpiper leasan
Pectoral Sandpiper pecsan
Western Sandpiper wessan
Short-billed Dowitcher shbdow
Long-billed Dowitcher lobdow
Spotted Sandpiper sposan
Solitary Sandpiper solsan
Greater Yellowlegs greyel
Willet willet1
Lesser Yellowlegs lesyel


In [8]:
# relevant counties are Atlantic, Monmouth, Cumberland and Cape May
# these queries pull a lot of data - so data will only be queried once from the API

relevant_county_codes = []
for key, value in county_fips.items():
    if key in 'Cumberland' or key in 'Monmouth' or key in 'Cape May' or key in 'Atlantic': 
        relevant_county_codes.append(value)

# list of shorebird species codes for query
relevant_species_codes = []
for key, value in shorebirds_dict.items():
    relevant_species_codes.append(value)

def append_sightings(start, stop, county, specie): # use this sparingly, data is expensive
  try: # ignore bad requests (404 errors)
      get_obs = get_species_observations(api_key, specie, county, back=30) 
      for i in range(len(get_obs)):
        current_dict = get_obs[i]
        if current_dict:
          current_dict['county'] = county
          records.append(current_dict)
  except:
    print('error')

records = []

i = 0 # the limit of records per query is 30, so we need to break it down into intervals of 30
j = 30

for c in range(len(relevant_county_codes)):
  current_county = relevant_county_codes[c]
  for n in range(len(relevant_species_codes)):
    current_specie = relevant_species_codes[n]
    append_sightings(i, j, current_county, current_specie)
    i += 30
    j += 30

# some checklists were empty (as they did not contain target species) - let's remove these
valid_records = []
for i in range(len(records)):
    if not records[i]:
        continue
    else:
        valid_records.append(records[i])

In [9]:
import pandas as pd
ebird_API_df = pd.DataFrame(valid_records)
ebird_API_df['obsDt']= pd.to_datetime(ebird_API_df['obsDt'])
ebird_API_df['date'] = [d.date() for d in ebird_API_df['obsDt']]
ebird_API_df['time'] = [d.time() for d in ebird_API_df['obsDt']]
ebird_API_df = ebird_API_df[['comName', 'locId', 'locName', 'date', 'time', 'howMany',
                             'lat', 'lng', 'subId', 'county']]
ebird_API_df['date'] = ebird_API_df['date'].astype('datetime64[ns]').dt.date
#ebird_API_df['date'] = ebird_API_df['date'].str.replace('-','/')
ebird_API_df = ebird_API_df.sort_values(by=['time'], ascending=False)
ebird_API_df.head()

Unnamed: 0,comName,locId,locName,date,time,howMany,lat,lng,subId,county
298,Killdeer,L6144907,Fletcher Lake,2021-02-21,22:20:00,1.0,40.208115,-74.00661,S82130104,US-NJ-025
330,Dunlin,L10854077,Belmar Beachfront,2021-02-14,19:54:00,3.0,40.176814,-74.013464,S81505295,US-NJ-025
209,Purple Sandpiper,L904314,Cold Spring Inlet (Cape May Harbor mouth),2021-02-21,17:52:00,1.0,38.939183,-74.865904,S82150845,US-NJ-009
40,Greater Yellowlegs,L11941376,Absecon Creek Faunce Landing Boat Ramp area,2021-03-03,17:50:00,3.0,39.42639,-74.487139,S82963960,US-NJ-001
305,Sanderling,L10874116,"Sandy Hook--Lot I, Proving Grounds, and North ...",2021-03-06,17:50:00,9.0,40.467049,-73.998396,S82899156,US-NJ-025


In [10]:
from google.colab import drive
drive.mount('/content/gdrive')
nbdir = "/content/gdrive/My Drive/DSCI511/Colab/data/"
data_loc = '/content/gdrive/My Drive/DSCI511/Colab/data/project/legacy_data/nj_shorebird_legacy_data.csv'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [11]:
legacy_ebird_data = pd.read_csv(data_loc) 
legacy_ebird_data.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,GLOBAL UNIQUE IDENTIFIER,LAST EDITED DATE,TAXONOMIC ORDER,CATEGORY,COMMON NAME,SCIENTIFIC NAME,SUBSPECIES COMMON NAME,SUBSPECIES SCIENTIFIC NAME,OBSERVATION COUNT,BREEDING BIRD ATLAS CODE,BREEDING BIRD ATLAS CATEGORY,AGE/SEX,COUNTRY,COUNTRY CODE,STATE,STATE CODE,COUNTY,COUNTY CODE,IBA CODE,BCR CODE,USFWS CODE,ATLAS BLOCK,LOCALITY,LOCALITY ID,LOCALITY TYPE,LATITUDE,LONGITUDE,OBSERVATION DATE,TIME OBSERVATIONS STARTED,OBSERVER ID,SAMPLING EVENT IDENTIFIER,PROTOCOL TYPE,PROTOCOL CODE,PROJECT CODE,DURATION MINUTES,EFFORT DISTANCE KM,EFFORT AREA HA,NUMBER OBSERVERS,ALL SPECIES REPORTED,GROUP IDENTIFIER,HAS MEDIA,APPROVED,REVIEWED,REASON,TRIP COMMENTS,SPECIES COMMENTS,Column1
0,URN:CornellLabOfOrnithology:EBIRD:OBS854789436,1/27/2020 17:21,5604,species,American Oystercatcher,Haematopus palliatus,,,10,,,,United States,US,New Jersey,US-NJ,Atlantic,US-NJ-001,US-NJ_3141,,,,Brigantine Island--South Cove,L2364364,H,39.383977,-74.41511,1/22/2020,3:53:00 PM,obsr17906,S63678356,Traveling,P22,EBIRD,93.0,0.644,,2.0,1,G4899412,0,1,0,,,,
1,URN:CornellLabOfOrnithology:EBIRD:OBS855442309,1/24/2020 19:20,5604,species,American Oystercatcher,Haematopus palliatus,,,15,,,,United States,US,New Jersey,US-NJ,Atlantic,US-NJ-001,US-NJ_3141,,,,Brigantine Island--South Cove,L2364364,H,39.383977,-74.41511,1/24/2020,1:00:00 PM,obsr236396,S63734722,Traveling,P22,EBIRD_NJ,80.0,0.402,,3.0,1,G4905047,1,1,0,,,,
2,URN:CornellLabOfOrnithology:EBIRD:OBS855573613,1/24/2020 20:14,5604,species,American Oystercatcher,Haematopus palliatus,,,15,,,,United States,US,New Jersey,US-NJ,Atlantic,US-NJ-001,US-NJ_3141,,,,Brigantine Island--South Cove,L2364364,H,39.383977,-74.41511,1/24/2020,1:00:00 PM,obsr432878,S63745602,Traveling,P22,EBIRD_NJ,80.0,0.402,,3.0,1,G4905047,1,1,0,,,,
3,URN:CornellLabOfOrnithology:EBIRD:OBS853877493,1/20/2020 21:16,5604,species,American Oystercatcher,Haematopus palliatus,,,140,,,,United States,US,New Jersey,US-NJ,Atlantic,US-NJ-001,US-NJ_3141,,,,Brigantine Island--South Cove,L2364364,H,39.383977,-74.41511,1/20/2020,2:26:00 PM,obsr164311,S63598206,Traveling,P22,EBIRD,20.0,0.322,,2.0,1,G4894194,0,1,0,,,,
4,URN:CornellLabOfOrnithology:EBIRD:OBS855555158,1/24/2020 19:20,5604,species,American Oystercatcher,Haematopus palliatus,,,15,,,,United States,US,New Jersey,US-NJ,Atlantic,US-NJ-001,US-NJ_3141,,,,Brigantine Island--South Cove,L2364364,H,39.383977,-74.41511,1/24/2020,1:00:00 PM,obsr139362,S63744118,Traveling,P22,EBIRD_NJ,80.0,0.402,,3.0,1,G4905047,1,1,0,,,,


In [12]:
legacy_ebird_data = legacy_ebird_data[['OBSERVATION DATE', 'TIME OBSERVATIONS STARTED',
                                       'COMMON NAME', 'LOCALITY', 'LOCALITY ID',
                                       'LATITUDE', 'LONGITUDE', 'OBSERVATION COUNT',
                                       'COUNTY CODE', 'OBSERVER ID']]
legacy_ebird_data.columns = ['date', 'time', 'comName', 'locName', 'locId',
                             'lat', 'lng', 'howMany', 'county', 'subId']
legacy_ebird_data['date'] = legacy_ebird_data['date'].astype('datetime64[ns]').dt.date
legacy_ebird_data['time'] = legacy_ebird_data['time'].astype('datetime64[ns]').dt.time
legacy_ebird_data = legacy_ebird_data.sort_values(by=['date'], ascending=False)
legacy_ebird_data.head()

Unnamed: 0,date,time,comName,locName,locId,lat,lng,howMany,county,subId
48331,2021-01-31,08:28:00,Killdeer,Cape Island--The Beanery/Rea Farm,L129879,38.94624,-74.941208,2,US-NJ-009,obsr155704
48689,2021-01-31,10:14:00,Sanderling,Cape Island--CMP (Cape May Point),L276107,38.937787,-74.967072,33,US-NJ-009,obsr105983
48669,2021-01-31,13:24:00,Sanderling,Cape Island--2nd Ave. jetty/pavilion,L269018,38.929361,-74.934536,2,US-NJ-009,obsr134087
48660,2021-01-31,11:00:00,Sanderling,North Wildwood--2nd Ave seawall,L10643724,39.001699,-74.786704,30,US-NJ-009,obsr356525
48636,2021-01-31,13:25:00,Sanderling,North Wildwood--2nd Ave seawall,L10643724,39.001699,-74.786704,1,US-NJ-009,obsr838827


In [13]:
merged_ebird_df = pd.concat([legacy_ebird_data, ebird_API_df], ignore_index=True)
#merged_ebird_df = legacy_ebird_data.append(ebird_API_df)
print(legacy_ebird_data.size + ebird_API_df.size)
merged_ebird_df.size

501960


501960

In [14]:
#merged_ebird_df['sightingId'] = merged_ebird_df.date.map(str) + merged_ebird_df.time.map(str) + merged_ebird_df.locId + merged_ebird_df.subId
merged_ebird_df = merged_ebird_df.sort_values(by=['comName'])
merged_ebird_df.size

501960

Merging of data:

In [16]:
from typing import NamedTuple
test_list = []

class eBird_Tidal_Join(NamedTuple):
    sightingId: str
    tidal_station: str
    hh_time: datetime
    hh_water_level: float
    h_time: datetime
    h_water_level: float
    l_time: datetime
    l_water_level: float
    ll_time: datetime
    ll_water_level: float

list_of_sightings = []
test_list = []

for col, r in merged_ebird_df.iterrows(): # for row in the eBird dataframe
  obs_date_obj = r['date'] # get date as handle to join with tidal data
  test_dict = {} # create temp dict
  # for all items in the county tide dictionary
  for county, tides in tides_all.items(): # for each county in the tides dict
    current_county = county
    # get the current county's tides
    current_county_tides = tides
    # for all records in the county tide list
    for j in range(len(current_county_tides)):
      # get the current day's tides
      current_tide = current_county_tides[j]
      # get the date
      tide_date_str = str(current_tide[0])
      # if that date is not 'NaT'...
      if tide_date_str != 'NaT':
        # convert it to a datetime to match the eBird datetime so they can be compared
        tide_time_obj = datetime.strptime(tide_date_str, '%Y-%m-%d %H:%M:%S')
        # if the dates match...
        if str(obs_date_obj) == str(tide_time_obj.date()):
          # get all these variables
          #sID = r['sightingId']
          oDate = r['date']
          oTime = r['time']
          sName = r['comName']
          lName = r['locName']
          lID = r['locId']
          lat = r['lat']
          lng = r['lng']
          cnty = r['county']
          #test_dict['sightingID'] = sID
          test_dict['observationDate'] = oDate
          test_dict['observationTime'] = oTime
          test_dict['county'] = cnty
          test_dict['speciesName'] = sName
          test_dict['locationName'] = lName
          test_dict['locationID'] = lID
          test_dict['lat'] = lat
          test_dict['lng'] = lng
        
          hMany = 0
          try:
            howMany = r['howMany']
            test_dict['howMany'] = howMany
          except:
            test_dict['howMany'] = 1

          cnty_name = next(key for key, value in county_fips.items() if value == cnty)
          cnty_name = cnty_name.strip()
          if current_county == cnty_name:  
            test_dict['tideStationName'] = county
            test_dict['highhighTime'] = current_tide[0]
            test_dict['highhighWaterLevel'] = current_tide[1]
            test_dict['highTime'] = current_tide[2]
            test_dict['highWaterLevel'] = current_tide[3]
            test_dict['lowTime'] = current_tide[4]
            test_dict['lowWaterLevel'] = current_tide[5]
            test_dict['lowlowTime'] = current_tide[6]
            test_dict['lowlowWaterLevel'] = current_tide[7]
            
            test_list.append(test_dict)


In [17]:
all_data_df = pd.DataFrame(test_list)
all_data_df = all_data_df.sort_values(by=['observationDate'], ascending=False)
all_data_df.head(300)

Unnamed: 0,observationDate,observationTime,county,speciesName,locationName,locationID,lat,lng,howMany,tideStationName,highhighTime,highhighWaterLevel,highTime,highWaterLevel,lowTime,lowWaterLevel,lowlowTime,lowlowWaterLevel
18419,2021-02-28,07:10:00,US-NJ-025,Killdeer,Manasquan Reservoir IBA,L242130,40.178939,-74.208441,5,Monmouth,2021-02-28 13:36:00,2.549,2021-02-28 00:54:00,2.514,2021-02-28 07:30:00,0.722,2021-02-28 19:54:00,0.598
4446,2021-02-28,10:42:00,US-NJ-009,American Oystercatcher,Cape Island--South Cape May Meadows (SCMM),L109136,38.935929,-74.943452,1,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
9662,2021-02-28,13:31:00,US-NJ-009,Dunlin,Nummy Island,L211914,39.036589,-74.791274,80,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
18437,2021-02-28,08:05:00,US-NJ-025,Killdeer,"Sandy Hook Beach, Highlands US-NJ 40.40674, -7...",L13870292,40.406741,-73.978352,2,Monmouth,2021-02-28 13:36:00,2.549,2021-02-28 00:54:00,2.514,2021-02-28 07:30:00,0.722,2021-02-28 19:54:00,0.598
31358,2021-02-28,08:09:00,US-NJ-009,Sanderling,"825 Boardwalk, Ocean City US-NJ 39.27567, -74....",L12508994,39.275672,-74.568914,5,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31399,2021-01-30,13:15:00,US-NJ-009,Sanderling,Avalon Dunes (30th-60th streets),L715654,39.085274,-74.727623,24,Cape May,2021-01-30 14:36:00,2.855,2021-01-30 02:30:00,2.212,2021-01-30 20:42:00,0.971,2021-01-30 07:42:00,0.842
27289,2021-01-30,10:06:00,US-NJ-001,Long-billed Dowitcher,Edwin B. Forsythe NWR (formerly Brig),L109133,39.465000,-74.447778,7,Atlantic,2021-01-30 13:36:00,3.343,2021-01-30 01:18:00,2.712,2021-01-30 19:54:00,1.763,2021-01-30 06:48:00,1.607
31299,2021-01-30,13:05:00,US-NJ-025,Sanderling,Shark River Inlet,L752584,40.187129,-74.008042,30,Monmouth,2021-01-30 13:54:00,2.704,2021-01-30 01:24:00,2.233,2021-01-30 20:12:00,0.998,2021-01-30 07:36:00,0.895
27561,2021-01-30,08:56:00,US-NJ-001,Marbled Godwit,South Brigantine,L13433409,39.374506,-74.405059,10,Atlantic,2021-01-30 13:36:00,3.343,2021-01-30 01:18:00,2.712,2021-01-30 19:54:00,1.763,2021-01-30 06:48:00,1.607


In [19]:
all_data_df['observationTime'] = pd.to_datetime(all_data_df['observationTime'], format='%I:%M:%S %p', errors = 'ignore')
all_data_df.head(300)

Unnamed: 0,observationDate,observationTime,county,speciesName,locationName,locationID,lat,lng,howMany,tideStationName,highhighTime,highhighWaterLevel,highTime,highWaterLevel,lowTime,lowWaterLevel,lowlowTime,lowlowWaterLevel
18419,2021-02-28,07:10:00,US-NJ-025,Killdeer,Manasquan Reservoir IBA,L242130,40.178939,-74.208441,5,Monmouth,2021-02-28 13:36:00,2.549,2021-02-28 00:54:00,2.514,2021-02-28 07:30:00,0.722,2021-02-28 19:54:00,0.598
4446,2021-02-28,10:42:00,US-NJ-009,American Oystercatcher,Cape Island--South Cape May Meadows (SCMM),L109136,38.935929,-74.943452,1,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
9662,2021-02-28,13:31:00,US-NJ-009,Dunlin,Nummy Island,L211914,39.036589,-74.791274,80,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
18437,2021-02-28,08:05:00,US-NJ-025,Killdeer,"Sandy Hook Beach, Highlands US-NJ 40.40674, -7...",L13870292,40.406741,-73.978352,2,Monmouth,2021-02-28 13:36:00,2.549,2021-02-28 00:54:00,2.514,2021-02-28 07:30:00,0.722,2021-02-28 19:54:00,0.598
31358,2021-02-28,08:09:00,US-NJ-009,Sanderling,"825 Boardwalk, Ocean City US-NJ 39.27567, -74....",L12508994,39.275672,-74.568914,5,Cape May,2021-02-28 14:12:00,2.595,2021-02-28 01:54:00,2.379,2021-02-28 20:06:00,0.685,2021-02-28 07:36:00,0.582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31399,2021-01-30,13:15:00,US-NJ-009,Sanderling,Avalon Dunes (30th-60th streets),L715654,39.085274,-74.727623,24,Cape May,2021-01-30 14:36:00,2.855,2021-01-30 02:30:00,2.212,2021-01-30 20:42:00,0.971,2021-01-30 07:42:00,0.842
27289,2021-01-30,10:06:00,US-NJ-001,Long-billed Dowitcher,Edwin B. Forsythe NWR (formerly Brig),L109133,39.465000,-74.447778,7,Atlantic,2021-01-30 13:36:00,3.343,2021-01-30 01:18:00,2.712,2021-01-30 19:54:00,1.763,2021-01-30 06:48:00,1.607
31299,2021-01-30,13:05:00,US-NJ-025,Sanderling,Shark River Inlet,L752584,40.187129,-74.008042,30,Monmouth,2021-01-30 13:54:00,2.704,2021-01-30 01:24:00,2.233,2021-01-30 20:12:00,0.998,2021-01-30 07:36:00,0.895
27561,2021-01-30,08:56:00,US-NJ-001,Marbled Godwit,South Brigantine,L13433409,39.374506,-74.405059,10,Atlantic,2021-01-30 13:36:00,3.343,2021-01-30 01:18:00,2.712,2021-01-30 19:54:00,1.763,2021-01-30 06:48:00,1.607


In [23]:
final_df = all_data_df.copy()
final_df['highhighTime'] =  pd.to_datetime(final_df['highhighTime'], format='%H:%M:%S').dt.time
final_df['highhighTime'] = final_df['highhighTime'].apply(lambda x: x.strftime('%H:%M:%S'))
final_df['highTime'] =  pd.to_datetime(final_df['highTime'], format='%H:%M:%S').dt.time
final_df['lowTime'] =  pd.to_datetime(final_df['lowTime'], format='%H:%M:%S').dt.time
final_df['lowlowTime'] =  pd.to_datetime(final_df['lowlowTime'], format='%H:%M:%S').dt.time
final_df.head(300)

Unnamed: 0,observationDate,observationTime,county,speciesName,locationName,locationID,lat,lng,howMany,tideStationName,highhighTime,highhighWaterLevel,highTime,highWaterLevel,lowTime,lowWaterLevel,lowlowTime,lowlowWaterLevel
18419,2021-02-28,07:10:00,US-NJ-025,Killdeer,Manasquan Reservoir IBA,L242130,40.178939,-74.208441,5,Monmouth,13:36:00,2.549,00:54:00,2.514,07:30:00,0.722,19:54:00,0.598
4446,2021-02-28,10:42:00,US-NJ-009,American Oystercatcher,Cape Island--South Cape May Meadows (SCMM),L109136,38.935929,-74.943452,1,Cape May,14:12:00,2.595,01:54:00,2.379,20:06:00,0.685,07:36:00,0.582
9662,2021-02-28,13:31:00,US-NJ-009,Dunlin,Nummy Island,L211914,39.036589,-74.791274,80,Cape May,14:12:00,2.595,01:54:00,2.379,20:06:00,0.685,07:36:00,0.582
18437,2021-02-28,08:05:00,US-NJ-025,Killdeer,"Sandy Hook Beach, Highlands US-NJ 40.40674, -7...",L13870292,40.406741,-73.978352,2,Monmouth,13:36:00,2.549,00:54:00,2.514,07:30:00,0.722,19:54:00,0.598
31358,2021-02-28,08:09:00,US-NJ-009,Sanderling,"825 Boardwalk, Ocean City US-NJ 39.27567, -74....",L12508994,39.275672,-74.568914,5,Cape May,14:12:00,2.595,01:54:00,2.379,20:06:00,0.685,07:36:00,0.582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31399,2021-01-30,13:15:00,US-NJ-009,Sanderling,Avalon Dunes (30th-60th streets),L715654,39.085274,-74.727623,24,Cape May,14:36:00,2.855,02:30:00,2.212,20:42:00,0.971,07:42:00,0.842
27289,2021-01-30,10:06:00,US-NJ-001,Long-billed Dowitcher,Edwin B. Forsythe NWR (formerly Brig),L109133,39.465000,-74.447778,7,Atlantic,13:36:00,3.343,01:18:00,2.712,19:54:00,1.763,06:48:00,1.607
31299,2021-01-30,13:05:00,US-NJ-025,Sanderling,Shark River Inlet,L752584,40.187129,-74.008042,30,Monmouth,13:54:00,2.704,01:24:00,2.233,20:12:00,0.998,07:36:00,0.895
27561,2021-01-30,08:56:00,US-NJ-001,Marbled Godwit,South Brigantine,L13433409,39.374506,-74.405059,10,Atlantic,13:36:00,3.343,01:18:00,2.712,19:54:00,1.763,06:48:00,1.607


In [None]:
filepath = '/content/gdrive/My Drive/DSCI511/Colab/data/project/' + str(start_date) + '_' + str(end_date) + '.csv'
final_df.to_csv(filepath, date_format='%H:%M:%S', index = False)