# IMOS realtime data to DwC Event Core - WC Example

Plan: Convert the realtime QCed IMOS marine mammal position data to DwC, and then publish the result to the IPT.

Contemporary notes from our meet w/ Ian Jonsen here: https://docs.google.com/document/d/1hibIxBbyGwa7b5-LRpnKIyjnr41EkUPKBzdUJoyAfaU/edit#heading=h.6bqw4binj5hq

### Inputs / configuration parameters:

* QCed data for a given campaign or project as the exported CSVs with appended position correction data as per https://github.com/ianjonsen/ArgosQC
* credentials for IPT and 
* corresponding project ID to associate new data with
* minimum quality hit to keep

In [2]:
import pandas as pd

metadata_df = pd.read_csv('input/loggerheadimos/4135_265986_24A0510_metadata.csv')
loc_df = pd.concat([pd.read_csv('input/loggerheadimos/4135_265986_24A0510_Argos.csv'), pd.read_csv('input/loggerheadimos/4135_265986_24A0510_FastGPS.csv')])

In [3]:
metadata_df[0:10]

Unnamed: 0,sattag_program,device_id,ptt,tag_serial_number,body,dive_wmo_ref,tag_type,common_name,species,release_longitude,...,release_date,recovery_date,age_class,sex,length,estimated_mass,actual_mass,state_country,dive_start,dive_end
0,4135,4135_265986_24A0510,265986,24A0510,,,Splash10-F-351A,Loggerhead turtle,Caretta caretta,152.521977,...,2024-06-21T01:54:00Z,,,,,,,Australia,2024-06-21T00:00:00Z,2024-09-05T00:00:00Z


## Metadata - create events and occurrences for each row

Process the metadata csv into Event Core (animal releases + tag attachments) + Occurrences (HumanObservations) + emofs for same (biological measurements are here)

In [11]:
metadata_df

# event entries: eventID = [body]-[release_date]
#                eventDate =  [release_date]
#                latitude = [release_latitude]
#                longitude = [release_longitude]
#                modified = current_date()
#                geodeticDatum = EPSG:4326
#                country = state_country  (error in current dataset, French Overseas Territory should be French Southern Lands)

column_map = {'release_date':'eventDate',
              'release_latitude':'decimalLatitude',
              'release_longitude':'decimalLongitude',
              'state_country':'country'}

event_df = metadata_df.rename(columns=column_map)
event_df['modified'] = pd.to_datetime('now', utc=True).round(freq='s')
# eventID is instrument serial number (body) + release datetime (eventDate)
event_df['eventID'] = event_df['tag_serial_number'].astype(str).str.cat(event_df['eventDate'].astype(str), sep='-')
event_df['geodeticDatum'] = 'EPSG:4326'
# Optional: truncate the extra columns from the core
event_df =  event_df[['eventID', 'eventDate', 'decimalLatitude', 'decimalLongitude', 'modified', 'geodeticDatum', 'country']]

In [12]:
event_df[0:5]

Unnamed: 0,eventID,eventDate,decimalLatitude,decimalLongitude,modified,geodeticDatum,country
0,24A0510-2024-06-21T01:54:00Z,2024-06-21T01:54:00Z,-32.177847,152.521977,2024-11-20 18:03:44+00:00,EPSG:4326,Australia


In [5]:
# EMOFs to harvest
# for the release events
# instrument manufacturer and model  (SMRU + [tag type])
# PTT
# device id
# WMO ref


In [13]:
# occ ext. entries:    occurrenceID = [tag_serial_number]-[release_date]
#                      species = [species]
#                      sex = [sex]
#                      eventID = [body]-[release_date]
#                      organismID = [body]-[release_date]

occ_column_map = {'release_date':'eventDate',
                  'species':'scientificName'}
occ_df = metadata_df.rename(columns=occ_column_map)
occ_df['occurrenceID'] = occ_df['tag_serial_number'].astype(str).str.cat(occ_df['eventDate'].astype(str), sep='-')
occ_df['eventID'] = occ_df['tag_serial_number'].astype(str).str.cat(occ_df['eventDate'].astype(str), sep='-')
occ_df['organismID'] = occ_df['tag_serial_number'].astype(str).str.cat(occ_df['eventDate'].astype(str), sep='-')
occ_df['basisOfRecord'] = 'HumanObservation'
occ_df = occ_df[['occurrenceID', 'organismID','eventID', 'sex', 'scientificName', 'basisOfRecord']]

In [14]:
occ_df[0:5]

Unnamed: 0,occurrenceID,organismID,eventID,sex,scientificName,basisOfRecord
0,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z,,Caretta caretta,HumanObservation


In [15]:
# EMOFs to harvest
# for the occurrences:
# sex
# length
# weight
loc_df

Unnamed: 0,device_id,DeployID,Instr,Date,Type,Quality,Latitude,Longitude,Error radius,Error Semi-major axis,...,Id...91,Range...92,Signal...93,Doppler...94,CNR...95,Id...96,Range...97,Signal...98,Doppler...99,CNR...100
0,4135_265986_24A0510,265986.0,Mk10,10:45:00 21-Jun-2024,User,G,-32.17780,152.52200,,,...,,,,,,,,,,
1,4135_265986_24A0510,265986.0,Mk10,14:06:15 21-Jun-2024,Argos,1,-32.17470,152.47900,1162.0,9134.0,...,,,,,,,,,,
2,4135_265986_24A0510,265986.0,Mk10,17:44:32 21-Jun-2024,Argos,1,-32.18850,152.53670,1408.0,14856.0,...,,,,,,,,,,
3,4135_265986_24A0510,265986.0,Mk10,19:44:29 21-Jun-2024,Argos,1,-32.17380,152.53810,580.0,7120.0,...,,,,,,,,,,
4,4135_265986_24A0510,265986.0,Mk10,21:36:16 21-Jun-2024,Argos,1,-32.18530,152.50760,736.0,2997.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,4135_265986_24A0510,,,,,,-30.39031,159.57213,,,...,,,,,,,,,,
572,4135_265986_24A0510,,,,,,-30.43798,159.70898,,,...,,,,,,,,,,
573,4135_265986_24A0510,,,,,,-30.43827,159.70935,,,...,,,,,,,,,,
574,4135_265986_24A0510,,,,,,-30.44083,159.72816,,,...,,,,,,,,,,


In [17]:
# Create event and occurrence entries from the locations data file
# 
# Event entries:  eventID = organismID + date_detected
#                 latitude = ssm_lat if exists else lat
#                 longitude = ssm_lon if exists else lon
#                 eventDate = d_date
#                 geodeticDatum = EPSG:4326
#                 coordinateUncertaintyInMeters = max(ssm_x_se, ssm_y_se)  -- 1 SE or 2 SE?
#                 

# add the relevant columns to loc_df from metadata_df to create organismID
loc_df = loc_df.merge(metadata_df[['device_id', 'tag_serial_number', 'release_date', 'species']], 
                      how='left', left_on='device_id', right_on='device_id')


In [20]:
# combine the organismID + the detection date into the eventID
loc_df['eventID'] = loc_df['tag_serial_number'].astype(str).str.cat(loc_df[['release_date', 'date']], sep='-')

In [23]:

# Check: is this correct to do in all cases?
# where there has been no correction made (corrected positions = NA, 
#       then use the raw position data
loc_df['decimalLatitude'] = loc_df['ssm_lat'].fillna(loc_df['Latitude'])
loc_df['decimalLongitude'] = loc_df['ssm_lon'].fillna(loc_df['Longitude'])
loc_df['eventDate'] = loc_df['date']
loc_df['modified'] = pd.to_datetime('now', utc=True).round(freq='s') 

# constant
loc_df['geodeticDatum'] = 'EPSG:4326'

# Ian's got his ssm_x and ssm_y in km, not in m
loc_df['coordinateUncertaintyInMeters'] = loc_df[['ssm_x_se', 'ssm_y_se']].max(axis=1) * 1000

# revisit uncertainty - make a radius based on the max, but uncertainty is an ellipse
# OBIS doesn't know about it but we can make a Polygon and include it somewhere to preserve the better
# knowledge that we have.

# Where there are multiple hits for a given time step (many satellites have opinions on position at once), 

loc_df = loc_df.sort_values(['device_id', 'date', 'Quality'], ascending=False)
# drop all but the best of the location qualities
loc_df = loc_df.drop_duplicates(subset=['device_id','date'], keep='first', inplace=False)


In [24]:
loc_df['Quality'].unique()

array(['A', 'B', '2', '1', '3', '0', nan, 'G'], dtype=object)

In [25]:
# fallback - where coordinateUncertaintyInMeters is still null (un-QCed), 
# let's do something with the class of fix from Argos. set a lookup table as in the ATN example?

# Ian recommends - dan costa, accuracy of argos locations at sea pinnipeds
# in that article they compared GPS to Argos locations.
# https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0008677

# While this paper recommends different regimes per species due to differences in surfacing behaviour, 
# we don't have that kind of broad data in real-time land.

# So we take their recommendations for marine mammals here, and maybe we'd leave the door open to use a non-mammal error chart
# for non-airbreathers

# Other methodologies have thrown out the A and B quality hits altogther. 
# I'm not opposed to doing that but i'll confirm it with the SME beforehand
# because the QC algorithm is re-positioning the bad hits for us already.
# and if they didn't throw them out, we may not want to either.

# 68th percentile location error distances from Costa et al, in metres
# old LQ designations - CLS moved to a Kalman-filtered location set in ~2011
# error ellipses are now de riguer - semimajor and 
# semiminor ellipse axis/orientation to quantify uncertainty
# So we could harvest those first.

error_table = {3:490,
               2:1010,
               1:1200,
               0:4180,
               -1:6190,
               'A':10280,
               'B':10280,
               'G':490} # TODO : What is the corresponding code to -9 LQ? 
                         # AniMotum thinks it's a class B
missing_errors = loc_df['coordinateUncertaintyInMeters'].isna()
loc_df.loc[missing_errors, 'coordinateUncertaintyInMeters'] = loc_df.loc[missing_errors, 'Quality'].map(error_table)

In [26]:
loc_df['coordinateUncertaintyInMeters'].describe()

count    1233.000000
mean      223.812159
std       280.800119
min        34.650000
25%        91.539000
50%        99.023000
75%       254.133000
max      2219.439000
Name: coordinateUncertaintyInMeters, dtype: float64

In [27]:
loc_df['modified']

669   2024-11-20 18:09:05+00:00
668   2024-11-20 18:09:05+00:00
667   2024-11-20 18:09:05+00:00
666   2024-11-20 18:09:05+00:00
665   2024-11-20 18:09:05+00:00
                 ...           
2     2024-11-20 18:09:05+00:00
671   2024-11-20 18:09:05+00:00
1     2024-11-20 18:09:05+00:00
670   2024-11-20 18:09:05+00:00
0     2024-11-20 18:09:05+00:00
Name: modified, Length: 1234, dtype: datetime64[ns, UTC]

In [28]:
# Select the columns and append to the event_df

event_df = pd.concat([event_df, loc_df[['eventID', 'eventDate', 'decimalLatitude', 'decimalLongitude', 'modified','geodeticDatum', 'coordinateUncertaintyInMeters']]])

In [29]:
event_df

Unnamed: 0,eventID,eventDate,decimalLatitude,decimalLongitude,modified,geodeticDatum,country,coordinateUncertaintyInMeters
0,24A0510-2024-06-21T01:54:00Z,2024-06-21T01:54:00Z,-32.177847,152.521977,2024-11-20 18:03:44+00:00,EPSG:4326,Australia,
669,24A0510-2024-06-21T01:54:00Z-2024-09-05T12:32:32Z,2024-09-05T12:32:32Z,-30.425609,160.366222,2024-11-20 18:09:05+00:00,EPSG:4326,,971.204
668,24A0510-2024-06-21T01:54:00Z-2024-09-05T12:20:17Z,2024-09-05T12:20:17Z,-30.426251,160.362629,2024-11-20 18:09:05+00:00,EPSG:4326,,907.605
667,24A0510-2024-06-21T01:54:00Z-2024-09-05T10:48:53Z,2024-09-05T10:48:53Z,-30.431055,160.335820,2024-11-20 18:09:05+00:00,EPSG:4326,,544.592
666,24A0510-2024-06-21T01:54:00Z-2024-09-05T10:48:12Z,2024-09-05T10:48:12Z,-30.431096,160.335623,2024-11-20 18:09:05+00:00,EPSG:4326,,542.597
...,...,...,...,...,...,...,...,...
2,24A0510-2024-06-21T01:54:00Z-2024-06-21T17:44:32Z,2024-06-21T17:44:32Z,-32.177770,152.521976,2024-11-20 18:09:05+00:00,EPSG:4326,,97.304
671,24A0510-2024-06-21T01:54:00Z-2024-06-21T17:43:24Z,2024-06-21T17:43:24Z,-32.177767,152.521980,2024-11-20 18:09:05+00:00,EPSG:4326,,97.306
1,24A0510-2024-06-21T01:54:00Z-2024-06-21T14:06:15Z,2024-06-21T14:06:15Z,-32.178018,152.522003,2024-11-20 18:09:05+00:00,EPSG:4326,,98.495
670,24A0510-2024-06-21T01:54:00Z-2024-06-21T14:06:13Z,2024-06-21T14:06:13Z,-32.178018,152.522003,2024-11-20 18:09:05+00:00,EPSG:4326,,98.495


In [31]:
# Occurrence entries: occurrenceID = eventID
#                     eventID = eventID
#                     species = species
#                     organismID = body + release_date

loc_df['occurrenceID'] = loc_df['eventID']
loc_df['organismID'] =  loc_df['tag_serial_number'].astype(str).str.cat(loc_df['release_date'].astype(str), sep='-') 

In [32]:
# Decimate to first each hour per animal. Acoustics would also use per-receiver location, argos and sat won't need that.
dets_df = loc_df
dets_df['scientificName'] = dets_df['species']
dets_df['basisOfRecord'] = 'MachineObservation'
dets_df['Date'] = pd.to_datetime(dets_df['date']).dt.date
dets_df['hr'] = pd.to_datetime(dets_df['date']).dt.hour
dets_df['binsize'] = dets_df.groupby(['organismID', 'Date', 'hr']).size().reset_index(name='binsize')['binsize']
dets_df.drop_duplicates(subset=['organismID','Date', 'hr'], keep='first', inplace=True)
dets_df.drop('hr', axis=1, inplace=True)
dets_df

Unnamed: 0,device_id,DeployID,Instr,Date,Type,Quality,Latitude,Longitude,Error radius,Error Semi-major axis,...,decimalLongitude,eventDate,modified,geodeticDatum,coordinateUncertaintyInMeters,occurrenceID,organismID,scientificName,basisOfRecord,binsize
669,4135_265986_24A0510,265986.0,Mk10,2024-09-05,Argos,A,-30.4339,160.3632,2067.0,4197.0,...,160.366222,2024-09-05T12:32:32Z,2024-11-20 18:09:05+00:00,EPSG:4326,971.204,24A0510-2024-06-21T01:54:00Z-2024-09-05T12:32:32Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,1.0
667,4135_265986_24A0510,265986.0,Mk10,2024-09-05,Argos,2,-30.4304,160.3396,320.0,2566.0,...,160.335820,2024-09-05T10:48:53Z,2024-11-20 18:09:05+00:00,EPSG:4326,544.592,24A0510-2024-06-21T01:54:00Z-2024-09-05T10:48:53Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,2.0
665,4135_265986_24A0510,265986.0,Mk10,2024-09-05,Argos,2,-30.4324,160.3074,396.0,910.0,...,160.312203,2024-09-05T09:26:42Z,2024-11-20 18:09:05+00:00,EPSG:4326,383.214,24A0510-2024-06-21T01:54:00Z-2024-09-05T09:26:42Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,1.0
664,4135_265986_24A0510,265986.0,Mk10,2024-09-05,Argos,1,-30.4449,160.2763,547.0,3440.0,...,160.285407,2024-09-05T07:50:50Z,2024-11-20 18:09:05+00:00,EPSG:4326,319.220,24A0510-2024-06-21T01:54:00Z-2024-09-05T07:50:50Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,1.0
663,4135_265986_24A0510,265986.0,Mk10,2024-09-05,Argos,A,-30.4424,160.2704,313.0,3370.0,...,160.268481,2024-09-05T06:45:42Z,2024-11-20 18:09:05+00:00,EPSG:4326,190.821,24A0510-2024-06-21T01:54:00Z-2024-09-05T06:45:42Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,4135_265986_24A0510,265986.0,Mk10,2024-06-21,Argos,1,-32.1853,152.5076,736.0,2997.0,...,152.519059,2024-06-21T21:36:16Z,2024-11-20 18:09:05+00:00,EPSG:4326,343.637,24A0510-2024-06-21T01:54:00Z-2024-06-21T21:36:16Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,1.0
3,4135_265986_24A0510,265986.0,Mk10,2024-06-21,Argos,1,-32.1738,152.5381,580.0,7120.0,...,152.521579,2024-06-21T19:44:29Z,2024-11-20 18:09:05+00:00,EPSG:4326,100.134,24A0510-2024-06-21T01:54:00Z-2024-06-21T19:44:29Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,2.0
2,4135_265986_24A0510,265986.0,Mk10,2024-06-21,Argos,1,-32.1885,152.5367,1408.0,14856.0,...,152.521976,2024-06-21T17:44:32Z,2024-11-20 18:09:05+00:00,EPSG:4326,97.304,24A0510-2024-06-21T01:54:00Z-2024-06-21T17:44:32Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,2.0
1,4135_265986_24A0510,265986.0,Mk10,2024-06-21,Argos,1,-32.1747,152.4790,1162.0,9134.0,...,152.522003,2024-06-21T14:06:15Z,2024-11-20 18:09:05+00:00,EPSG:4326,98.495,24A0510-2024-06-21T01:54:00Z-2024-06-21T14:06:15Z,24A0510-2024-06-21T01:54:00Z,Caretta caretta,MachineObservation,2.0


In [33]:
dets_df['binsize'].describe()

count    563.000000
mean       1.589698
std        1.036882
min        1.000000
25%        1.000000
50%        1.000000
75%        2.000000
max        9.000000
Name: binsize, dtype: float64

In [34]:
dets_df['dataGeneralizations'] = dets_df['binsize'].apply(lambda x: 'subsampled by hour, first of {} record(s)'.format(x))

In [35]:
occ_df = pd.concat([occ_df, dets_df[['occurrenceID', 'eventID', 'scientificName', 'organismID', 'basisOfRecord']]])

In [36]:
# flesh out the occurrence taxonomic entries with kingdom, phylum, class, order, family
import pyworms

lookup_dict = {}
for name in occ_df['scientificName'].unique():
    resp = pyworms.aphiaRecordsByMatchNames(name)
    if len(resp[0]) == 0:
        print('\nNo match for name "{}"'.format(name))
        continue
    elif len(resp[0]) > 1:
        print('\nMultiple matches for name "{}"'.format(name))
        pprint.pprint(resp[0], indent=4)
        continue
    else:
        worms = resp[0][0]
        lookup_dict[name]={'scientificName': name,
                           'scientificNameID': worms['lsid'],
                           'taxonRank': worms['rank'],
                           'kingdom': worms['kingdom'],
                           'phylum': worms['phylum'],
                           'class': worms['class'],
                           'order': worms['order'],
                           'family': worms['family']}
        
lookup_df = pd.DataFrame.from_dict(lookup_dict, orient='index')

In [37]:
lookup_df

Unnamed: 0,scientificName,scientificNameID,taxonRank,kingdom,phylum,class,order,family
Caretta caretta,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae


In [38]:
occ_df = occ_df.join(lookup_df, how='left', on='scientificName', rsuffix='_worms')

In [39]:
occ_df

Unnamed: 0,occurrenceID,organismID,eventID,sex,scientificName,basisOfRecord,scientificName_worms,scientificNameID,taxonRank,kingdom,phylum,class,order,family
0,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z,,Caretta caretta,HumanObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
669,24A0510-2024-06-21T01:54:00Z-2024-09-05T12:32:32Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-09-05T12:32:32Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
667,24A0510-2024-06-21T01:54:00Z-2024-09-05T10:48:53Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-09-05T10:48:53Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
665,24A0510-2024-06-21T01:54:00Z-2024-09-05T09:26:42Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-09-05T09:26:42Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
664,24A0510-2024-06-21T01:54:00Z-2024-09-05T07:50:50Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-09-05T07:50:50Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,24A0510-2024-06-21T01:54:00Z-2024-06-21T21:36:16Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-06-21T21:36:16Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
3,24A0510-2024-06-21T01:54:00Z-2024-06-21T19:44:29Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-06-21T19:44:29Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
2,24A0510-2024-06-21T01:54:00Z-2024-06-21T17:44:32Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-06-21T17:44:32Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae
1,24A0510-2024-06-21T01:54:00Z-2024-06-21T14:06:15Z,24A0510-2024-06-21T01:54:00Z,24A0510-2024-06-21T01:54:00Z-2024-06-21T14:06:15Z,,Caretta caretta,MachineObservation,Caretta caretta,urn:lsid:marinespecies.org:taxname:137205,Species,Animalia,Chordata,,Testudines,Cheloniidae


In [40]:
occ_df['organismID'].unique()

array(['24A0510-2024-06-21T01:54:00Z'], dtype=object)

In [28]:
# Any EMOFs to harvest from detection occurrences?
# 
# 

In [41]:
# Push them out to files and an archive:

occ_df.to_csv('output/loggerhead_occurrences.csv', date_format='%Y-%m-%dT%H:%M:%S')
event_df.to_csv('output/loggerhead_events.csv', date_format='%Y-%m-%dT%H:%M:%S')
# emof_df.to_csv('output/emof.csv', date_format='%Y-%m-%dT%H:%M:%S')

In [30]:
# Zip and ship to an IPT

# Either via a form fill-in, or via depositing the archive on the IPT's filesystem?

# TODO: Try the form-fill first - use the OTN IPT workflows from ipython-utilities
# import requests  # session with the forms themselves
# import selenium  # or pick-n-click

### Debugging cells:

In [31]:
# throwing out all but max lq will help us de-duplicate these same-time-same-tag hits?
loc_df['lq'].describe()

count    35098.000000
mean        -1.341415
std          1.056915
min         -9.000000
25%         -2.000000
50%         -2.000000
75%         -1.000000
max          3.000000
Name: lq, dtype: float64