In [1]:
### WLP_Salmon_Spawning_Survey_DataJoinSummary_v1.py
### Version: 3/2/2022
### Author: Khem So, khem_so@fws.gov, (503) 231-6839
### Abstract: This Python 3 script pulls data from the WLP Salmon Spawning Survey ArcGIS Online feature service and performs joins and merges to produce a combined Excel workbook with Metadata, Live Fish, and Carcasses sheets.

In [2]:
import arcpy
import pandas as pd
from arcgis import GIS
import time, os, fnmatch, shutil
import openpyxl

In [3]:
### ArcGIS Online stores date-time information in UTC by default. This function uses the pytz package to convert time zones and can be used to convert from UTC ("UTC") to localized time. For example, localized "US/Pacific" is either Pacific Standard Time UTC-8 or Pacific Daylight Time UTC-7 depending upon time of year.
from datetime import datetime
from pytz import timezone
def change_timezone_of_field(df, source_date_time_field, new_date_time_field_suffix, source_timezone, new_timezone):
    """Tags *source_date_time_field* with its source timezone and adds a converted copy in a new field.

    The DataFrame is modified in place and nothing is returned:
    the source field becomes timezone-aware (in *source_timezone*), and a new field named
    *source_date_time_field* + *new_date_time_field_suffix* holds the same instants expressed in *new_timezone*.
    The function is safe to call more than once on the same field: a field that is already
    timezone-aware is converted to *source_timezone* instead of being localized again.

    : param df: The spatially enabled or pandas DataFrame containing datetime fields
    : param source_date_time_field: The name of the datetime field whose timezone is to be changed
    : param new_date_time_field_suffix: Suffix appended to the end of the name of the source datetime field. This is used to create the new date time field name.
    : param source_timezone: The name of the source timezone
    : param new_timezone: The name of the converted timezone. For possible values, see https://gist.github.com/heyalexej/8bf688fd67d7199be4a1682b3eec7568
    """
    source_values = df[source_date_time_field]
    if source_values.dt.tz is None:
        # Naive timestamps: declare which timezone they are expressed in
        source_values = source_values.dt.tz_localize(source_timezone)
    else:
        # Already timezone-aware (e.g. the cell was re-run): tz_localize would raise TypeError,
        # so normalize to the source timezone instead
        source_values = source_values.dt.tz_convert(source_timezone)
    df[source_date_time_field] = source_values
    # Define the name of the new date time field
    new_date_time_field = source_date_time_field + new_date_time_field_suffix
    # Convert the datetime in the source_date_time_field to the new timezone in a new field called new_date_time_field
    df[new_date_time_field] = source_values.dt.tz_convert(new_timezone)

In [4]:
### Allow authentication via login to U.S. Fish & Wildlife Service ArcGIS Online account via ArcGIS Pro
# The resulting `gis` connection is used later to fetch the feature service item by its ID.
gis = GIS("pro")

In [5]:
### Enter year of interest
# uncomment next line to use ArcGIS interface, otherwise hard coding year
# year = arcpy.GetParameterAsText(0)
# Keep this a four-digit string: it is compared against strftime('%Y') output when sedfMetadata is filtered.
year = "2021"

In [6]:
### Enter path for local file saving
# uncomment next line to use ArcGIS interface, otherwise hard coding out_workspace
# out_workspace = arcpy.GetParameterAsText(1)
# The Excel export is written into this folder (joined with the file name via os.path.join).
# NOTE(review): this is a user-specific absolute path; adjust it before running on another machine.
out_workspace = "C:/Users/kso/Desktop/"

In [7]:
### Build a local-time stamp (YYYY-MM-DD_HHMM) used to make the export file name unique
timestamp_format = '%Y-%m-%d_%H%M'
t = time.localtime()
timestamp = time.strftime(timestamp_format, t)

In [8]:
### Paths to ArcGIS Online data
# To populate Service ItemId, go to Feature Service webpage and in bottom right corner, click on the View link.
# Current Feature Service webpage: https://fws.maps.arcgis.com/home/item.html?id=758626eec0fc4bc1a72b4e4c9bd1023c
# Despite its name, ServiceItemID holds the object returned by gis.content.get (its .layers are used below), not the ID string.
ServiceItemID = gis.content.get("758626eec0fc4bc1a72b4e4c9bd1023c")

### There are separate methods for pulling spatial versus non-spatial data into Python. Spatial layers will become Spatially Enabled DataFrame objects. Non-spatial data will become regular pandas DataFrame objects.
## Define variables pointing to spatial layers
# Layers are picked by position: 0 = survey metadata, 1 = live fish, 2 = carcasses.
MetadataLyr = ServiceItemID.layers[0]
LiveFishLyr = ServiceItemID.layers[1]
CarcassLyr = ServiceItemID.layers[2]
## Create Spatially Enabled DataFrame objects
sedfMetadata = pd.DataFrame.spatial.from_layer(MetadataLyr)
sedfLiveFishLocation = pd.DataFrame.spatial.from_layer(LiveFishLyr)
sedfCarcassLocation = pd.DataFrame.spatial.from_layer(CarcassLyr)

## Define variables pointing to non-spatial (tabular) data
# NOTE(review): this REST URL is hard-coded separately from the item ID above; confirm it points to the same feature service.
Observer = r"https://services.arcgis.com/QVENGdaPbd4LUkLV/arcgis/rest/services/service_c555c76424ca452d8dab8de4f8c25000/FeatureServer/3"

## Convert AGOL table to NumPy Array and then to pandas DataFrames
# Only the listed fields are read from the table; parentglobalid is the key later used to join observers to surveys.
naObserver = arcpy.da.TableToNumPyArray(Observer,["objectid","globalid","strFirstName","strLastName","parentglobalid","CreationDate","Creator","EditDate","Editor"])
dfObserver = pd.DataFrame(naObserver)

In [9]:
## For every source table, tag each naive datetime field as UTC and add a "<field>_Pacific" copy in US/Pacific time
for frame in (sedfMetadata, sedfLiveFishLocation, sedfCarcassLocation, dfObserver):
    # Iterate over a snapshot of the column labels, because the helper adds a new column for every match
    for col in list(frame.columns):
        if frame[col].dtype == 'datetime64[ns]':
            change_timezone_of_field(frame, col, "_Pacific", "UTC", "US/Pacific")

In [10]:
## Filter sedfMetadata by single year
# Match the year on the Pacific-time survey date (dtmDate_Pacific), which is the date reported in the export.
# Filtering on the UTC dtmDate could place a survey taken late on 31 December (Pacific) into the following year.
sedfMetadata = sedfMetadata[sedfMetadata["dtmDate_Pacific"].dt.strftime('%Y') == year]

In [11]:
## Add strFullName to dfObserver: first name and last name separated by a single space
dfObserver["strFullName"] = dfObserver["strFirstName"].str.cat(dfObserver["strLastName"], sep=" ")

In [12]:
## Strip both curly brackets from every text value in dfObserver in a single pass
dfObserver = dfObserver.replace(r"[{}]", "", regex=True)

In [13]:
## Collapse dfObserver to one row per survey (parentglobalid) holding a comma-separated list of surveyor full names
observer_names = dfObserver.loc[:, ["parentglobalid", "strFullName"]]
dfObserver2 = observer_names.groupby("parentglobalid").agg({"strFullName": ", ".join})
dfObserver2

Unnamed: 0_level_0,strFullName
parentglobalid,Unnamed: 1_level_1
007cdb56-b174-4638-a7bb-08d67f76d384,Khem So
039445f4-7c6b-4890-8cf2-ff66904fa241,William Ritchie
06aa11f4-c393-4a3d-877a-f534b20e2cb9,Kelsey Lotz
0b952e22-146a-4564-b93c-c6b3f40b15be,"Khem So, Will Ritchie"
1105e0ef-cd4d-4202-a195-a1337029aa7c,Kelsey Lotz
...,...
fb924f39-7322-4a7b-84dc-b4d8948e2a06,William Ritchie
fc84e779-b16f-4a89-bf18-526cdb30efef,Marie Fernandez
fcbbb288-8088-4cb7-a465-2c82fae6ecfa,Charlie Pelizza
fdccca34-d345-4133-a9ee-ea5aa3894247,Kelsey Lotz


In [22]:
## Attach the per-survey surveyor names to the survey metadata (left join keeps surveys without observers)
dfMetadataObserver = sedfMetadata.merge(
    dfObserver2,
    how="left",
    left_on="globalid",
    right_on="parentglobalid",
)

In [23]:
## Fields to keep in the export, in the desired order
metadata_columns = [
    "globalid",
    "strStream",
    "dtmDate_Pacific",
    "strFullName",
    "strTideStart",
    "strWeather",
    "dtmManualTimeStart",
    "dtmManualTimeTurn",
    "dtmManualTimeEnd",
    "strStreamFlow",
    "strViewingConditions",
    "strViewingConditionsComments",
    "ysnLiveFish",
    "ysnCarcasses",
    "strComments",
]
## Reduce dtmDate_Pacific to a date-only string (time stripped), then keep only the fields listed above
dfMetadataObserver = dfMetadataObserver.assign(
    dtmDate_Pacific=dfMetadataObserver["dtmDate_Pacific"].dt.strftime('%m/%d/%Y')
)[metadata_columns]

In [24]:
# Show the column names of dfMetadataObserver as a plain list
dfMetadataObserver.columns.tolist()

['globalid',
 'strStream',
 'dtmDate_Pacific',
 'strFullName',
 'strTideStart',
 'strWeather',
 'dtmManualTimeStart',
 'dtmManualTimeTurn',
 'dtmManualTimeEnd',
 'strStreamFlow',
 'strViewingConditions',
 'strViewingConditionsComments',
 'ysnLiveFish',
 'ysnCarcasses',
 'strComments']

In [25]:
## Inner-join each live fish record to its parent survey, then order the rows by stream and survey date
dfMetadataObserverLiveFish = (
    dfMetadataObserver
    .merge(sedfLiveFishLocation, how="inner", left_on="globalid", right_on="parentglobalid")
    .sort_values(by=["strStream", "dtmDate_Pacific"])
)
dfMetadataObserverLiveFish

Unnamed: 0,globalid_x,strStream,dtmDate_Pacific,strFullName,strTideStart,strWeather,dtmManualTimeStart,dtmManualTimeTurn,dtmManualTimeEnd,strStreamFlow,...,parentglobalid,CreationDate,Creator,EditDate,Editor,strLiveFishRedd,strReddID,SHAPE,CreationDate_Pacific,EditDate_Pacific
30,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.618999958+00:00,kelsey_lotz@fws.gov_fws,2022-02-08 16:25:43.576999903+00:00,kelsey_lotz@fws.gov_fws,,,"{'x': -123.9406076513336, 'y': 46.360168926898...",2021-10-29 11:23:39.618999958-07:00,2022-02-08 08:25:43.576999903-08:00
31,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.618999958+00:00,kelsey_lotz@fws.gov_fws,2022-02-08 16:25:43.576999903+00:00,kelsey_lotz@fws.gov_fws,,,"{'x': -123.9406843107998, 'y': 46.360182041407...",2021-10-29 11:23:39.618999958-07:00,2022-02-08 08:25:43.576999903-08:00
32,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.618999958+00:00,kelsey_lotz@fws.gov_fws,2022-02-08 16:25:43.576999903+00:00,kelsey_lotz@fws.gov_fws,,,"{'x': -123.94089342770867, 'y': 46.36016371366...",2021-10-29 11:23:39.618999958-07:00,2022-02-08 08:25:43.576999903-08:00
33,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.618999958+00:00,kelsey_lotz@fws.gov_fws,2022-02-08 16:25:43.576999903+00:00,kelsey_lotz@fws.gov_fws,,,"{'x': -123.94107736322806, 'y': 46.36019102213...",2021-10-29 11:23:39.618999958-07:00,2022-02-08 08:25:43.576999903-08:00
34,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.618999958+00:00,kelsey_lotz@fws.gov_fws,2022-02-08 16:25:43.576999903+00:00,kelsey_lotz@fws.gov_fws,,,"{'x': -123.94129174277744, 'y': 46.36032236049...",2021-10-29 11:23:39.618999958-07:00,2022-02-08 08:25:43.576999903-08:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
448,7e583a8a-6d07-4a49-9adc-71a5d4697b4f,North Creek,11/17/2021,Marie Fernandez,,Partly Cloudy,10:30,11:15,12:44,High,...,7e583a8a-6d07-4a49-9adc-71a5d4697b4f,2022-01-11 23:22:12.785000086+00:00,kelsey_lotz@fws.gov_fws,2022-01-11 23:22:12.785000086+00:00,kelsey_lotz@fws.gov_fws,Live Fish,,"{'x': 0, 'y': 0, 'spatialReference': {'wkid': ...",2022-01-11 15:22:12.785000086-08:00,2022-01-11 15:22:12.785000086-08:00
449,4a3f2536-f169-4858-8fb8-5a060f3c96d6,North Creek,11/29/2021,Marie Fernandez,,Overcast/Drizzle,10:50,11:31,12:38,High,...,4a3f2536-f169-4858-8fb8-5a060f3c96d6,2022-01-11 23:26:28.272000074+00:00,kelsey_lotz@fws.gov_fws,2022-01-11 23:26:28.272000074+00:00,kelsey_lotz@fws.gov_fws,Live Fish,,"{'x': 0, 'y': 0, 'spatialReference': {'wkid': ...",2022-01-11 15:26:28.272000074-08:00,2022-01-11 15:26:28.272000074-08:00
450,a33bab8a-c6eb-494c-9156-19cec4d5abf6,North Creek,12/16/2021,Marie Fernandez,,Partly Cloudy,09:43,10:38,11:46,High,...,a33bab8a-c6eb-494c-9156-19cec4d5abf6,2022-01-11 23:29:53.451999903+00:00,kelsey_lotz@fws.gov_fws,2022-01-11 23:29:53.451999903+00:00,kelsey_lotz@fws.gov_fws,Live Fish,,"{'x': 0, 'y': 0, 'spatialReference': {'wkid': ...",2022-01-11 15:29:53.451999903-08:00,2022-01-11 15:29:53.451999903-08:00
451,a33bab8a-c6eb-494c-9156-19cec4d5abf6,North Creek,12/16/2021,Marie Fernandez,,Partly Cloudy,09:43,10:38,11:46,High,...,a33bab8a-c6eb-494c-9156-19cec4d5abf6,2022-01-11 23:29:53.451999903+00:00,kelsey_lotz@fws.gov_fws,2022-01-11 23:29:53.451999903+00:00,kelsey_lotz@fws.gov_fws,Live Fish,,"{'x': 0, 'y': 0, 'spatialReference': {'wkid': ...",2022-01-11 15:29:53.451999903-08:00,2022-01-11 15:29:53.451999903-08:00


In [31]:
## Inner-join each carcass record to its parent survey, then order the rows by stream and survey date
dfMetadataObserverCarcasses = (
    dfMetadataObserver
    .merge(sedfCarcassLocation, how="inner", left_on="globalid", right_on="parentglobalid")
    .sort_values(by=["strStream", "dtmDate_Pacific"])
)
dfMetadataObserverCarcasses

Unnamed: 0,globalid_x,strStream,dtmDate_Pacific,strFullName,strTideStart,strWeather,dtmManualTimeStart,dtmManualTimeTurn,dtmManualTimeEnd,strStreamFlow,...,ysnCountedLast,parentglobalid,CreationDate,Creator,EditDate,Editor,intNumCarcasses,SHAPE,CreationDate_Pacific,EditDate_Pacific
0,2a9082d2-c328-4ff2-abbe-92f5e570beca,Chum Creek,10/29/2021,Kelsey Lotz,"High, falling",Clear,10:37,,11:27,High,...,no,2a9082d2-c328-4ff2-abbe-92f5e570beca,2021-10-29 18:23:39.588000059+00:00,kelsey_lotz@fws.gov_fws,2021-10-29 18:30:19.933000088+00:00,kelsey_lotz@fws.gov_fws,,"{'x': -123.94171459905576, 'y': 46.36027177949...",2021-10-29 11:23:39.588000059-07:00,2021-10-29 11:30:19.933000088-07:00
14,b18399a6-4756-4f9a-8700-3bd39bcf24a7,Chum Creek,11/05/2021,Kelsey Lotz,"High, rising",Rain,12:06,,13:09,High,...,no,b18399a6-4756-4f9a-8700-3bd39bcf24a7,2021-11-08 17:07:06.720999956+00:00,kelsey_lotz@fws.gov_fws,2021-11-08 17:07:06.720999956+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.94091809920552, 'y': 46.36004123088...",2021-11-08 09:07:06.720999956-08:00,2021-11-08 09:07:06.720999956-08:00
15,b18399a6-4756-4f9a-8700-3bd39bcf24a7,Chum Creek,11/05/2021,Kelsey Lotz,"High, rising",Rain,12:06,,13:09,High,...,no,b18399a6-4756-4f9a-8700-3bd39bcf24a7,2021-11-08 17:07:06.720999956+00:00,kelsey_lotz@fws.gov_fws,2021-11-08 17:07:06.720999956+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.94099283331896, 'y': 46.36007151952...",2021-11-08 09:07:06.720999956-08:00,2021-11-08 09:07:06.720999956-08:00
89,1105e0ef-cd4d-4202-a195-a1337029aa7c,Chum Creek,11/16/2021,Kelsey Lotz,,Partly Cloudy,12:30,,13:24,High,...,no,1105e0ef-cd4d-4202-a195-a1337029aa7c,2021-11-16 21:31:50.336999893+00:00,kelsey_lotz@fws.gov_fws,2021-11-16 21:31:50.336999893+00:00,kelsey_lotz@fws.gov_fws,2.0,"{'x': -123.94042512406746, 'y': 46.36007616326...",2021-11-16 13:31:50.336999893-08:00,2021-11-16 13:31:50.336999893-08:00
90,1105e0ef-cd4d-4202-a195-a1337029aa7c,Chum Creek,11/16/2021,Kelsey Lotz,,Partly Cloudy,12:30,,13:24,High,...,no,1105e0ef-cd4d-4202-a195-a1337029aa7c,2021-11-16 21:31:50.336999893+00:00,kelsey_lotz@fws.gov_fws,2021-11-16 21:31:50.336999893+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.94159108158456, 'y': 46.36020586411...",2021-11-16 13:31:50.336999893-08:00,2021-11-16 13:31:50.336999893-08:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,474149bd-098f-4a5b-9c18-4c77bfa71a78,Lost Creek,12/14/2021,Kelsey Lotz,,Partly cloudy,11:18,,12:18,High,...,yes,474149bd-098f-4a5b-9c18-4c77bfa71a78,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.93921236381108, 'y': 46.35909084470...",2021-12-15 08:22:42.542000055-08:00,2021-12-15 08:22:42.542000055-08:00
220,474149bd-098f-4a5b-9c18-4c77bfa71a78,Lost Creek,12/14/2021,Kelsey Lotz,,Partly cloudy,11:18,,12:18,High,...,yes,474149bd-098f-4a5b-9c18-4c77bfa71a78,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,2.0,"{'x': -123.9380408773776, 'y': 46.358726501248...",2021-12-15 08:22:42.542000055-08:00,2021-12-15 08:22:42.542000055-08:00
221,474149bd-098f-4a5b-9c18-4c77bfa71a78,Lost Creek,12/14/2021,Kelsey Lotz,,Partly cloudy,11:18,,12:18,High,...,,474149bd-098f-4a5b-9c18-4c77bfa71a78,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.9378167546076, 'y': 46.358710608907...",2021-12-15 08:22:42.542000055-08:00,2021-12-15 08:22:42.542000055-08:00
222,474149bd-098f-4a5b-9c18-4c77bfa71a78,Lost Creek,12/14/2021,Kelsey Lotz,,Partly cloudy,11:18,,12:18,High,...,yes,474149bd-098f-4a5b-9c18-4c77bfa71a78,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,2021-12-15 16:22:42.542000055+00:00,kelsey_lotz@fws.gov_fws,1.0,"{'x': -123.93637331650233, 'y': 46.35780280272...",2021-12-15 08:22:42.542000055-08:00,2021-12-15 08:22:42.542000055-08:00


In [34]:
## Turn the timezone-aware datetime fields (UTC and US/Pacific) of both export tables into text that Excel can read more easily
for export_frame in (dfMetadataObserverLiveFish, dfMetadataObserverCarcasses):
    archive_dt_field_list = export_frame.select_dtypes(include=['datetime64[ns, UTC]','datetime64[ns, US/Pacific]'])
    # Iterating over the selected sub-frame yields its column names
    for col in archive_dt_field_list:
        export_frame[col] = export_frame[col].dt.strftime('%m/%d/%Y %H:%M:%S %Z%z')

In [35]:
### Write the three result tables to one timestamped Excel workbook in out_workspace
export_path = os.path.join(out_workspace, 'WLP_Salmon_Spawning_Survey_' + timestamp + '.xlsx')
# Use the writer as a context manager: the workbook is saved and closed on exit, even if an export fails.
# (ExcelWriter.save() was removed in pandas 2.0, and sheet_name should be passed by keyword.)
with pd.ExcelWriter(export_path) as writer:
    dfMetadataObserver.to_excel(writer, sheet_name='Metadata')
    dfMetadataObserverLiveFish.to_excel(writer, sheet_name='Live Fish')
    dfMetadataObserverCarcasses.to_excel(writer, sheet_name='Carcasses')