In [1]:
### WLP_Salmon_Spawning_Survey_DataJoinSummary_v2.py
### Version: 02/13/2025
### Author: Khem So, khem_so@fws.gov, (971) 282-2193
### Abstract: This Python 3 script pulls data from the Willapa NWR salmon spawning survey downloaded file geodatabase and performs joins and merges to result in a combined Excel dataset.

In [2]:
import arcpy
import pandas as pd
from arcgis import GIS
from arcgis.features import GeoAccessor
import time, os

In [3]:
### Report the start of the run through the arcpy messaging interface
arcpy.AddMessage("Starting...")

### ArcGIS Online stores date-time information in UTC by default. This function uses the pandas .dt.tz_localize and .dt.tz_convert accessors to convert time zones and can be used to convert from UTC ("UTC") to localized time. Time zone names are the IANA names that pytz also uses. For example, localized "US/Pacific" is either Pacific Standard Time (UTC-8) or Pacific Daylight Time (UTC-7), depending on the time of year.
from datetime import datetime
from pytz import timezone
def change_timezone_of_field(df, source_date_time_field, new_date_time_field_suffix, source_timezone, new_timezone):
    """Adds a new field holding *source_date_time_field* converted to *new_timezone*.

    The DataFrame is modified in place and nothing is returned:
    the source field is made timezone-aware in *source_timezone*, and a new field named
    *source_date_time_field* + *new_date_time_field_suffix* is added with the same instants
    expressed in *new_timezone*.

    : param df: The spatially enabled or pandas DataFrame containing datetime fields
    : param source_date_time_field: The name of the datetime field whose timezone is to be changed
    : param new_date_time_field_suffix: Suffix appended to the end of the name of the source datetime field. This is used to create the new date time field name.
    : param source_timezone: The name of the source timezone
    : param new_timezone: The name of the converted timezone. For possible values, see https://gist.github.com/heyalexej/8bf688fd67d7199be4a1682b3eec7568
    : raises AttributeError: if the source field is not a datetime field (it has no .dt accessor)
    """
    source_values = df[source_date_time_field]
    if source_values.dt.tz is None:
        # Naive values: declare that they are expressed in the source timezone
        source_values = source_values.dt.tz_localize(source_timezone)
    else:
        # Already timezone-aware (tz_localize would raise TypeError here):
        # keep the same instants and express them in the source timezone
        source_values = source_values.dt.tz_convert(source_timezone)
    df[source_date_time_field] = source_values

    # Define the name of the new date time field
    new_date_time_field = f"{source_date_time_field}{new_date_time_field_suffix}"
    # Convert the datetime in the source field to the new timezone in the new field
    df[new_date_time_field] = source_values.dt.tz_convert(new_timezone)

### This function converts Python datetime64 fields to %m/%d/%Y %H:%M:%S %Z%z format
def archive_dt_field(df):
    """Rewrites the datetime fields of *df* in place as text in %m/%d/%Y %H:%M:%S %Z%z format for archiving to Excel.

    Fields are picked with select_dtypes using the dtypes 'datetime64[ns, UTC]',
    'datetime64[ns, US/Pacific]' and 'datetime64'; all other fields are left untouched.
    Nothing is returned.

    : param df: The spatially enabled or pandas DataFrame containing datetime fields
    """
    excel_text_format = '%m/%d/%Y %H:%M:%S %Z%z'
    datetime_dtypes = ['datetime64[ns, UTC]', 'datetime64[ns, US/Pacific]', 'datetime64']
    # Collect the matching field names first, then overwrite each field with its formatted text
    datetime_field_names = list(df.select_dtypes(include=datetime_dtypes).columns)
    for field_name in datetime_field_names:
        df[field_name] = df[field_name].dt.strftime(excel_text_format)


In [4]:
### Allow authentication via login to U.S. Fish & Wildlife Service ArcGIS Online account via ArcGIS Pro
## NOTE(review): `gis` is not referenced again in the visible cells (the data is read from a local file geodatabase) - confirm whether this login is still required
gis = GIS("pro")

In [5]:
### Enter year of interest
# uncomment next line to use ArcGIS interface, otherwise hard coding year
# year = arcpy.GetParameterAsText(0)
## year is kept as text: it is compared with strftime('%Y') output and concatenated into output file names
year = "2024"

### Enter path for local file saving
# uncomment next line to use ArcGIS interface, otherwise hard coding out_workspace
# out_workspace = arcpy.GetParameterAsText(1)
## Folder that receives the exported Excel/CSV files
out_workspace = r"C:\Users\kso\OneDrive - DOI\Desktop"

### Create timestamp for file naming
## Local clock time at minute resolution (YYYY-MM-DD_HHMM); taken once so every export of this run shares it
t = time.localtime()
timestamp = time.strftime('%Y-%m-%d_%H%M', t)

In [6]:
### Path to file geodatabase
## Local copy of the downloaded survey file geodatabase; the feature classes and tables read below are looked up inside it
fgdb = r"C:\Users\kso\OneDrive - DOI\WORKSPACE\WLP_Salmon_Spawning_Survey\data\WLP_Salmon_Spawning_v1_20250213_1532\094e59f0-0d8e-49fe-824c-0990c3fb005c.gdb"

In [7]:
## Create Spatially Enabled DataFrame objects
## One frame per feature class, read in this order: survey metadata, live fish, carcasses
sedfMetadata, sedfLiveFishLocation, sedfCarcassLocation = (
    pd.DataFrame.spatial.from_featureclass(fgdb + feature_class_suffix)
    for feature_class_suffix in ("\\WLP_Salmon_Spawning_v1", "\\tblLiveFish", "\\tblCarcasses")
)

## The observer lookup is read with from_table and wrapped in a pandas DataFrame
observer_table_path = fgdb + "\\lkupObserver"
dfObserver = pd.DataFrame(GeoAccessor.from_table(observer_table_path))

In [8]:
### Convert integer timestamps to datetime
## Both fields are read as milliseconds (unit='ms') and come out timezone-aware in UTC (utc=True)
for tracking_field in ('CreationDate', 'EditDate'):
    sedfCarcassLocation[tracking_field] = pd.to_datetime(
        sedfCarcassLocation[tracking_field], utc=True, unit='ms'
    )

In [9]:
### Use change_timezone_of_field function to convert datetime fields in each dataframe from UTC to Pacific within new field with _Pacific suffix
## NOTE(review): the sedfCarcassLocation preview in this notebook shows no *_Pacific fields, which suggests its
## CreationDate/EditDate (made UTC-aware in the previous cell) are not picked up by the 'datetime64' filter - confirm this is intended
frames_with_datetime_fields = (sedfMetadata, sedfLiveFishLocation, sedfCarcassLocation, dfObserver)
for df in frames_with_datetime_fields:
    # The field list is taken before the frame is modified, so fields added by the function are not revisited
    datetime_field_names = df.select_dtypes(include=['datetime64']).columns
    for col in datetime_field_names:
        change_timezone_of_field(df, col, "_Pacific", "UTC", "US/Pacific")

In [10]:
### Filter sedfMetadata by single year
## The year is taken from the Pacific-time survey date (dtmDate_Pacific), the same field that is reported downstream,
## so a survey is assigned to the year it was carried out in local time rather than to its UTC year
## .copy() gives an independent frame, so later edits never touch sedfMetadata
sedfMetadataYYYY = sedfMetadata[sedfMetadata["dtmDate_Pacific"].dt.strftime('%Y') == str(year)].copy()

In [11]:
### Preview the carcass frame after the timestamp conversion (the notebook renders a truncated view)
sedfCarcassLocation

Unnamed: 0,objectid,globalid,strCarcassSpecies,strCarcassSex,strDecomposedFresh,ysnCountedLast,parentglobalid,CreationDate,Creator,EditDate,Editor,intNumCarcasses,SHAPE
0,1,{CEF1D7D3-986B-42E4-9AAB-426A90C10A70},,,,,{13DB6347-D486-4D4B-B7CD-B907CB59B342},2017-11-03 17:37:52+00:00,khem_so@fws.gov_fws,2017-11-03 17:37:52+00:00,khem_so@fws.gov_fws,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860..."
1,2,{57422E7A-F717-45CB-8155-F90C809BD236},,,,,{2C561A5C-38E7-4105-9615-5F313FAD5130},2017-11-13 02:20:11+00:00,khem_so@fws.gov_fws,2017-11-13 02:20:11+00:00,khem_so@fws.gov_fws,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860..."
2,3,{22FF6809-6C6B-4239-8273-88B0C0B3073E},,,,,{737E4A51-41AC-4305-B5B1-47899864C263},2017-11-18 18:40:14+00:00,khem_so@fws.gov_fws,2017-11-18 18:40:14+00:00,khem_so@fws.gov_fws,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860..."
3,4,{EFC26E57-9F6C-4FD7-845A-FC5B68A9B81E},,,,,{6AB24EA3-4F27-48CA-9492-D307FCCBBD4B},2017-11-27 00:00:39+00:00,khem_so@fws.gov_fws,2017-11-27 00:00:39+00:00,khem_so@fws.gov_fws,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860..."
4,5,{E2FEEB46-1BA2-47D4-A768-FE1646C591A7},,,,,{007CDB56-B174-4638-A7BB-08D67F76D384},2017-12-02 19:06:52+00:00,khem_so@fws.gov_fws,2017-12-02 19:06:52+00:00,khem_so@fws.gov_fws,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1212,1214,{46FCE1E4-2E7E-42DC-88B9-F6E1E633083A},Unk,Unk,Decomposed,yes,{9349BB62-D1E4-4E37-80DB-B6BCF4915A8B},2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,1,"{""x"": -123.93848952099995, ""y"": 46.35895357300..."
1213,1215,{E92C1EE8-2C3D-426B-BFC5-06476B98BB6C},Oncorhynchus keta,Unk,Decomposed,yes,{9349BB62-D1E4-4E37-80DB-B6BCF4915A8B},2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,3,"{""x"": -123.93933756199999, ""y"": 46.35908724300..."
1214,1216,{B104F780-B9C1-4B5B-B1D8-034B044AF1DA},Oncorhynchus keta,Unk,Decomposed,yes,{9349BB62-D1E4-4E37-80DB-B6BCF4915A8B},2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,1,"{""x"": -123.93974844899998, ""y"": 46.35938469800..."
1215,1217,{C9E51E33-1E05-43C6-8C07-C4272EF1F0E2},Oncorhynchus keta,Unk,Decomposed,yes,{9349BB62-D1E4-4E37-80DB-B6BCF4915A8B},2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,2024-12-18 19:01:12.000001+00:00,sierra_earle@fws.gov_fws,1,"{""x"": -123.93997823899997, ""y"": 46.35941630400..."


In [12]:
### Export raw data frames as backup
## Use archive_dt_field function to convert Python date time into format Excel can read more easily
## NOTE: archive_dt_field rewrites the datetime fields of these frames in place, so they hold text from here on
for raw_frame in (sedfMetadata, sedfLiveFishLocation, sedfCarcassLocation, dfObserver):
    archive_dt_field(raw_frame)

## Create export path for backup and write one sheet per raw frame
## The with-block closes (and saves) the workbook even if one of the sheets fails to write
backup_path = os.path.join(out_workspace, ('WLP_Salmon_Spawning_Survey_BKUP_' + timestamp + '.xlsx'))
with pd.ExcelWriter(backup_path) as writer:
    sedfMetadata.to_excel(writer, sheet_name='Metadata', index=False)
    sedfLiveFishLocation.to_excel(writer, sheet_name='Live Fish', index=False)
    sedfCarcassLocation.to_excel(writer, sheet_name='Carcasses', index=False)
    dfObserver.to_excel(writer, sheet_name='Observers', index=False)

arcpy.AddMessage("Exported raw data as Excel spreadsheet for backup...")

In [13]:
### Create dfObserver2 data frame with concatenated surveyor names grouped by parentglobalid
## Clean up names
for name_field in ("strFirstName", "strLastName"):
    dfObserver[name_field] = dfObserver[name_field].str.strip()

## Process dfObserver to get single concatenated field for full name
## Missing name parts are treated as empty text so that one missing part does not blank out the whole name
name_parts = dfObserver[["strFirstName", "strLastName"]].fillna("")
dfObserver["strFullName"] = (name_parts["strFirstName"] + " " + name_parts["strLastName"]).str.strip()

## Process dfObserver to remove curly brackets to allow for join based on GUID
## The character class removes both "{" and "}" from every text value in a single pass
dfObserver = dfObserver.replace(r"[{}]", "", regex=True)

## Process dfObserver to get concatenated list of full surveyor names by survey
## Rows without any name are left out: ', '.join only accepts text and would otherwise raise TypeError
has_name = dfObserver["strFullName"] != ""
dfObserver2 = dfObserver.loc[has_name, ["parentglobalid", "strFullName"]]
dfObserver2 = dfObserver2.groupby("parentglobalid").agg({"strFullName": ', '.join})

In [14]:
### Join sedfMetadataYYYY with dfObserver
## The two GUID fields are not guaranteed to be written the same way (curly brackets, letter case),
## so the join uses a temporary key with brackets removed and letters upper-cased on BOTH sides.
## The original globalid field is left untouched because later joins rely on it.
observer_names = dfObserver2.reset_index()
observer_names["_join_guid"] = observer_names["parentglobalid"].astype(str).str.strip("{}").str.upper()
metadata_keyed = sedfMetadataYYYY.assign(
    _join_guid=sedfMetadataYYYY["globalid"].astype(str).str.strip("{}").str.upper()
)
dfMetadataObserver = pd.merge(
    metadata_keyed,
    observer_names[["_join_guid", "strFullName"]],
    how="left",
    on="_join_guid",
).drop(columns="_join_guid")

In [15]:
### Manipulate date/time fields in dfMetadataObserver
## Strip time from dtmDate_Pacific (the field becomes mm/dd/YYYY text)
survey_day_text = dfMetadataObserver["dtmDate_Pacific"].dt.strftime('%m/%d/%Y')
dfMetadataObserver["dtmDate_Pacific"] = survey_day_text

## Calculate total survey time
## Each manually entered clock time is combined with the survey day and parsed into a *_dt field
for clock_field in ("dtmManualTimeStart", "dtmManualTimeEnd"):
    day_and_clock_text = survey_day_text + " " + dfMetadataObserver[clock_field]
    dfMetadataObserver[clock_field + "_dt"] = pd.to_datetime(day_and_clock_text, format="%m/%d/%Y %H:%M")

## Duration is end minus start, stored as text
survey_duration = dfMetadataObserver["dtmManualTimeEnd_dt"] - dfMetadataObserver["dtmManualTimeStart_dt"]
dfMetadataObserver["dtmManualTimeTotal"] = survey_duration.astype(str)

In [16]:
### Reset dfMetadataObserver in desired order and drop unneeded fields
## Fields not listed here (for example the helper *_dt fields) are dropped
metadata_observer_fields = [
    "globalid",
    "strStream",
    "dtmDate_Pacific",
    "strFullName",
    "strTideStart",
    "strWeather",
    "dtmManualTimeStart",
    "dtmManualTimeTurn",
    "dtmManualTimeEnd",
    "dtmManualTimeTotal",
    "strStreamFlow",
    "strViewingConditions",
    "strViewingConditionsComments",
    "ysnLiveFish",
    "ysnCarcasses",
    "strComments",
    "CreationDate_Pacific",
]
dfMetadataObserver = dfMetadataObserver[metadata_observer_fields]

In [17]:
### Join dfMetadataObserver with sedfLiveFishLocation
## Inner join: only live fish records whose parentglobalid matches a survey globalid are kept
live_fish_joined = dfMetadataObserver.merge(
    sedfLiveFishLocation,
    how="inner",
    left_on="globalid",
    right_on="parentglobalid",
)

## Reset dfMetadataObserverLiveFish in desired order and drop unneeded fields
## (_x fields come from the survey metadata, _y fields from the live fish records)
live_fish_fields = [
    'globalid_x',
    'strStream',
    'dtmDate_Pacific',
    'ysnLiveFish',
    'globalid_y',
    'strLiveSpecies',
    'strLiveSex',
    'ysnPairs',
    'ysnReddBuilding',
    'intNumRedds',
    'strLiveFishRedd',
    'strReddID',
    'SHAPE',
    'CreationDate_Pacific_x',
]
## Define dfMetadataObserverLiveFish sort order
dfMetadataObserverLiveFish = live_fish_joined[live_fish_fields].sort_values(by=["strStream", "dtmDate_Pacific"])

In [18]:
### Join dfMetadataObserver with sedfCarcassLocation
## Inner join: only carcass records whose parentglobalid matches a survey globalid are kept
carcasses_joined = dfMetadataObserver.merge(
    sedfCarcassLocation,
    how="inner",
    left_on="globalid",
    right_on="parentglobalid",
)

## Reset dfMetadataObserverCarcasses in desired order and drop unneeded fields
## (_x fields come from the survey metadata, _y fields from the carcass records)
carcass_fields = [
    'globalid_x',
    'strStream',
    'dtmDate_Pacific',
    'ysnCarcasses',
    'globalid_y',
    'strCarcassSpecies',
    'strCarcassSex',
    'strDecomposedFresh',
    'intNumCarcasses',
    'ysnCountedLast',
    'SHAPE',
    'CreationDate_Pacific',
]
## Define dfMetadataObserverCarcasses sort order
dfMetadataObserverCarcasses = carcasses_joined[carcass_fields].sort_values(by=["strStream", "dtmDate_Pacific"])

In [19]:
### Live fish data entered prior to 11/5/2021 are in different format so before/after data frames needed
## Records created on 11/5/2021 itself fall in the "after" frame (>= comparison)
format_change_date = "11/05/2021"
survey_created = dfMetadataObserverLiveFish['CreationDate_Pacific_x']

## .copy() makes both frames independent so new fields can be added to them later
dfMetadataObserverLiveFish_before20211105 = dfMetadataObserverLiveFish[survey_created < format_change_date].copy()
dfMetadataObserverLiveFish_after20211105 = dfMetadataObserverLiveFish[survey_created >= format_change_date].copy()

In [20]:
### Create fields for counting live fish entered before 11/5/2021
## Each rule is (source field, value to match, count field, amount written to matching rows); rules run in this order
before_count_rules = [
    ('ysnReddBuilding', "yes", 'intReddBuilding', 1),
    ('ysnPairs', "yes", 'dblPairs', 0.5),
    ('strLiveSex', "M", 'intMales', 1),
    ('strLiveSex', "F", 'intFemales', 1),
    ('strLiveSex', "Unk", 'intUnknown', 1),
]
for source_field, matching_value, count_field, amount in before_count_rules:
    rule_applies = dfMetadataObserverLiveFish_before20211105[source_field] == matching_value
    dfMetadataObserverLiveFish_before20211105.loc[rule_applies, [count_field]] = amount

## Group by survey GUID and species (rows with missing species are kept); sum the numeric fields
before_sum_fields = ['intNumRedds', 'intReddBuilding', 'dblPairs', 'intMales', 'intFemales', 'intUnknown']
dfLiveFishSummary1 = dfMetadataObserverLiveFish_before20211105.groupby(
    ['globalid_x', 'strLiveSpecies'], as_index=False, dropna=False
).agg(**{sum_field: (sum_field, 'sum') for sum_field in before_sum_fields})

## Create field for sum of live fish
dfLiveFishSummary1['intLiveFish'] = dfLiveFishSummary1[['intMales', 'intFemales', 'intUnknown']].sum(axis=1)
dfLiveFishSummary1

Unnamed: 0,globalid_x,strLiveSpecies,intNumRedds,intReddBuilding,dblPairs,intMales,intFemales,intUnknown,intLiveFish


In [21]:
### Preview the live fish records created on or after 11/5/2021, before the count fields are added
dfMetadataObserverLiveFish_after20211105

Unnamed: 0,globalid_x,strStream,dtmDate_Pacific,ysnLiveFish,globalid_y,strLiveSpecies,strLiveSex,ysnPairs,ysnReddBuilding,intNumRedds,strLiveFishRedd,strReddID,SHAPE,CreationDate_Pacific_x
0,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{19FFBEE1-62EC-4B5B-8E70-E3DA70345A83},Oncorhynchus keta,,yes,,,Live Fish and Redd,24_chum_11,"{""x"": -123.936776039, ""y"": 46.35789915400005, ...",2024-11-05 13:57:04-08:00
1,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{CA387F9D-1A40-4823-83B0-25A9B38461E7},Oncorhynchus keta,,yes,,,Live Fish and Redd,24_Chum_9,"{""x"": -123.93674519399997, ""y"": 46.35790351100...",2024-11-05 13:57:04-08:00
2,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{FAA6A6FD-BA93-4A32-89DA-EBBBFF0A5C90},Oncorhynchus keta,,yes,,,Live Fish and Redd,24_Chum_9,"{""x"": -123.93676287499994, ""y"": 46.35816224100...",2024-11-05 13:57:04-08:00
3,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{7CBBD0E9-1267-4576-8A1C-C6077FEFB12E},Oncorhynchus keta,,yes,,,Live Fish and Redd,24_Chum_12,"{""x"": -123.93689526399999, ""y"": 46.35820218700...",2024-11-05 13:57:04-08:00
4,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{C8EB9E09-144D-4B85-B0E3-7CC8BCF11320},Oncorhynchus keta,,yes,,,Live Fish and Redd,24_Chum_12,"{""x"": -123.93723986499998, ""y"": 46.35837302500...",2024-11-05 13:57:04-08:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{2FCBFA6A-1DBA-4E41-9668-3CFDC6A19832},Oncorhynchus keta,F,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00
74,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{F6CA3E44-9ADF-4BD6-A8CA-1DC811BAA566},Oncorhynchus keta,F,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00
75,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{96B453E3-CBA7-4525-BC70-A2BA7A4FC9DD},Oncorhynchus keta,M,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00
76,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{D21BF852-E2A1-432B-8437-3B1A7561F14F},,,,,,Redd,24_SouthCrk_016,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00


In [22]:
### Create fields for counting live fish entered after 11/5/2021
## Each rule is (source field, value to match, count field); matching rows get 1 in the count field.
## Rules run in this order; a pair counts as one pair, one male and one female.
after_count_rules = [
    ('ysnReddBuilding', "yes", 'intReddBuilding'),
    ('ysnPairs', "yes", 'dblPairs'),
    ('ysnPairs', "yes", 'intMales'),
    ('ysnPairs', "yes", 'intFemales'),
    ('strLiveSex', "M", 'intMales'),
    ('strLiveSex', "F", 'intFemales'),
    ('strLiveSex', "Unk", 'intUnknown'),
]
for source_field, matching_value, count_field in after_count_rules:
    rule_applies = dfMetadataObserverLiveFish_after20211105[source_field] == matching_value
    dfMetadataObserverLiveFish_after20211105.loc[rule_applies, [count_field]] = 1

## Every record that is a redd (with or without live fish) counts as one redd
is_redd_record = dfMetadataObserverLiveFish_after20211105['strLiveFishRedd'].isin(["Live Fish and Redd", "Redd"])
dfMetadataObserverLiveFish_after20211105.loc[is_redd_record, ['intNumRedds']] = 1
dfMetadataObserverLiveFish_after20211105

Unnamed: 0,globalid_x,strStream,dtmDate_Pacific,ysnLiveFish,globalid_y,strLiveSpecies,strLiveSex,ysnPairs,ysnReddBuilding,intNumRedds,strLiveFishRedd,strReddID,SHAPE,CreationDate_Pacific_x,intReddBuilding,dblPairs,intMales,intFemales,intUnknown
0,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{19FFBEE1-62EC-4B5B-8E70-E3DA70345A83},Oncorhynchus keta,,yes,,1,Live Fish and Redd,24_chum_11,"{""x"": -123.936776039, ""y"": 46.35789915400005, ...",2024-11-05 13:57:04-08:00,,1.0,1.0,1.0,
1,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{CA387F9D-1A40-4823-83B0-25A9B38461E7},Oncorhynchus keta,,yes,,1,Live Fish and Redd,24_Chum_9,"{""x"": -123.93674519399997, ""y"": 46.35790351100...",2024-11-05 13:57:04-08:00,,1.0,1.0,1.0,
2,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{FAA6A6FD-BA93-4A32-89DA-EBBBFF0A5C90},Oncorhynchus keta,,yes,,1,Live Fish and Redd,24_Chum_9,"{""x"": -123.93676287499994, ""y"": 46.35816224100...",2024-11-05 13:57:04-08:00,,1.0,1.0,1.0,
3,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{7CBBD0E9-1267-4576-8A1C-C6077FEFB12E},Oncorhynchus keta,,yes,,1,Live Fish and Redd,24_Chum_12,"{""x"": -123.93689526399999, ""y"": 46.35820218700...",2024-11-05 13:57:04-08:00,,1.0,1.0,1.0,
4,{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,yes,{C8EB9E09-144D-4B85-B0E3-7CC8BCF11320},Oncorhynchus keta,,yes,,1,Live Fish and Redd,24_Chum_12,"{""x"": -123.93723986499998, ""y"": 46.35837302500...",2024-11-05 13:57:04-08:00,,1.0,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{2FCBFA6A-1DBA-4E41-9668-3CFDC6A19832},Oncorhynchus keta,F,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00,,,,1.0,
74,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{F6CA3E44-9ADF-4BD6-A8CA-1DC811BAA566},Oncorhynchus keta,F,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00,,,,1.0,
75,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{96B453E3-CBA7-4525-BC70-A2BA7A4FC9DD},Oncorhynchus keta,M,no,no,,Live Fish,,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00,,,1.0,,
76,{AB8938C9-AD92-4284-AD2B-5DD373278C29},South Creek,11/26/2024,yes,{D21BF852-E2A1-432B-8437-3B1A7561F14F},,,,,1,Redd,24_SouthCrk_016,"{""x"": 5.684341886080802e-14, ""y"": 5.6843418860...",2024-12-02 09:07:41-08:00,,,,,


In [23]:
### Write the live fish records (with their count fields) to a CSV file in out_workspace
live_fish_csv_name = 'WLP_Salmon_Spawning_Survey_' + year + '_' + timestamp + '.csv'
live_fish_csv_path = os.path.join(out_workspace, live_fish_csv_name)
dfMetadataObserverLiveFish_after20211105.to_csv(live_fish_csv_path, index=False)

In [24]:
## Group by survey GUID and species (rows with missing species are kept); sum the numeric fields
after_sum_fields = ['intNumRedds', 'intReddBuilding', 'dblPairs', 'intMales', 'intFemales', 'intUnknown']
dfLiveFishSummary2 = dfMetadataObserverLiveFish_after20211105.groupby(
    ['globalid_x', 'strLiveSpecies'], as_index=False, dropna=False
).agg(**{sum_field: (sum_field, 'sum') for sum_field in after_sum_fields})

## Create field for sum of live fish
live_fish_parts = dfLiveFishSummary2[['intMales', 'intFemales', 'intUnknown']]
dfLiveFishSummary2['intLiveFish'] = live_fish_parts.sum(axis=1)
dfLiveFishSummary2

Unnamed: 0,globalid_x,strLiveSpecies,intNumRedds,intReddBuilding,dblPairs,intMales,intFemales,intUnknown,intLiveFish
0,{08A9C629-4AE7-40B2-8FDF-260C9CE70103},Oncorhynchus keta,2,0.0,1.0,4.0,10.0,1.0,15.0
1,{08A9C629-4AE7-40B2-8FDF-260C9CE70103},,12,0.0,0.0,0.0,0.0,0.0,0.0
2,{0A633476-25DB-431E-BB48-5A719CFD4C41},Oncorhynchus keta,0,0.0,0.0,0.0,1.0,0.0,1.0
3,{0A633476-25DB-431E-BB48-5A719CFD4C41},Oncorhynchus kisutch,0,0.0,0.0,3.0,1.0,0.0,4.0
4,{0A633476-25DB-431E-BB48-5A719CFD4C41},Unk,0,0.0,0.0,0.0,0.0,1.0,1.0
5,{0A633476-25DB-431E-BB48-5A719CFD4C41},,0,0.0,0.0,0.0,0.0,0.0,0.0
6,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Oncorhynchus keta,3,1.0,10.0,45.0,33.0,71.0,149.0
7,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Unk,0,0.0,0.0,0.0,0.0,2.0,2.0
8,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},,33,0.0,0.0,0.0,0.0,0.0,0.0
9,{978B1564-8479-4A56-90CE-23864AC96C19},Oncorhynchus keta,0,0.0,0.0,0.0,0.0,1.0,1.0


In [25]:
### Combine live fish data from before and after 11/5/2021
## Rows of the "before" summary come first; each summary keeps its own row labels because ignore_index is not set
dfLiveFishSummary = pd.concat([dfLiveFishSummary1, dfLiveFishSummary2])
dfLiveFishSummary

Unnamed: 0,globalid_x,strLiveSpecies,intNumRedds,intReddBuilding,dblPairs,intMales,intFemales,intUnknown,intLiveFish
0,{08A9C629-4AE7-40B2-8FDF-260C9CE70103},Oncorhynchus keta,2,0.0,1.0,4.0,10.0,1.0,15.0
1,{08A9C629-4AE7-40B2-8FDF-260C9CE70103},,12,0.0,0.0,0.0,0.0,0.0,0.0
2,{0A633476-25DB-431E-BB48-5A719CFD4C41},Oncorhynchus keta,0,0.0,0.0,0.0,1.0,0.0,1.0
3,{0A633476-25DB-431E-BB48-5A719CFD4C41},Oncorhynchus kisutch,0,0.0,0.0,3.0,1.0,0.0,4.0
4,{0A633476-25DB-431E-BB48-5A719CFD4C41},Unk,0,0.0,0.0,0.0,0.0,1.0,1.0
5,{0A633476-25DB-431E-BB48-5A719CFD4C41},,0,0.0,0.0,0.0,0.0,0.0,0.0
6,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Oncorhynchus keta,3,1.0,10.0,45.0,33.0,71.0,149.0
7,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Unk,0,0.0,0.0,0.0,0.0,2.0,2.0
8,{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},,33,0.0,0.0,0.0,0.0,0.0,0.0
9,{978B1564-8479-4A56-90CE-23864AC96C19},Oncorhynchus keta,0,0.0,0.0,0.0,0.0,1.0,1.0


In [26]:
### Testing live fish summary
## Same sums as the live fish summaries, but grouped by stream and date as well so each row can be read on its own
test_group_fields = ['globalid_x', 'strStream', 'dtmDate_Pacific', 'strLiveSpecies']
test_sum_fields = ['intNumRedds', 'intReddBuilding', 'dblPairs', 'intMales', 'intFemales', 'intUnknown']

dfLiveFishSummary_test1, dfLiveFishSummary_test2 = (
    live_fish_frame.groupby(test_group_fields, dropna=False).agg(
        **{sum_field: (sum_field, 'sum') for sum_field in test_sum_fields}
    )
    for live_fish_frame in (dfMetadataObserverLiveFish_before20211105, dfMetadataObserverLiveFish_after20211105)
)

## Total live fish per group is males + females + unknown
for test_summary in (dfLiveFishSummary_test1, dfLiveFishSummary_test2):
    test_summary['intLiveFish'] = test_summary[['intMales', 'intFemales', 'intUnknown']].sum(axis=1)

dfLiveFishSummary_test = pd.concat([dfLiveFishSummary_test1, dfLiveFishSummary_test2])
dfLiveFishSummary_test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,intNumRedds,intReddBuilding,dblPairs,intMales,intFemales,intUnknown,intLiveFish
globalid_x,strStream,dtmDate_Pacific,strLiveSpecies,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
{08A9C629-4AE7-40B2-8FDF-260C9CE70103},South Creek,11/21/2024,Oncorhynchus keta,2,0.0,1.0,4.0,10.0,1.0,15.0
{08A9C629-4AE7-40B2-8FDF-260C9CE70103},South Creek,11/21/2024,,12,0.0,0.0,0.0,0.0,0.0,0.0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,Oncorhynchus keta,0,0.0,0.0,0.0,1.0,0.0,1.0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,Oncorhynchus kisutch,0,0.0,0.0,3.0,1.0,0.0,4.0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,Unk,0,0.0,0.0,0.0,0.0,1.0,1.0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,,0,0.0,0.0,0.0,0.0,0.0,0.0
{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Chum Creek,11/14/2024,Oncorhynchus keta,3,1.0,10.0,45.0,33.0,71.0,149.0
{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Chum Creek,11/14/2024,Unk,0,0.0,0.0,0.0,0.0,2.0,2.0
{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Chum Creek,11/14/2024,,33,0.0,0.0,0.0,0.0,0.0,0.0
{978B1564-8479-4A56-90CE-23864AC96C19},Omeara Creek,11/21/2024,Oncorhynchus keta,0,0.0,0.0,0.0,0.0,1.0,1.0


In [27]:
### Put the test summary fields in reporting order, then sort by stream and survey date
## strStream and dtmDate_Pacific are group keys of the test summary, so they are sorted by name
test_field_order = ['intLiveFish', 'intMales', 'intFemales', 'intUnknown', 'dblPairs', 'intReddBuilding', 'intNumRedds']
dfLiveFishSummary_test = dfLiveFishSummary_test[test_field_order].sort_values(by=["strStream", "dtmDate_Pacific"])

arcpy.AddMessage("Completed live fish summary...")

In [28]:
### Create fields for counting carcasses
## Assume that null ysnCountedLast is 'yes' if strDecomposedFresh is 'Decomposed'
## Assume that null ysnCountedLast is 'no' if strDecomposedFresh is 'Fresh'
## The row selections only read ysnCountedLast, strDecomposedFresh and strCarcassSex,
## none of which is changed below, so they are worked out once up front
counted_last_answer = dfMetadataObserverCarcasses['ysnCountedLast']
carcass_condition = dfMetadataObserverCarcasses['strDecomposedFresh']
answer_missing = counted_last_answer.isna()

# yes OR null and decomposed
answered_yes = counted_last_answer == "yes"
assumed_yes = answer_missing & (carcass_condition == "Decomposed")
dfMetadataObserverCarcasses.loc[answered_yes, ['intCountedLast']] = dfMetadataObserverCarcasses['intNumCarcasses']
dfMetadataObserverCarcasses.loc[assumed_yes, ['intCountedLast']] = dfMetadataObserverCarcasses['intNumCarcasses']

# no OR null and fresh
is_new_carcass = (counted_last_answer == "no") | (answer_missing & (carcass_condition == "Fresh"))
new_carcass_fields = (
    ("M", 'intNewMales'),
    ("F", 'intNewFemales'),
    ("J", 'intNewJuveniles'),
    ("Unk", 'intNewUnknown'),
)
for sex_code, new_count_field in new_carcass_fields:
    new_of_this_sex = (dfMetadataObserverCarcasses['strCarcassSex'] == sex_code) & is_new_carcass
    dfMetadataObserverCarcasses.loc[new_of_this_sex, [new_count_field]] = dfMetadataObserverCarcasses['intNumCarcasses']

In [29]:
## Group by survey GUID and species; sum the numeric fields; add field for new carcasses
## dropna=False keeps carcasses whose species was left blank, matching the live fish summaries and the
## carcass test summary; without it those rows are silently left out of the totals
dfCarcassSummary = dfMetadataObserverCarcasses.groupby(
    by=['globalid_x', 'strCarcassSpecies'], as_index=False, dropna=False
).agg(
    intNumCarcasses=('intNumCarcasses', 'sum'),
    intCountedLast=('intCountedLast', 'sum'),
    intNewMales=('intNewMales', 'sum'),
    intNewFemales=('intNewFemales', 'sum'),
    intNewJuveniles=('intNewJuveniles', 'sum'),
    intNewUnknown=('intNewUnknown', 'sum'),
)
## New carcasses are all carcasses minus those already counted on the previous survey
dfCarcassSummary['intNewNumCarcasses'] = dfCarcassSummary['intNumCarcasses'] - dfCarcassSummary['intCountedLast']

In [30]:
### Testing carcasses summary
## Same sums as the carcass summary, but grouped by stream and date as well so each row can be read on its own
## (the deprecated axis argument of groupby is no longer passed; grouping by rows is the default)
dfCarcassSummary_test = dfMetadataObserverCarcasses.groupby(
    by=['globalid_x', 'strStream', 'dtmDate_Pacific', 'strCarcassSpecies'], dropna=False
).agg(
    intNumCarcasses=('intNumCarcasses', 'sum'),
    intCountedLast=('intCountedLast', 'sum'),
    intNewMales=('intNewMales', 'sum'),
    intNewFemales=('intNewFemales', 'sum'),
    intNewJuveniles=('intNewJuveniles', 'sum'),
    intNewUnknown=('intNewUnknown', 'sum'),
)
## New carcasses are all carcasses minus those already counted on the previous survey
dfCarcassSummary_test['intNewNumCarcasses'] = dfCarcassSummary_test['intNumCarcasses'] - dfCarcassSummary_test['intCountedLast']
dfCarcassSummary_test = dfCarcassSummary_test.sort_values(by=["strStream", "dtmDate_Pacific"])
dfCarcassSummary_test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,intNumCarcasses,intCountedLast,intNewMales,intNewFemales,intNewJuveniles,intNewUnknown,intNewNumCarcasses
globalid_x,strStream,dtmDate_Pacific,strCarcassSpecies,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,Oncorhynchus keta,29,2,6.0,9.0,0.0,11.0,27
{D19C25D5-B81C-4532-84E8-E3F2DDBD71DA},Chum Creek,11/05/2024,,3,0,2.0,1.0,0.0,0.0,3
{AD42D193-0670-45A7-BA9C-FCCCFA7B77B7},Chum Creek,11/12/2024,Oncorhynchus keta,104,0,2.0,2.0,0.0,98.0,104
{AD42D193-0670-45A7-BA9C-FCCCFA7B77B7},Chum Creek,11/12/2024,,0,0,0.0,0.0,0.0,0.0,0
{59CD964F-88F0-4E9F-8D4F-E44013B0DE40},Chum Creek,11/14/2024,Oncorhynchus keta,308,2,40.0,48.0,0.0,218.0,306
{CC496509-28DB-45D0-92D5-4AFB0C13FA9E},Chum Creek,11/19/2024,Oncorhynchus keta,208,190,3.0,15.0,0.0,0.0,18
{CC496509-28DB-45D0-92D5-4AFB0C13FA9E},Chum Creek,11/19/2024,,0,0,0.0,0.0,0.0,0.0,0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,Oncorhynchus keta,326,326,0.0,0.0,0.0,0.0,0
{0A633476-25DB-431E-BB48-5A719CFD4C41},Chum Creek,11/25/2024,,0,0,0.0,0.0,0.0,0.0,0
{FA509E0B-B4A5-474E-BBE2-0C222D31DE9B},Chum Creek,12/03/2024,Oncorhynchus keta,449,447,0.0,0.0,0.0,2.0,2


In [31]:
### Report progress through the arcpy messaging interface
arcpy.AddMessage("Completed carcass summary...")

In [32]:
### Copy dfMetadataObserver as start of summary data frames
dfSummary = dfMetadataObserver.copy()

# Calculate zeroes
## Surveys that answered "no" get an explicit count of 0
for answer_field, zero_count_field in (('ysnLiveFish', 'intLiveFish'), ('ysnCarcasses', 'intCarcasses')):
    answered_no = dfSummary[answer_field] == "no"
    dfSummary.loc[answered_no, [zero_count_field]] = 0

# Join
## Left joins keep every survey, including those without any live fish or carcass rows
## NOTE: both summary names are reused for the joined result, so this cell expects the un-joined summaries as input
dfLiveFishSummary = dfSummary.merge(dfLiveFishSummary, how="left", left_on="globalid", right_on="globalid_x")
dfCarcassSummary = dfSummary.merge(dfCarcassSummary, how="left", left_on="globalid", right_on="globalid_x")

In [33]:
### Cleanup dfLiveFishSummary
## intLiveFish_x is the explicit zero from the survey answer, intLiveFish_y the summed count; blanks become 0 before adding
for partial_count_field in ("intLiveFish_x", "intLiveFish_y"):
    count_is_blank = dfLiveFishSummary[partial_count_field].isna()
    dfLiveFishSummary.loc[count_is_blank, partial_count_field] = 0
dfLiveFishSummary["intLiveFish"] = dfLiveFishSummary["intLiveFish_x"] + dfLiveFishSummary["intLiveFish_y"]

## Keep the reporting fields in their final order, then sort by stream and survey date
live_fish_summary_fields = [
    'globalid',
    'strStream',
    'dtmDate_Pacific',
    'strFullName',
    'strTideStart',
    'strWeather',
    'dtmManualTimeStart',
    'dtmManualTimeTurn',
    'dtmManualTimeEnd',
    'dtmManualTimeTotal',
    'strStreamFlow',
    'strViewingConditions',
    'strViewingConditionsComments',
    'ysnLiveFish',
    'strLiveSpecies',
    'intLiveFish',
    'intMales',
    'intFemales',
    'intUnknown',
    'intReddBuilding',
    'dblPairs',
    'intNumRedds',
    'strComments',
]
dfLiveFishSummary = dfLiveFishSummary[live_fish_summary_fields]
dfLiveFishSummary = dfLiveFishSummary.sort_values(by=["strStream", "dtmDate_Pacific"])

In [34]:
### Cleanup dfCarcassSummary
## intCarcasses is the explicit zero from the survey answer, intNumCarcasses the summed count; blanks become 0 before adding
for partial_count_field in ("intCarcasses", "intNumCarcasses"):
    count_is_blank = dfCarcassSummary[partial_count_field].isna()
    dfCarcassSummary.loc[count_is_blank, partial_count_field] = 0
dfCarcassSummary["intTotalCarcasses"] = dfCarcassSummary["intCarcasses"] + dfCarcassSummary["intNumCarcasses"]

## Keep the reporting fields in their final order, then sort by stream and survey date
carcass_summary_fields = [
    'globalid',
    'strStream',
    'dtmDate_Pacific',
    'strFullName',
    'strTideStart',
    'strWeather',
    'dtmManualTimeStart',
    'dtmManualTimeTurn',
    'dtmManualTimeEnd',
    'dtmManualTimeTotal',
    'strStreamFlow',
    'strViewingConditions',
    'strViewingConditionsComments',
    'ysnCarcasses',
    'strCarcassSpecies',
    'intTotalCarcasses',
    'intCountedLast',
    'intNewNumCarcasses',
    'intNewMales',
    'intNewFemales',
    'intNewJuveniles',
    'intNewUnknown',
    'strComments',
]
dfCarcassSummary = dfCarcassSummary[carcass_summary_fields]
dfCarcassSummary = dfCarcassSummary.sort_values(by=["strStream", "dtmDate_Pacific"])

In [35]:
### Export data frames
## Use archive_dt_field function to convert Python date time into format Excel can read more easily
## Each (sheet name, frame) pair becomes one sheet of the workbook, written in this order
summary_sheets = [
    ('Metadata', dfMetadataObserver),
    ('Live Fish', dfMetadataObserverLiveFish),
    ('Carcasses', dfMetadataObserverCarcasses),
    ('Live Fish Summary', dfLiveFishSummary),
    ('Carcass Summary', dfCarcassSummary),
]
for _, summary_frame in summary_sheets:
    archive_dt_field(summary_frame)

## Create export path and write to Excel spreadsheet
## The with-block closes (and saves) the workbook even if one of the sheets fails to write
summary_path = os.path.join(out_workspace, ('WLP_Salmon_Spawning_Survey_' + year + '_' + timestamp + '.xlsx'))
with pd.ExcelWriter(summary_path) as writer:
    for sheet_name, summary_frame in summary_sheets:
        summary_frame.to_excel(writer, sheet_name=sheet_name, index=False)

arcpy.AddMessage("Summary data exported to Excel spreadsheet.")