# Description

In [5]:
"""
Model Result Processing
Author: Liam Megraw, RIT Environmental Science Technician
Date last edited: 4/6/2023
ESRI ArcGIS Pro Version 2.5.2
Default Python 3.x kernel


Description:

This code processes results from the RIT-developed computer 
vision model into thresholded predictions of presence and 
absence for multiple species at three thresholds each.

Inputs required, stored in one geodatabase at the same projection:
> 1 feature class of roads within your study area
> 1 or more feature class(es) containing unthresholded CV model points

Outputs:

The final outputs are a point layer and a line feature layer.
The point layer contains computer vision model-predicted points
above a threshold for five target species. The line layer 
contains computer vision model-predicted locations *below* the 
chosen threshold for five target species, representing absences


How to Use:
If you are running this for the first model output file from 2022, no code needs 
to be changed. If you are running for a later model output, you may want to 
choose a set of different threshold names in cell three if those in 
["recall","F1","precision"] no longer match the decision criteria scenarios 
you've chosen. If you are running for species beyond the initial five, you need 
to add entries into the "species_fullnames" and the "jurisdiction_ids" dictionaries 
in cell three.

Once the script is complete, you can then replace the old 
absence lines on ArcGIS Online and input the model positives 
into the "model_prediction_priority.ipynb" code to replace the positives.
"""

"\nModel Result Processing\nAuthor: Liam Megraw, RIT Envirionmental Science Technician\nDate last edited: 3/16/2023\nESRI ArcGIS Pro Version 2.5.2\n\n\nDescription:\n\nThis code processes results from the RIT-developed computer \nvision model into thresholded predictions of presence and \nabsence for multiple species at three thresholds each.\n\nInputs:\n> 1 feature class of roads within your study area\n> 1 or more feature class(es) containing unthresholded CV model points\n\nOutputs:\n\nThe final outputs are a point layer and a line feature layer.\nThe point layer contains computer vision model-predicted points\nabove a threshold for five target species. The line layer \ncontains computer vision model-predicted locations *below* the \nchosen threshold for five target species, representing absences\n\n\nHow to Use:\n\nEditing is necessary to assign your geodatabse location for road \ndata, as well as either a shapefile/feature class for raw model \ndata OR the folder of raw model data

# Define workspace

In [4]:
import arcpy
from arcpy import env
# Geoprocessing settings are applied through the `env` alias imported above
# (the same object as arcpy.env). Existing outputs may be overwritten.
env.overwriteOutput = True
# The geodatabase entered here becomes the arcpy workspace for every tool call
env.workspace = input('Enter geodatabase path: ')
# Name of the roads dataset; used later when intersecting absence buffers
roads = input("Enter road dataset name here: ")
def import_CSVs():
    """Ask whether CSV datasets need importing and, if so, batch import them.

    The yes/no question is repeated until the answer is exactly 'y' or 'n'.
    On 'y' the user is asked for a folder, and every "*.csv" file in it
    (sub-folders excluded) is passed to
    ``arcpy.intelligence.BatchImportData`` with the current workspace as
    the target. On 'n' nothing is imported.

    Returns:
        None
    """
    while True:
        process_need = input("Do you need to import datasets from CSVs? (y/n): ")
        if process_need == 'n':
            # Nothing to import
            return
        if process_need == 'y':
            # Batch import csvs into gdb
            model_pred_folder = input("Filepath containing model prediction CSVs: ")
            print("Importing CSVs...")
            arcpy.intelligence.BatchImportData(
                model_pred_folder,
                arcpy.env.workspace,
                "*.csv",
                "NO_SUBFOLDERS",
            )
            return
        # Any other answer: ask again
        print("Incorrect entry, try again")
def create_processing_list():
    """Interactively build the list of dataset names to process.

    The user is asked whether more than one dataset should be processed:

    * 'y' - prompts for a starting and an ending dataset number and returns
      ``["pred_<start>", ..., "pred_<end>"]`` (both ends inclusive).
    * 'n' - prompts for a single dataset name and returns it in a
      one-element list.

    Any other answer, or a dataset number that is not an integer, prints
    "Incorrect entry, try again" and restarts the prompts. The retry is a
    loop so that the list is returned after a retry as well; the previous
    recursive retry dropped its result and returned None.

    Returns:
        list[str]: dataset names to process.
    """
    while True:
        number_choice = input("Do you have more than one input dataset you'd like to process? (y/n): ")
        if number_choice == "y":
            n_start = input("Starting dataset number (e.g., enter '5' for pred_5): ")
            n_end = input("Ending dataset number : ")
            try:
                first, last = int(n_start), int(n_end)
            except ValueError:
                # Non-numeric dataset number: re-prompt instead of crashing
                print("Incorrect entry, try again")
                continue
            return ["pred_" + str(i) for i in range(first, last + 1)]
        if number_choice == "n":
            return [input("Enter dataset name: ")]
        print("Incorrect entry, try again")
# Run the optional CSV import prompt before the datasets are chosen
import_CSVs()
# Dataset names chosen here are iterated by the processing cell further down
inputFeatures = create_processing_list()
print("Datasets to process:",inputFeatures)

['pred_1']


# Define Overall Parameters - edits may be desired

In [3]:
# Parameters that MAY be edited if scaling to a different number of 
# species

# Mapping of short species key -> full "species" name (some are 
# technically groups of target species) for presence records; the 
# full name is used when filtering in ArcGIS Online
species_fullnames = {
    # These extra single quotes are intentional since 
    # they are used in a field calculation
    "phrag": "'Phragmites, Unspecified'", 
    "knot": "'Knotweed, Unspecified'",
    "wp": "'Wild Parsnip'",
    "toh": "'Tree-of-Heaven (Ailanthus)'",
    "pl": "'Purple Loosestrife'"
}
# This collection of species keys ranges from 1-k, returning n layers 
# equal to k*thresholds*operators (e.g., 30 by default with five 
# species, three thresholds, and two operators)
species_list = species_fullnames.keys()
# List of optimized performance criteria suffixes; these are used in 
# output names and (without the leading underscore) as field values
op_criteria = ["_recall", "_F1", "_precision"]
# Create a dictionary of threshold values per species at each 
# optimized criteria. NOTE: the precision keys end in "_prec", not 
# "_precision" as in op_criteria; the processing cell looks them up 
# with the "_prec" suffix, so keep that spelling when adding species.
threshold_dict = {
    "phrag_recall": 0.07,
    "phrag_F1": 0.33,
    "phrag_prec": 0.9,
    "knot_recall": 0.03,
    "knot_F1": 0.29,
    "knot_prec": 0.8,
    "wp_recall": 0.25,
    "wp_F1": 0.49,
    "wp_prec": 0.8,
    "toh_recall": 0.09,
    "toh_F1": 0.63,
    "toh_prec": 0.9,
    "pl_recall": 0.05,
    "pl_F1": 0.44,
    "pl_prec": 0.93
}
# This parameter should not change unless you have a good reason to 
# do so. This is the maximum distance to consider model predictions 
# continuous, defined by the 95th percentile of panorama separation 
# in Broome County, NY. It is used as the buffer distance when 
# creating absence lines.
line_ag_dist = "12.4 meters" 

# Threshold, Combine, and Create Presence and Absence Features from Model Predictions

In [6]:
# Threshold, Combine, and Create Presence and Absence Features 
# from Model Predictions
#
# For every input dataset this cell:
#   1. selects the records at/above (presence) and below (absence) each 
#      species threshold and copies them to temporary feature classes,
#   2. tags presence points with the species name and decision criteria,
#   3. buffers absence points, intersects the buffers with the roads 
#      dataset to produce absence lines, and tags them the same way,
#   4. merges the temporary outputs into final feature classes and 
#      deletes the temporary ones.
import os # This module is used when deleting items

selectionType = "NEW_SELECTION"
fieldName = "Common_Nam" # Name the field for common species name
op_criteria_field = 'op_criteria'

# Define to process the conditions >= threshold and < threshold.
# operators and record_types are paired by position.
operators = [" >= ", " < "]
record_types = ["presence", "absence"]

# Set parameters for buffering to create absence lines
bufferDistance =  line_ag_dist # Defined in cell 3
sideType = "FULL"
endType = "ROUND"
# Dissolves based on date, accounting for spatially overlapping 
# but temporally distinct records
dissolveType = "LIST" 
dissolveField = ["date"]
method = "PLANAR"

# Used when deleting items
cws = arcpy.env.workspace

# Per-species query settings: the model score field to threshold and an 
# optional extra filter on the `date` field. The date filter keeps only 
# records whose date text ends with one of the listed suffixes 
# (presumably the month -- confirm against the input date format).
# loosestrife may be spelled incorrectly as 'loostrife' in the input 
# table; adjust the field name below if so.
_dateFilterJulOct = " And (date LIKE '%-07' Or date LIKE '%-08' Or date LIKE '%-09' Or date LIKE '%-10')"
species_queries = {
    "wp": ("wild_parsnip",
           " And (date LIKE '%-05' Or date LIKE '%-06' Or date LIKE '%-07')"),
    "toh": ("tree_of_heaven_with_seeds", _dateFilterJulOct),
    "pl": ("purple_loosestrife", _dateFilterJulOct),
    "phrag": ("phra", ""),
    "knot": ("knot", ""),
}


def _delete_temp_features(names, workspace):
    """Delete each named dataset from the workspace if it exists.

    Joining the workspace path and the name is necessary to delete the 
    feature itself and not just its contents.
    """
    for name in names:
        featurePath = os.path.join(workspace, name)
        if arcpy.Exists(featurePath):
            arcpy.Delete_management(featurePath)


print("Processing features: ")
print(inputFeatures)
# Create loop for each condition (presence and absence records), 
# ultimately returning n layers for each presence and absence
for inputFeature in inputFeatures:

    # Names of temporary outputs, collected for merging and deletion
    absenceBufferList = []
    absenceLineList = []
    mergeListPresence = []
    mergeListAbsence = []

    print("----------Running for "+inputFeature+" ----------")
    # Add fields to contain species name and optimized performance 
    # criteria
    print("Adding fields...")
    # For common name to filter on ArcGIS Online
    arcpy.management.AddField(inputFeature, fieldName, "TEXT")
    # For decision criteria to filter on ArcGIS Online
    arcpy.management.AddField(inputFeature, op_criteria_field, "TEXT")

    for i1, operator in enumerate(operators):
        print("Creating "+record_types[i1]+" records")
        # Loop through three thresholds for each species
        for species in species_list:
            print("***Processing: "+species_fullnames[species])
            # Fail loudly for a species without query settings rather 
            # than silently reusing the previous species' where clause
            if species not in species_queries:
                raise KeyError("No query settings for species '" + species
                               + "'; add an entry to species_queries")
            scoreField, dateFilter = species_queries[species]
            # Create a list of thresholds for the species
            thresholds = [threshold_dict[species+"_recall"],
                          threshold_dict[species+"_F1"],
                          threshold_dict[species+"_prec"]
                          ]
            # Iterate processing for each of the thresholds
            for i, threshold in enumerate(thresholds):
                # Define the conditions of threshold and date range 
                # if desired, per species
                whereClause = (scoreField + operator
                               + str(threshold) + dateFilter)
                # Return species name and recall, F1, or precision 
                # suffix
                message = (species_fullnames[species]+op_criteria[i])
                print("******Running for: "+message)
                # Field-calculation expression for the criteria name, 
                # either 'recall', 'F1', or 'precision': the ith 
                # string in op_criteria without its leading 
                # underscore, wrapped in single quotes
                criteriaExpression = "'"+(op_criteria[i])[1:]+"'"
                # Select only the model predictions above/below the 
                # selected thresholds then split into a layer for 
                # each species for further analysis
                sel = arcpy.management.SelectLayerByAttribute(inputFeature, selectionType, whereClause, "") 
                # Define output name, e.g., temp_presence_phrag_recall
                outputFeature = ("temp_"+record_types[i1]+"_"
                                 +species+op_criteria[i]
                                 ) 
                # Copy features to new output
                arcpy.management.CopyFeatures(sel, outputFeature)
                # Clear selection for next run
                arcpy.management.SelectLayerByAttribute(inputFeature, 
                                                        'CLEAR_SELECTION'
                                                        )
                # Presence point attribute addition. Compare by value 
                # (==); identity (is) on strings only works by accident.
                if operator == " >= ":
                    # add feature name to a list to feed into the 
                    # merge function later
                    mergeListPresence.append(outputFeature) 
                    # Set field equal to species name, indicating the
                    # record type
                    arcpy.management.CalculateField(outputFeature, 
                                                    fieldName, 
                                                    species_fullnames[species]
                                                    ) 
                    # Set field equal to criteria name
                    arcpy.management.CalculateField(outputFeature, 
                                                    op_criteria_field,
                                                    criteriaExpression) 

                # Create absence lines
                else:
                    mergeListAbsence.append(outputFeature)
                    print("******Buffering and creating absence lines for "+outputFeature)
                    outputBuffer = (outputFeature + "_NO")
                    # Buffer the feature
                    arcpy.Buffer_analysis(outputFeature, 
                                          outputBuffer, 
                                          bufferDistance, 
                                          sideType, 
                                          endType, 
                                          dissolveType, 
                                          dissolveField, 
                                          method
                                          )
                    # Add output names to list for deleting 
                    # later
                    absenceBufferList.append(outputBuffer)
                    # Add fields to add criteria and species
                    arcpy.management.AddField(outputBuffer, 
                                              fieldName, 
                                              "TEXT"
                                              )
                    arcpy.management.AddField(outputBuffer,
                                              op_criteria_field,
                                              "TEXT"
                                              )
                    arcpy.management.CalculateField(outputBuffer, 
                                                    fieldName,
                                                    species_fullnames[species]
                                                    ) 
                    arcpy.management.CalculateField(outputBuffer,
                                                    op_criteria_field,
                                                    criteriaExpression) 
                    # Intersect each buffer with roads
                    outputRoads = (outputFeature + "_road")
                    arcpy.analysis.Intersect([outputBuffer, roads],
                                             outputRoads, 
                                             "",
                                             "",
                                             "LINE")
                    # Add output names to list for merging
                    absenceLineList.append(outputRoads)
                    # Delete the FID field
                    field = "FID_temp_absence_"+species+op_criteria[i]+"_NO"
                    arcpy.management.DeleteField(outputRoads, 
                                                 field)
    print("Presence points and absence line records created for "+inputFeature)

    # Create final merged features. Each merge is skipped when its list 
    # is empty (e.g., when only one operator is being processed), since 
    # there is nothing to merge.
    if mergeListPresence:
        print("Merging processed features for "+inputFeature)
        # Merge presence points together
        outputName = ("model_predicted_presences_"+inputFeature)
        arcpy.management.Merge(mergeListPresence, outputName)
        print("***Final processed presence file named " + outputName 
              + " is complete")
    if mergeListAbsence:
        # Merge absence points together
        outputName = ("model_predicted_absences_"+inputFeature)
        arcpy.management.Merge(mergeListAbsence, outputName)
        print("***Final processed absence point file named " + outputName 
              + " is complete")
    if absenceLineList:
        # Merge absence lines together
        outputName = ("model_not_observed_"+inputFeature)
        arcpy.management.Merge(absenceLineList, outputName)
        print("***Final processed absence file named " + outputName 
              + " is complete")
    # Delete temporary files. The directory (cws) is defined above the 
    # main loop
    print("Deleting temporary files")
    # Delete unmerged presence points
    print("***Deleting presence points")
    _delete_temp_features(mergeListPresence, cws)
    print("***Deleting absence buffers")
    # Delete temporary absence buffers
    _delete_temp_features(absenceBufferList, cws)
    print("***Deleting absence points")
    # Delete unmerged absence points
    _delete_temp_features(mergeListAbsence, cws)
    print("***Deleting absence lines")
    # Delete unmerged absence lines
    _delete_temp_features(absenceLineList, cws)
    print("Temporary files deleted")

    print("----------Processing completed for "
          +inputFeature+" ----------")

Processing features: 
['Hamilton_AGOL_op']
----------Running for Hamilton_AGOL_op ----------
Adding fields...
Creating absence records
***Processing: 'Phragmites, Unspecified'
******Running for: 'Phragmites, Unspecified'_recall
******Buffering and creating absence lines for temp_absence_phrag_recall
******Running for: 'Phragmites, Unspecified'_F1
******Buffering and creating absence lines for temp_absence_phrag_F1
******Running for: 'Phragmites, Unspecified'_precision
******Buffering and creating absence lines for temp_absence_phrag_precision
***Processing: 'Knotweed, Unspecified'
******Running for: 'Knotweed, Unspecified'_recall
******Buffering and creating absence lines for temp_absence_knot_recall
******Running for: 'Knotweed, Unspecified'_F1
******Buffering and creating absence lines for temp_absence_knot_F1
******Running for: 'Knotweed, Unspecified'_precision
******Buffering and creating absence lines for temp_absence_knot_precision
***Processing: 'Wild Parsnip'
******Running for: