In [None]:
"""
Gap Analysis
Author: Liam Megraw, RIT Envirionmental Science Technician
Date last edited: 10/25/2022
ESRI ArcGIS Pro Version 2.5.2

Description:
This code processes uses results from the RIT-developed computer 
vision model and iMapInvasives records to identify gaps in reporting
on a per-species basis.

Outputs:
The final outputs are polygon layers at a 1 and 5 km resolution with 
attributes per species detailing the type of records within a cell 
(model only, iMap only, or both), and if there is overlap, a ratio
between the two types of records.

How to Use:
These layers can be hosted on the ArcGIS Online Public and Manager Dashboards.
TBD on if any manual intervention is necessary...
"""

In [None]:
"""
Pseudocode Overview

Assign workspace & inputs
Create state-wide fishnets at 1 and 5 km resolutions
Create lists of geometry types
    iMap: point, line, polygon
    model: point
For both model data and iMap data:
    For each species:
        For each geometry type:
            Select records of only that species
            Spatially join records to fishnet
        Add & calculate fields
            Total join count for that species
            Overlap type if statement:
                Cells where model data join count is above zero and iMap join count above zero: both (i.e., overlap)
                Cells where model data join count is above zero and iMap join count is zero: (i.e, model only)
                Cells where model data join count is zero and iMap join count is above zero (i.e., iMap only)
            Calculate ratio between model and iMap
        Delete per-geometry spatial join fields
Export results

"""

In [1]:
#----- Get and set WD to gdb -----
import arcpy
import os
gdb = "Reporting_Analysis\\Reporting_Analysis.gdb" # Put your relative (or absolute) geodatabase path here
gdb_abs = os.path.abspath(gdb) # Gets absolute path of geodatabase

dwd = os.getcwd() # Gets current working directory
print(dwd)

if dwd in gdb_abs:
    print("Workspace path is already your defined geodatabase")
else:
    os.chdir(gdb) # Changes directory to geodatabase 
    new_path = os.getcwd() # Assigns current working directory to variable
    print("Workspace path changed to: " + new_path)
    
arcpy.env.overwriteOutput = True

C:\Users\ltm4654\Documents\ArcGIS\Projects\Final_Deployment
Workspace path is already your defined geodatabase


In [3]:
# Necessary input files
model_pred = ["pred_finalDeployment_all",] # each species must have their own column

# Create list of positive predictions for each species at one threshold
model_positives = list()
model_nd = list()
species_suffixes = ["phragmites", "knotweed", "wp", "toh", "pl"]
threshold_suffix = "_precision"
for species_suffix in species_suffixes:
    model_positives.append("SVI_Project_presences_"+species_suffix+threshold_suffix)
    model_nd.append("SVI_Project_absences_"+species_suffix+threshold_suffix)

print("Model positives:")
print(model_positives)
    
# Create list of imap features to iterate over
imap_data = list() #[imap_points, imap_lines, imap_polygons, imap_notDetected]
geometries = ["POINT","LINE","POLYGON"]
types = ["CONFIRMED","UNCONFIRMED"]
for geometry in geometries:
    for record_type in types:
        if record_type is "CONFIRMED":
            imap_data.append("PRESENCE_"+geometry)
        else:
            imap_data.append("PRESENCE_"+geometry+"_"+record_type)
imap_data.append("NOT_DETECTED_POLYGON")

print("iMap datasets:")
print(imap_data)

aoi = ['4,481,032.099500 105,606.381800 4,985,489.904000 770,761.900100'] # New york state boundary coordinates in UTM Zone 18N projection (Coordinates are expressed in the order of x-min, y-min, x-max, y-max)

# Create dictionary of long names
# Names used for filtering in ArcGIS Online
species_fullnames = {
    "phrag": "'Phragmites, Unspecified'", # extra sinlge quotes are intentional since these are used in a field calculation
    "knot": "'Knotweed, Unspecified'",
    "wp": "'Wild Parsnip'",
    "toh": "'Tree-of-Heaven (Ailanthus)'",
    "pl": "'Purple Loosestrife'"
}

# Extract only the keys to a list
species_names = species_fullnames.keys()

# IDs that iMap assigns to the various species of interest
jurisdiction_ids = {
    "phrag": 1277,
    "wp": 1182,
    "pl": 1265,
    "toh": 1167,
    "knot": (1074, 1191, 1278, 1479) # Includes Japanese knotweed, giant knotweed, bohemian knotweed, and knotweed species unknown
}

Model positives:
['SVI_Project_presences_phragmites_precision', 'SVI_Project_presences_knotweed_precision', 'SVI_Project_presences_wp_precision', 'SVI_Project_presences_toh_precision', 'SVI_Project_presences_pl_precision']
iMap datasets:
['PRESENCE_POINT', 'PRESENCE_POINT_UNCONFIRMED', 'PRESENCE_LINE', 'PRESENCE_LINE_UNCONFIRMED', 'PRESENCE_POLYGON', 'PRESENCE_POLYGON_UNCONFIRMED', 'NOT_DETECTED_POLYGON']


In [None]:
# # Code to create a fishnet for the state if you do not already have one
# cellsize_measures = '5' # two grids are desired so these values are each the width and height argument for the fishnet function
# fishnet_output_name = 'grid_nys_18N_5km'
# # Create fishnet 
# arcpy.management.CreateFishnet(fishnet_output_name, '4,985,489.904000 105,606.381800', '4,481,032.099500 105,606.381800', cellsize, cellsize, '0', '0', {corner_coord}, 'NO_LABELS', aoi, 'POLYGON')


# For Thresholdless Only

In [51]:
# For thresholdless reporting analysis
dataset_lists = [imap_data,]

# Lists of fields to fill in later
tmpCnfrm = list()
tmpUncnfrm = list()

tmpFeatures = list() # List of temp features to deleve later

# Define target feature for first run
targetFeature = "tmpRag_model"

for datasets in dataset_lists:
    if datasets is model_pred:
        print("Processing model data")
    else:
        print("Processing iMap data")
    for joinFeature in datasets:

        print("***Evaluating conditions")
        # If model data ----------
        if datasets is model_pred: 

            outFeature = "tmpRAG_model"
            fieldName = "model_points"
                
        # If iMap data ----------    
        else:
        
            # Define unique output names
            if "UNCONFIRMED" in joinFeature:
                if "POINT" in joinFeature:
                    outFeature = "tmpRAG_point_uncnfrm"
                    fieldName = "imap_point_uncnfrm"
                if "LINE" in joinFeature:
                    outFeature = "tmpRAG_line_uncnfrm"
                    fieldName = "imap_line_uncnfrm"
                if "POLYGON" in joinFeature:
                    outFeature = "tmpRAG_poly_uncnfrm"
                    fieldName = "imap_poly_uncnfrm"
                tmpUncnfrm.append("!"+fieldName+"!")
            elif "NOT_DETECTED" in joinFeature:
                outFeature = "tmpRAG_nd"
                fieldName = "iMap_nd"
            else:
                if "POINT" in joinFeature:
                    outFeature = "tmpRAG_point_cnfrm"
                    fieldName = "imap_point_cnfrm"
                if "LINE" in joinFeature:
                    outFeature = "tmpRAG_line_cnfrm"
                    fieldName = "imap_line_cnfrm"
                if "POLYGON" in joinFeature:
                    outFeature = "tmpRAG_poly_cnfrm"
                    fieldName = "imap_poly_cnfrm"

                # Add field to list for use in calculating later
                tmpCnfrm.append("!"+fieldName+"!")
            
        # Add field names to list of what fields to keep
        keepFields.append(fieldName)
        # Add feature to list to delete later
        tmpFeatures.append(outFeature)
        
        print("***Spatially joining "+joinFeature)
        # Count the features within each grid cell
        arcpy.analysis.SpatialJoin(targetFeature, joinFeature, outFeature)
        
        print("***Renaming field")
        # Rename join_count field
        arcpy.management.AlterField(outFeature, "JOIN_COUNT", fieldName, fieldName)
        # Make the output feature the input for the next join
        targetFeature = outFeature

# Delete unnecessary fields
# keepFields is iteratively appended above
print("Deleting extra fields")
keepFields = keepFields + ['model_points', 'iMap_nd', 'OBJECTID', 'Shape', 'Shape_Area', 'Shape_Length']
allFields = [f.name for f in arcpy.ListFields(outFeature)]
# Get a list of all fields *except* the ones we want to keep
deleteFields = list(set(allFields) - set(keepFields))
arcpy.management.DeleteField(outFeature, deleteFields)
        
print("Renaming final output")
# Re-name the final output feature that has all desired fields
thresholdless_feature = "reporting_analysis_grid_thresholdless"
arcpy.management.CopyFeatures(outFeature, thresholdless_feature)


print("Calculating total iMap features joined")
# Calculate the total number of iMap features joined

print("Adding count fields")
cName = "iMap_Conf"
uName = "iMap_Unconf"
arcpy.management.AddFields(thresholdless_feature, [
    [cName, 'LONG'],
    [uName, 'LONG']
])

print("***Summing: ")
tmpCnfrm = tuple(tmpCnfrm)
print(tmpCnfrm)
arcpy.management.CalculateField(thresholdless_feature, cName, tmpCnfrm[0]+"+"+tmpCnfrm[1]+"+"+tmpCnfrm[2])
print("***Summing: ")
print(tmpUncnfrm)
tmpUncnfrm = tuple(tmpUncnfrm)
arcpy.management.CalculateField(thresholdless_feature, uName, tmpUncnfrm[0]+"+"+tmpUncnfrm[1]+"+"+tmpUncnfrm[2])

print("Adding ratio fields")
crName = "C_ratio"
cuName = "CU_ratio"

arcpy.management.AddFields(thresholdless_feature, [
    [crName, 'FLOAT'],
    [cuName, 'FLOAT']
])

print("Calculating ratios")
arcpy.management.CalculateField(thresholdless_feature, crName, "!model_points!/(!iMap_Conf!+!iMap_nd!)")
arcpy.management.CalculateField(thresholdless_feature, cuName, "!model_points!/(!iMap_Conf!+!iMap_nd!+!iMap_Unconf!)")

print("Done!")

Processing iMap data
***Evaluating conditions
***Spatially joining PRESENCE_POINT
***Renaming field
***Evaluating conditions
***Spatially joining PRESENCE_POINT_UNCONFIRMED
***Renaming field
***Evaluating conditions
***Spatially joining PRESENCE_LINE
***Renaming field
***Evaluating conditions
***Spatially joining PRESENCE_LINE_UNCONFIRMED
***Renaming field
***Evaluating conditions
***Spatially joining PRESENCE_POLYGON
***Renaming field
***Evaluating conditions
***Spatially joining PRESENCE_POLYGON_UNCONFIRMED
***Renaming field
***Evaluating conditions
***Spatially joining NOT_DETECTED_POLYGON
***Renaming field
Deleting extra fields
Renaming final output
Calculating total iMap features joined
Adding count fields
***Summing: 
('!imap_point_cnfrm!', '!imap_line_cnfrm!', '!imap_poly_cnfrm!')
***Summing: 
['!imap_point_uncnfrm!', '!imap_line_uncnfrm!', '!imap_poly_uncnfrm!']
Adding ratio fields
Calculating ratios


ExecuteError: ERROR 000539: Traceback (most recent call last):
  File "<expression>", line 1, in <module>
NameError: name 'iMap_nd' is not defined

Failed to execute (CalculateField).


In [53]:
# Delete temporary files
# This way is necessary to delete the feature itself and not just its contents
import os
cws = arcpy.env.workspace

# Delete unmerged presence points
for input in tmpFeatures:
  input_path = os.path.join(cws, input)
  if arcpy.Exists(input_path):
    arcpy.Delete_management(input_path)

# For Species-Based Approach

In [16]:
# Per-species gap analysis with thresholded data
dataset_lists = [model_positives, model_nd, imap_data]
keepFields = list() # List to add fields into to keep in the final output
tmpFeatures = list()

# Define target feature for first run
targetFeature = "reporting_analysis_grid_thresholdless"

for datasets in dataset_lists:
    for species in species_names:
        # For the individual feature in each dataset list
        # Create join count features
        for joinFeature in datasets:
            print(joinFeature)
            
            # Select just the species
            
            # If model positives ----------
            if datasets is model_positives: 
                whereClause = "Common_Nam = "+species_fullnames[species]
                outFeature = "tmpRAG"+species+"_model"
                fieldName = 'model_'+species
            
            elif datasets is model_nd:
                whereClause = "Common_Nam = "+species_fullnames[species]
                outFeature = "tmpRAG"+species+"_model"
                fieldName = 'model_nd_'+species
            # If iMap data ----------    
            else:
                # Code to select 4 knotweed IDs
                if species is "knot":
                    # Set initial SQL query
                    whereClause = "jurisdiction_species_id = "+str(jurisdiction_ids["knot"][0])
                    # Add more conditions to query
                    for ID in jurisdiction_ids["knot"][1:]:
                        whereClause = whereClause + " Or jurisdiction_species_id = " + str(ID)
                # Otherwise, just select the single species ID
                else:
                    whereClause = "jurisdiction_species_id = "+str(jurisdiction_ids[species])
                
                # Define unique output names
                if "UNCONFIRMED" in joinFeature:
                    if "POINT" in joinFeature:
                        outFeature = "tmpRAG"+species+"_point"+"_uncnfrm"
                        fieldName = "imap_point_"+species+"_uncnfrm"
                    if "LINE" in joinFeature:
                        outFeature = "tmpRAG"+species+"_line"+"_uncnfrm"
                        fieldName = "imap_line_"+species+"_uncnfrm"
                    if "POLYGON" in joinFeature:
                        outFeature = "tmpRAG"+species+"_poly"+"_uncnfrm"
                        fieldName = "imap_poly_"+species+"_uncnfrm"
                elif "NOT_DETECTED" in joinFeature:
                    outFeature = "tmpRAG_nd_"+species
                    fieldName = "iMap_nd_"+species
                else:
                    if "POINT" in joinFeature:
                        outFeature = "tmpRAG"+species+"_point"+"_cnfrm"
                        fieldName = "imap_point_"+species+"_cnfrm"
                    if "LINE" in joinFeature:
                        outFeature = "tmpRAG"+species+"_line"+"_cnfrm"
                        fieldName = "imap_line_"+species+"_cnfrm"
                    if "POLYGON" in joinFeature:
                        outFeature = "tmpRAG"+species+"_poly"+"_cnfrm"
                        fieldName = "imap_poly_"+species+"_cnfrm"
            # ----------
            
            # Add field names to list of what fields to keep
            keepFields.append(fieldName)
            # Add feature to list to delete later
            tmpFeatures.append(outFeature)
            
            # Make species selection
            sel = arcpy.SelectLayerByAttribute(joinFeature, "NEW_SELECTION", whereClause)
                
            # Count the features within each grid cell
            arcpy.analysis.SpatialJoin(targetFeature, sel, outFeature)
            
            # Clear selection for next run
            arcpy.SelectLayerByAttribute(joinFeature, "CLEAR_SELECTION")
            
            # Rename the join field
            arcpy.management.AlterField(outFeature, "JOIN_COUNT", fieldName, fieldName)
            
            # Make the output feature the input for the next join
            targetFeature = outFeature

# Delete unnecessary fields
# keepFields is iteratively appended above
print("Deleting extra fields")
keepFields = keepFields + ['OBJECTID', 'Shape', 'Shape_Area', 'Shape_Length']
allFields = [f.name for f in arcpy.ListFields(outFeature)]
# Get a list of all fields *except* the ones we want to keep
deleteFields = list(set(allFields) - set(keepFields))
arcpy.management.DeleteField(outFeature, deleteFields)

# Re-name the final output feature that has all desired fields
RAG_final = "reporting_analysis_grid_final"
arcpy.management.CopyFeatures(outFeature, RAG_final)

empty
empty1
empty12
empty123
empty1234
empty12345
empty123456
empty1234567
empty12345678
empty123456789
empty12345678910
empty1234567891011
empty123456789101112
empty12345678910111213
empty1234567891011121314
empty123456789101112131415
empty12345678910111213141516
empty1234567891011121314151617
empty123456789101112131415161718
empty12345678910111213141516171819


In [None]:
# Summary calculations ----------

# Create lists to use for calculating per-species totals

geometries = ["point","line","poly"]
types = ["cnfrm","uncnfrm"]

sum_dict = dict() # filled in with 10 key-value pairs with loop below

for species in species_names:
    for record_type in types:
        tmpList = list()
        for geometry in geometries:
            sum_dict.update({"imap_"+species+"_"+record_type: []})
            
            tmpList.append("!imap_"+geometry+"_"+species+"_"+record_type+"!")
        sum_dict["imap_"+species+"_"+record_type].append(tmpList)

for datasets in dataset_lists:
    for species in species_names:
        
        if datasets not imap_data:
            pass # Wait to calculate iMap sums first
        # Multi-geometry section ----------
        
        else:
            for record_type in types:
                # Sum per-species features across all geometry types for each record type (30 fields summed into 10)
                print("Calculating per-species iMap features joined")
                # Calculate the total number of iMap features joined
                print("Adding count fields")
                rName = "iMap_"+record_type+"_"+species
                arcpy.management.AddField(RAG_final, rName, 'LONG')

                # Unpack list in dictionary entries
                sum_fields = [x for l in sum_dict['imap_'+species+"_"+record_type] for x in l]

                # Create expression for calculating sum of all geometry types
                express = sum_fields[0]+"+"+sum_fields[1]+"+"+sum_fields[2]

                print("***Summing: ")
                arcpy.management.CalculateField(RAG_final, rName, express)
        # ----------
        
        # Ratios are then calculated after the iMap calculation is complete
        print("Adding ratio fields")
        crName = "Cr_"+species
        cuName = "CUr_"+species
        ndName = "NDr_"+species

        arcpy.management.AddFields(RAG_final, [
            [crName, 'FLOAT'],
            [cuName, 'FLOAT'],
            [ndName, 'FLOAT']
        ])

        print("Calculating ratios")
        arcpy.management.CalculateField(RAG_final, crName, "!model_"+species+"!/!iMap_cnfrm_"+species+"!")
        arcpy.management.CalculateField(RAG_final, cuName, "!model_"+species+"!/(!iMap_cnfrm_"+species+"!+"+"!iMap_uncnfrm_"+species+"!)")
        arcpy.management.CalculateField(RAG_final, ndName, "!model_nd_"+species+"!/!iMap_nd_"+species+"!")

        print("Done!")

In [49]:
# Create lists to use for calculating per-species totals

geometries = ["point","line","poly"]
types = ["cnfrm","uncnfrm"]

sum_dict = dict()

for species in species_names:
    for record_type in types:
        tmpList = list()
        for geometry in geometries:
            sum_dict.update({"imap_"+species+"_"+record_type: []})
            
            tmpList.append("!imap_"+geometry+"_"+species+"_"+record_type+"!")
        sum_dict["imap_"+species+"_"+record_type].append(tmpList)

print(sum_dict)

# Unpack list in dictionary entries
express_fields = [x for l in sum_dict['imap_phrag_uncnfrm'] for x in l]

# Create expression for calculating fields
express = express_fields[0]+"+"+express_fields[1]+"+"+express_fields[2]
express


{'imap_phrag_cnfrm': [['!imap_point_phrag_cnfrm!', '!imap_line_phrag_cnfrm!', '!imap_poly_phrag_cnfrm!']], 'imap_phrag_uncnfrm': [['!imap_point_phrag_uncnfrm!', '!imap_line_phrag_uncnfrm!', '!imap_poly_phrag_uncnfrm!']], 'imap_knot_cnfrm': [['!imap_point_knot_cnfrm!', '!imap_line_knot_cnfrm!', '!imap_poly_knot_cnfrm!']], 'imap_knot_uncnfrm': [['!imap_point_knot_uncnfrm!', '!imap_line_knot_uncnfrm!', '!imap_poly_knot_uncnfrm!']], 'imap_wp_cnfrm': [['!imap_point_wp_cnfrm!', '!imap_line_wp_cnfrm!', '!imap_poly_wp_cnfrm!']], 'imap_wp_uncnfrm': [['!imap_point_wp_uncnfrm!', '!imap_line_wp_uncnfrm!', '!imap_poly_wp_uncnfrm!']], 'imap_toh_cnfrm': [['!imap_point_toh_cnfrm!', '!imap_line_toh_cnfrm!', '!imap_poly_toh_cnfrm!']], 'imap_toh_uncnfrm': [['!imap_point_toh_uncnfrm!', '!imap_line_toh_uncnfrm!', '!imap_poly_toh_uncnfrm!']], 'imap_pl_cnfrm': [['!imap_point_pl_cnfrm!', '!imap_line_pl_cnfrm!', '!imap_poly_pl_cnfrm!']], 'imap_pl_uncnfrm': [['!imap_point_pl_uncnfrm!', '!imap_line_pl_uncnfrm!',

'!imap_point_phrag_uncnfrm!+!imap_line_phrag_uncnfrm!+!imap_poly_phrag_uncnfrm!'

In [51]:
print("!model_"+species+"!/!iMap_cnfrm_"+species+"!")
print("!model_"+species+"!/(!iMap_cnfrm_"+species+"!+"+"!iMap_uncnfrm_"+species+"!)")
print("!model_nd_"+species+"!/!iMap_nd_"+species+"!")

!model_pl!/!iMap_cnfrm_pl!
!model_pl!/(!iMap_cnfrm_pl!+!iMap_uncnfrm_pl!)
!model_nd_pl!/!iMap_nd_pl!


In [23]:
# Delete temporary files
# This way is necessary to delete the feature itself and not just its contents
import os
cws = arcpy.env.workspace

# Delete unmerged presence points
for input in tmpFeatures:
  input_path = os.path.join(cws, input)
  if arcpy.Exists(input_path):
    arcpy.Delete_management(input_path)

In [25]:
dataset_lists = [model_positives, imap_data]
for datasets in dataset_lists:
    for species in species_names:
        # For the individual feature in each dataset list
        # Create join count features
        for joinFeature in datasets:
            # Select just the species
            # Code to select 4 knotweed IDs
            if species is "knot":
                whereClause = "jurisdiction_species_id = "+str(jurisdiction_ids["knot"][0])
                for ID in jurisdiction_ids["knot"][1:]:
                    whereClause = whereClause + " Or jurisdiction_species_id = " + str(ID)
            # Otherwise, just select the single species ID
            else:
                whereClause = "jurisdiction_species_id = "+str(jurisdiction_ids[species])
                
            print(whereClause)

SyntaxError: invalid syntax (<string>, line 12)

In [33]:
whereClause = "jurisdiction_species_id = "+str(jurisdiction_ids["knot"][0])
for ID in jurisdiction_ids["knot"][1:]:
    whereClause = whereClause + " Or jurisdiction_species_id = " + str(ID)
print(whereClause)

jurisdiction_species_id = 1074 Or jurisdiction_species_id = 1191 Or jurisdiction_species_id = 1278 Or jurisdiction_species_id = 1479
