Cleanup tasks for later
- Review commented out stuff - take out what I don't need
- Also check the 2nd notebook for variables not needed, esp. environment variables
- Review comments and add where needed

## Purpose
This notebook ingests the CSV created in the previous notebook (cleaned records of customer complaints) and spatially joins the complaint points to the CPA polygons. The result is a feature class of the individual complaints - still at detail level - each joined with information about which CPA it falls in.

## Expected input: 
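- `CPA_FC_PRJ`: feature class created in Notebook 01 - CPA polygons reprojected to EPSG:2229. Note: the projected output in this notebook reports EPSG:2229 as NAD 1983 StatePlane California **V** (US feet), not zone VI; confirm which zone is intended.
- `CANDIDATE_CSV`: cleaned-up complaint data created in Notebook 02.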

## Run Order
1. Run all cells top-to-bottom


In [17]:
# Set up environment

import os
import pandas as pd
import arcpy

# Allow geoprocessing tools to overwrite existing outputs when cells are re-run.
arcpy.env.overwriteOutput = True

# Folder layout: <PROJECT_ROOT>/data_working/GetItDoneAnalysis.gdb
PROJECT_ROOT = r"C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone"
WORK_DIR = os.path.join(PROJECT_ROOT, "data_working")
GDB_PATH = os.path.join(WORK_DIR, "GetItDoneAnalysis.gdb")

# Inputs produced by the earlier notebooks
CPA_FC_PRJ = os.path.join(GDB_PATH, "cpa_prj")  # projected CPA polygons (Notebook 1)
CANDIDATE_CSV = os.path.join(WORK_DIR, "gid_drainage_candidate_last30.csv")  # cleaned complaints

# Make the file geodatabase the default arcpy workspace.
arcpy.env.workspace = GDB_PATH

# Fail fast on the first input that neither the file system nor arcpy can find.
# (A feature class inside a .gdb is not a plain file, hence the arcpy.Exists fallback.)
missing_input = next(
    (
        candidate
        for candidate in (CANDIDATE_CSV, CPA_FC_PRJ)
        if not (os.path.exists(candidate) or arcpy.Exists(candidate))
    ),
    None,
)
if missing_input is not None:
    raise FileNotFoundError(f"Missing required input: {missing_input}")

# Echo the resolved locations so the reader can confirm them at a glance.
for label, location in (("CSV:", CANDIDATE_CSV), ("CPA:", CPA_FC_PRJ), ("GDB:", GDB_PATH)):
    print(label, location)

CSV: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\gid_drainage_candidate_last30.csv
CPA: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\cpa_prj
GDB: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb


In [18]:
# Output feature class: complaint points in the CSV's original lon/lat coordinates.
COMPLAINTS_DTL = os.path.join(GDB_PATH, "complaints_detail")

# The CSV's "lon"/"lat" columns are read as WGS84 (EPSG:4326).
wgs84 = arcpy.SpatialReference(4326)

# Build a temporary XY event layer from the CSV rows.
xy_layer = "gid_drainage_xy"
arcpy.management.MakeXYEventLayer(CANDIDATE_CSV, "lon", "lat", xy_layer, wgs84)

# Persist the event layer as a feature class, replacing any earlier copy.
if arcpy.Exists(COMPLAINTS_DTL):
    arcpy.management.Delete(COMPLAINTS_DTL)
arcpy.management.CopyFeatures(in_features=xy_layer, out_feature_class=COMPLAINTS_DTL)

print("Created points FC:", COMPLAINTS_DTL)
point_total = arcpy.management.GetCount(COMPLAINTS_DTL)[0]
print("Point count:", point_total)

Created points FC: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\complaints_detail
Point count: 3328


In [19]:
# Output feature class: complaint points reprojected into the CPA polygons' coordinate system.
COMPLAINTS_DTL_PRJ = os.path.join(GDB_PATH, "complaints_detail_projected")

# Take the target spatial reference from the CPA feature class itself so the points are
# guaranteed to match it, rather than repeating a hard-coded EPSG code here.
# NOTE(review): the previous hard-coded value, EPSG:2229, was described as "StatePlane CA VI",
# but arcpy reports it as NAD_1983_StatePlane_California_V_FIPS_0405_Feet (zone V).
# If zone VI is what is wanted, that is presumably EPSG:2230 - confirm and fix in Notebook 01;
# this cell will then follow automatically.
TARGET_SR = arcpy.Describe(CPA_FC_PRJ).spatialReference
if TARGET_SR.name == "Unknown":
    raise ValueError(f"CPA feature class has no defined spatial reference: {CPA_FC_PRJ}")

# Clean slate
if arcpy.Exists(COMPLAINTS_DTL_PRJ):
    arcpy.management.Delete(COMPLAINTS_DTL_PRJ)

arcpy.management.Project(
    in_dataset=COMPLAINTS_DTL,
    out_dataset=COMPLAINTS_DTL_PRJ,
    out_coor_system=TARGET_SR
)

print("Projected points FC:", COMPLAINTS_DTL_PRJ)
print("Projected SR:", arcpy.Describe(COMPLAINTS_DTL_PRJ).spatialReference.name)

# Remove the unprojected (WGS84) complaint points so they are not used by mistake.
# Side effect: this cell cannot be re-run on its own afterwards - re-run the previous cell first.
if arcpy.Exists(COMPLAINTS_DTL):
    arcpy.management.Delete(COMPLAINTS_DTL)

Projected points FC: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\complaints_detail_projected
Projected SR: NAD_1983_StatePlane_California_V_FIPS_0405_Feet


In [20]:
# Attach to every individual GetItDone complaint point the attributes of the CPA polygon it intersects.
COMPLAINTS_DTL_WITH_CPA = os.path.join(GDB_PATH, "gid_drainage_points_lastmonth_with_cpa")

# Start from a clean slate: drop any output left over from an earlier run.
if arcpy.Exists(COMPLAINTS_DTL_WITH_CPA):
    arcpy.management.Delete(COMPLAINTS_DTL_WITH_CPA)

# Points are the target, CPA polygons are the join features.
# One output row per point (JOIN_ONE_TO_ONE); points without a polygon are kept (KEEP_ALL).
arcpy.analysis.SpatialJoin(
    COMPLAINTS_DTL_PRJ,
    CPA_FC_PRJ,
    COMPLAINTS_DTL_WITH_CPA,
    "JOIN_ONE_TO_ONE",
    "KEEP_ALL",
    match_option="INTERSECT",
)

print("Created:", COMPLAINTS_DTL_WITH_CPA)
for count_label, dataset in (
    ("Input point count:", COMPLAINTS_DTL_PRJ),
    ("Output count:", COMPLAINTS_DTL_WITH_CPA),
):
    print(count_label, arcpy.management.GetCount(dataset)[0])

# Drop the pre-join projected points feature class (the one without CPA attributes);
# keeping it only creates confusion when browsing feature classes in ArcGIS Pro.
if arcpy.Exists(COMPLAINTS_DTL_PRJ):
    arcpy.management.Delete(COMPLAINTS_DTL_PRJ)


Created: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\gid_drainage_points_lastmonth_with_cpa
Input point count: 3328
Output count: 3328


In [21]:
# Count complaint points that did not fall inside any CPA polygon.
# Uses the same definition of "unmatched" as the matched/unmatched split
# (Join_Count of 0 or NULL) so the two numbers always agree.
unmatched_layer = "lyr_unmatched"
arcpy.management.MakeFeatureLayer(
    COMPLAINTS_DTL_WITH_CPA, unmatched_layer, "Join_Count = 0 OR Join_Count IS NULL"
)
try:
    unmatched = int(arcpy.management.GetCount(unmatched_layer)[0])
finally:
    # The layer only exists to be counted; remove it so it does not linger in the session.
    arcpy.management.Delete(unmatched_layer)

print("Unmatched points (Join_Count=0):", unmatched)

Unmatched points (Join_Count=0): 4


In [22]:
# --- Split matched vs unmatched after spatial join ---
# Matched points replace the joined feature class; unmatched points are kept
# separately in an "_errors" feature class for review.
COMPLAINTS_DTL_WITH_CPA_ERRORS = os.path.join(GDB_PATH, "gid_drainage_points_lastmonth_with_cpa_errors")
COMPLAINTS_DTL_WITH_CPA_MATCHED = os.path.join(GDB_PATH, "gid_drainage_points_lastmonth_with_cpa_matched")

# Clean slate
for fc in [COMPLAINTS_DTL_WITH_CPA_ERRORS, COMPLAINTS_DTL_WITH_CPA_MATCHED]:
    if arcpy.Exists(fc):
        arcpy.management.Delete(fc)

# Row count before the split, used to verify nothing is lost.
joined_count = int(arcpy.management.GetCount(COMPLAINTS_DTL_WITH_CPA)[0])

# Matched (Join_Count > 0)
arcpy.analysis.Select(
    in_features=COMPLAINTS_DTL_WITH_CPA,
    out_feature_class=COMPLAINTS_DTL_WITH_CPA_MATCHED,
    where_clause="Join_Count > 0"
)

# Errors / unmatched (Join_Count = 0 OR null)
arcpy.analysis.Select(
    in_features=COMPLAINTS_DTL_WITH_CPA,
    out_feature_class=COMPLAINTS_DTL_WITH_CPA_ERRORS,
    where_clause="Join_Count = 0 OR Join_Count IS NULL"
)

matched_count = int(arcpy.management.GetCount(COMPLAINTS_DTL_WITH_CPA_MATCHED)[0])
error_count = int(arcpy.management.GetCount(COMPLAINTS_DTL_WITH_CPA_ERRORS)[0])

# The next step deletes the joined feature class, so make sure the two subsets
# account for every row before anything is destroyed.
if matched_count + error_count != joined_count:
    raise RuntimeError(
        f"Split lost rows: {matched_count} matched + {error_count} unmatched "
        f"!= {joined_count} joined"
    )

# Replace COMPLAINTS_DTL_WITH_CPA with matched-only version
arcpy.management.Delete(COMPLAINTS_DTL_WITH_CPA)
arcpy.management.Rename(COMPLAINTS_DTL_WITH_CPA_MATCHED, COMPLAINTS_DTL_WITH_CPA)

print("Matched complaints FC:", COMPLAINTS_DTL_WITH_CPA)
print("Matched count:", matched_count)

print("Error/unmatched complaints FC:", COMPLAINTS_DTL_WITH_CPA_ERRORS)
print("Error/unmatched count:", error_count)


Matched complaints FC: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\gid_drainage_points_lastmonth_with_cpa
Matched count: 3324
Error/unmatched complaints FC: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb\gid_drainage_points_lastmonth_with_cpa_errors
Error/unmatched count: 4
