In [6]:
# Imports and environment setup
import os
import arcpy
import requests
import zipfile
from pathlib import Path

# Allow outputs from earlier runs of this notebook to be overwritten
arcpy.env.overwriteOutput = True

# --- Project root folder; every other path below is derived from it
PROJECT_ROOT = r"C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone"

# --- Raw downloads, working data, and the analysis geodatabase
RAW_DIR = os.path.join(PROJECT_ROOT, "data_raw")
WORK_DIR = os.path.join(PROJECT_ROOT, "data_working")
GDB_PATH = os.path.join(WORK_DIR, "GetItDoneAnalysis.gdb")

# --- CPA boundaries: source URL, local ZIP copy, and extraction folder
CPA_ZIP_URL = "https://seshat.datasd.org/gis_community_planning_districts/cmty_plan_datasd.zip"
CPA_ZIP_PATH = os.path.join(RAW_DIR, "cmty_plan_datasd.zip")
CPA_EXTRACT_DIR = os.path.join(RAW_DIR, "communityPlanningAreasShp")

# Make sure every data folder exists before anything is written into it
for d in (RAW_DIR, WORK_DIR, CPA_EXTRACT_DIR):
    os.makedirs(d, exist_ok=True)

# Create the file geodatabase only when it is not there yet.
# os.path.split(GDB_PATH) yields (WORK_DIR, "GetItDoneAnalysis.gdb").
if not arcpy.Exists(GDB_PATH):
    arcpy.management.CreateFileGDB(*os.path.split(GDB_PATH))

# Echo the resolved locations, one per line
print(
    f"RAW_DIR: {RAW_DIR}",
    f"WORK_DIR: {WORK_DIR}",
    f"GDB_PATH: {GDB_PATH}",
    sep="\n",
)

# ----------------------------
# Download ZIP (always refresh)
# ----------------------------
def download_file(url: str, out_path: str, chunk_size: int = 1024 * 1024) -> str:
    """Download a URL to out_path (streaming). Overwrites existing file.

    The body is streamed into a temporary sibling file (``out_path`` plus
    ``.part``) and moved over ``out_path`` only after the whole download
    succeeded, so a failed or interrupted download never leaves a truncated
    file in place of a previous good copy.

    Args:
        url: Address to fetch with an HTTP GET.
        out_path: Destination file path; replaced if it already exists.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        ``out_path``, unchanged, for convenient chaining.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the temporary or destination file cannot be written.
    """
    tmp_path = f"{out_path}.part"
    try:
        # Using the response as a context manager releases the connection
        # even when the download is abandoned half-way.
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temp file is gone, so this only
        # cleans up the partial file left behind by a failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return out_path

# Fetch a fresh copy every run; download_file hands back the path it wrote to
print("Downloading latest CPA ZIP (overwrite)...")
print("Downloaded:", download_file(CPA_ZIP_URL, CPA_ZIP_PATH))

# ----------------------------
# Unzip (always refresh)
# ----------------------------
def clear_folder(folder_path: str) -> None:
    """Empty a folder of its files while keeping the folder itself.

    Every regular file below ``folder_path`` is removed, at any depth.
    Sub-directories are left in place (they just end up without files).
    If the folder does not exist yet it is created, including any missing
    parents, so callers can always write into it afterwards.
    """
    root = Path(folder_path)
    if root.exists():
        # filter() is lazy, so each entry is checked right before it is removed
        for entry in filter(Path.is_file, root.rglob("*")):
            entry.unlink()
    else:
        root.mkdir(parents=True, exist_ok=True)

def unzip_zip(zip_path: str, extract_dir: str) -> None:
    """Unpack every member of the archive at zip_path beneath extract_dir."""
    archive = zipfile.ZipFile(zip_path, mode="r")
    try:
        archive.extractall(path=extract_dir)
    finally:
        # Always release the archive handle, even if extraction fails
        archive.close()

# Remove files from the previous extraction first so stale files cannot be
# picked up by the shapefile search below.
print("Clearing previous extracted CPA files...")
clear_folder(CPA_EXTRACT_DIR)

print("Extracting CPA ZIP (overwrite)...")
unzip_zip(CPA_ZIP_PATH, CPA_EXTRACT_DIR)
print("Extracted to:", CPA_EXTRACT_DIR)

# ----------------------------
# Locate the CPA shapefile
# ----------------------------
# Sorted so the fallback choice below is reproducible: the order in which
# directory entries are visited is not guaranteed.
shps = sorted(Path(CPA_EXTRACT_DIR).rglob("*.shp"))
if not shps:
    raise FileNotFoundError(f"No .shp found after extraction in: {CPA_EXTRACT_DIR}")

# Prefer expected name if present; otherwise fall back to the first
# shapefile in sorted path order.
preferred = [p for p in shps if p.name.lower() == "cmty_plan_datasd.shp"]
CPA_SHP = str(preferred[0] if preferred else shps[0])

print("CPA shapefile found:", CPA_SHP)


RAW_DIR: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_raw
WORK_DIR: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working
GDB_PATH: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_working\GetItDoneAnalysis.gdb
Downloading latest CPA ZIP (overwrite)...
Downloaded: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_raw\cmty_plan_datasd.zip
Clearing previous extracted CPA files...
Extracting CPA ZIP (overwrite)...
Extracted to: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_raw\communityPlanningAreasShp
CPA shapefile found: C:\Users\kris_\OneDrive - Kris Manske\Documents\Classes\BootcampGIS\Wildfire repositories on AWS\GetItDone\data_raw\communityPlanningAreasShp\cmty_pla