In [None]:
import geopandas as gpd
import glob, os
import numpy as np, pandas as pd
from shapely.geometry import shape, Polygon, MultiPolygon
from shapely.validation import make_valid

In [None]:
# --- Run configuration -------------------------------------------------------
# NOTE(review): `dissolve` is not read by any cell visible here — confirm
# whether another cell uses it before removing.
dissolve = True

# Folder holding the input shapefiles, named "<year>_<region>*.shp".
top_dir = "/datawaha/esom/DatePalmCounting/Geoportal/Center_pivot"

# Region codes exactly as they appear in the file names.
region_keys = ['AJ', 'TKS', 'Qassim1', 'Qassim2', 'Qassim3', 'Qassim4', 'EAST']

# Years to process: five-year steps 1990-2010, then every year 2015-2018.
year_keys1 = [str(i) for i in range(1990, 2015, 5)]
year_keys2 = [str(i) for i in range(2015, 2019)]
year_keys = year_keys1 + year_keys2

# Per-year merged outputs go into a sub-folder of the input root. Deriving the
# path from `top_dir` (instead of repeating the literal) keeps the two in sync.
out_dir = os.path.join(top_dir, "year_base_consistent")
os.makedirs(out_dir, exist_ok=True)  # no error if the folder already exists



In [None]:
def _fill_holes(geom):
    """Return geometry with all interior rings removed."""
    if geom.is_empty:
        return geom
    geom = make_valid(geom)
    if isinstance(geom, Polygon):
        return Polygon(geom.exterior)
    if isinstance(geom, MultiPolygon):
        return MultiPolygon([Polygon(p.exterior) for p in geom.geoms if not p.is_empty])
    # For any other type (rare after polygonize), just return as-is
    return geom

def _field_type_from_fname(path):
    """Extract your fd flag (tar_fd) from filename and map to Field_type."""
    # Safeguard in case of unexpected name patterns
    parts = os.path.basename(path).split('_')
    tar_fd = None
    if len(parts) >= 6 and parts[5]:
        tar_fd = parts[5][0]
    if tar_fd == '1':
        return 'circles'
    elif tar_fd == '2':
        return 'fans'
    elif tar_fd == '5':
        return 'merged'
    else:
        return 'unknown'

In [None]:

from tqdm.auto import tqdm

# Build one merged GeoJSON per year from the per-region shapefiles.
#
# For every "<top_dir>/<year>_<region>*.shp" file:
#   1. read it and drop features that have no geometry,
#   2. tag it with Field_type (from the file name), Year and Region,
#   3. compute Acreage_m2 / Acreage_ha in an equal-area CRS,
#   4. fill polygon holes and repair invalid geometries,
#   5. align it to the first CRS seen for that year.
# The files of a year are then concatenated, reprojected to EPSG:4326, given a
# running fd_id, cleaned once more and written to
# "<out_dir>/CPF_fields_<year>.geojson".

AREA_EPSG = 6933  # WGS 84 / NSIDC EASE-Grid 2.0 Global: cylindrical equal-area, metres
OUT_EPSG = 4326   # geographic WGS 84, used for the GeoJSON output

# === Progress: Years ===
for year_key in tqdm(year_keys, desc="Years", position=0):
    chunks = []      # one cleaned GeoDataFrame per input file
    year_crs = None  # first CRS seen this year; every other file is aligned to it

    # === Progress: Regions within this year ===
    for region_key in tqdm(region_keys, desc=f"Regions ({year_key})", position=1, leave=False):
        # glob returns files in filesystem-dependent order; sort so the row
        # order (and therefore fd_id) is the same on every run.
        in_shp_list = sorted(glob.glob(f"{top_dir}/{year_key}_{region_key}*.shp"))

        if not in_shp_list:
            # Show a note but keep going
            tqdm.write(f"[WARN] No shapefiles for year={year_key}, region={region_key}")
            continue

        # === Progress: Files within this region ===
        for in_shp in tqdm(in_shp_list, desc=f"Files ({region_key})", position=2, leave=False):
            try:
                gdf = gpd.read_file(in_shp)
            except Exception as e:
                tqdm.write(f"[ERROR] Failed to read {in_shp}: {e}")
                continue

            # Features without a geometry cannot be measured or repaired and
            # would make the per-geometry .apply calls below raise.
            missing = gdf.geometry.isna()
            if missing.any():
                tqdm.write(f"[WARN] Dropping {int(missing.sum())} feature(s) with no geometry in {in_shp}")
                gdf = gdf.loc[~missing].copy()
            if gdf.empty:
                tqdm.write(f"[WARN] No usable features in {in_shp}; skipping")
                continue

            # Add classification from file name
            gdf['Field_type'] = _field_type_from_fname(in_shp)

            # Add Year/Region tags now (so each chunk carries them)
            gdf['Year'] = year_key
            gdf['Region'] = region_key

            # Area in m^2, measured in an equal-area projection.
            # NOTE(review): the area is taken from the geometry as read, i.e.
            # before holes are filled and before make_valid — confirm that
            # Acreage_* is meant to describe the unfilled shape.
            try:
                gdf_area = gdf.to_crs(epsg=AREA_EPSG)
                gdf['Acreage_m2'] = gdf_area.geometry.area
            except Exception as e:
                # Best-effort fallback (e.g. file has no CRS): the value is then
                # in squared source-coordinate units, not necessarily m^2.
                tqdm.write(f"[WARN] Could not compute area in equal-area CRS for {in_shp} ({e}); using raw area")
                gdf['Acreage_m2'] = gdf.geometry.area

            gdf['Acreage_ha'] = (gdf['Acreage_m2'] / 10000.0).round(2)

            # Hole filling + validity on each chunk (safer before concat)
            gdf['geometry'] = gdf.geometry.apply(_fill_holes)
            gdf['geometry'] = gdf.geometry.apply(make_valid)

            # Files of one year may use different CRSs. Concatenating them under
            # a single CRS label would misplace features (or make concat raise),
            # so align every file to the first CRS seen for this year.
            if gdf.crs is None:
                tqdm.write(f"[WARN] {in_shp} has no CRS; assuming it matches the other files for {year_key}")
            elif year_crs is None:
                year_crs = gdf.crs
            elif gdf.crs != year_crs:
                try:
                    gdf = gdf.to_crs(year_crs)
                except Exception as e:
                    tqdm.write(f"[ERROR] Could not align {in_shp} to the CRS of the other files ({e}); skipping")
                    continue

            chunks.append(gdf)

    # Merge all regions/files for this year and finalize
    if not chunks:
        tqdm.write(f"[INFO] Nothing to write for {year_key}")
        continue

    gdf_all = gpd.GeoDataFrame(pd.concat(chunks, ignore_index=True), geometry="geometry", crs=year_crs)

    # Reproject to EPSG:4326 for output
    try:
        gdf_all = gdf_all.to_crs(epsg=OUT_EPSG)
    except Exception as e:
        tqdm.write(f"[WARN] Could not reproject to EPSG:{OUT_EPSG} for {year_key} ({e}); writing in source CRS")

    # Cleanups
    gdf_all = gdf_all.drop(columns=['name'], errors='ignore')

    # Assign unique fd_id per year output (1-based, in merged row order)
    gdf_all['fd_id'] = np.arange(1, len(gdf_all) + 1)

    # Final geometry cleanup (repeated after the reprojection above)
    gdf_all['geometry'] = gdf_all.geometry.apply(_fill_holes)
    gdf_all['geometry'] = gdf_all.geometry.apply(make_valid)

    # Save
    out_shp = os.path.join(out_dir, f"CPF_fields_{year_key}.geojson")
    try:
        gdf_all.to_file(out_shp, driver="GeoJSON")
    except Exception as e:
        tqdm.write(f"[ERROR] Failed to write {out_shp}: {e}")
