In [4]:
# pip install rasterio geopandas shapely fiona pyproj
from pathlib import Path
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape

def polygonize_labels(arr, transform, crs, nodata=None):
    """Polygonize integer labels into a GeoDataFrame with columns ['class','geometry'].

    Parameters
    ----------
    arr : 2-D label array, passed unchanged to ``rasterio.features.shapes``.
    transform : affine transform handed to ``shapes`` to georeference the polygons.
    crs : CRS assigned to the resulting GeoDataFrame.
    nodata : pixel value to exclude from polygonization; ``None`` keeps every pixel.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per shape yielded by ``shapes``, with empty or invalid
        geometries dropped. Both columns are always present, even when no
        shape was produced.
    """
    mask = None if nodata is None else (arr != nodata)
    classes, geoms = [], []
    for geom, val in shapes(arr, mask=mask, transform=transform):
        classes.append(int(val))
        geoms.append(shape(geom))
    # Build from explicit columns: a frame built from an empty list of records
    # has no "geometry" column, so GeoDataFrame(..., geometry="geometry") would
    # raise instead of returning the empty frame that callers test with `.empty`.
    gdf = gpd.GeoDataFrame({"class": classes, "geometry": geoms}, geometry="geometry", crs=crs)
    # Clean up invalid/empty geometries
    if not gdf.empty:
        gdf = gdf[~gdf.geometry.is_empty & gdf.geometry.is_valid]
    return gdf

def add_area(poly_gdf, area_crs="EPSG:6933"):
    """Attach ``area_m2`` and ``area_ha`` columns to ``poly_gdf`` and return it.

    Areas are measured on a copy reprojected to ``area_crs`` (EPSG:6933 by
    default). The columns are written onto the frame that was passed in, and
    that same object is returned. An empty frame simply receives two empty
    columns.
    """
    if not poly_gdf.empty:
        equal_area = poly_gdf.to_crs(area_crs)
        poly_gdf["area_m2"] = equal_area.area.values
        # 1 ha = 10,000 m2
        poly_gdf["area_ha"] = poly_gdf["area_m2"] / 10_000.0
    else:
        poly_gdf["area_m2"] = []
        poly_gdf["area_ha"] = []
    return poly_gdf

def load_all_rasters_to_dataframe(
    folder,
    file_glob="raster_labels_*.tif",
    target_crs="EPSG:4326",
    keep_classes=None,
    compute_area=False,
    area_crs="EPSG:6933"
):
    """
    Polygonize every raster in ``folder`` that matches ``file_glob`` and merge the results.

    Parameters
    ----------
    folder : directory to scan (not recursive).
    file_glob : glob pattern selecting the rasters.
    target_crs : common CRS each tile is reprojected to before merging.
    keep_classes : iterable of class values to keep; ``None`` keeps all classes.
    compute_area : when True, add ``area_m2`` / ``area_ha`` via ``add_area``.
    area_crs : CRS used by ``add_area`` for the area computation.

    Returns
    -------
    geopandas.GeoDataFrame in ``target_crs`` with a ``source_file`` column
    holding the raster file name. It is empty, with columns
    ['class', 'source_file', 'geometry'], when no tile yields polygons.

    Raises
    ------
    FileNotFoundError
        If no file in ``folder`` matches ``file_glob``.
    """
    folder = Path(folder)
    raster_paths = sorted(folder.glob(file_glob))
    if len(raster_paths) == 0:
        raise FileNotFoundError(f"No files found matching {file_glob} in {folder}")

    tiles = []
    for raster_path in raster_paths:
        # Only band 1 is read; the dataset's own nodata value drives the mask.
        with rasterio.open(raster_path) as src:
            tile = polygonize_labels(src.read(1), src.transform, src.crs, src.nodata)

        if keep_classes is not None:
            tile = tile[tile["class"].isin(keep_classes)]

        if not tile.empty:
            # Bring this tile into the common CRS before it joins the others.
            tile = tile.to_crs(target_crs)
            tile["source_file"] = raster_path.name
            tiles.append(tile)

    if not tiles:
        # Nothing survived: hand back an empty, correctly-formed frame in the target CRS.
        return gpd.GeoDataFrame(columns=["class", "source_file", "geometry"], geometry="geometry", crs=target_crs)

    merged = gpd.GeoDataFrame(pd.concat(tiles, ignore_index=True), geometry="geometry", crs=target_crs)
    return add_area(merged, area_crs=area_crs) if compute_area else merged

# -------- Example usage --------
# The result is bound to `all_fields_gdf`, the name the prints below (and the
# following display cell) read; previously it was bound only to
# `all_fields_gdf_test`, so those reads hit an undefined or stale variable.
all_fields_gdf = load_all_rasters_to_dataframe(
    folder="/media/disk3/raman/code/Scrubland-Field-Delineation/analysis/data/AgriFieldNet/ref_agrifieldnet_competition_v1_labels_test/All_rasters",
    file_glob="raster_labels_*.tif",
    target_crs="EPSG:4326",   # or set to an India-appropriate CRS if you prefer
    keep_classes=None,        # or e.g. [1] if 1 = "fields"
    compute_area=True,        # set to True if you want area columns
    area_crs="EPSG:6933"      # equal-area for area accuracy
)
# Alias kept so anything still using the earlier name keeps working.
all_fields_gdf_test = all_fields_gdf

print(all_fields_gdf.crs)
print(len(all_fields_gdf), "polygons")
print(all_fields_gdf.head())


FileNotFoundError: No files found matching raster_labels_*.tif in /media/disk3/raman/code/Scrubland-Field-Delineation/analysis/data/AgriFieldNet/ref_agrifieldnet_competition_v1_labels_test/All_rasters

In [3]:
# Show the merged polygons; expects `all_fields_gdf` to already exist in the kernel.
all_fields_gdf

Unnamed: 0,class,geometry,source_file,area_m2,area_ha
0,4,"POLYGON ((83.39846 19.28666, 83.39846 19.28657...",raster_labels_0.tif,9492.677276,0.949268
1,4,"POLYGON ((83.40877 19.28274, 83.40877 19.28265...",raster_labels_0.tif,9992.156990,0.999216
2,4,"POLYGON ((83.41131 19.27421, 83.4113 19.27394,...",raster_labels_0.tif,299.763729,0.029976
3,9,"POLYGON ((87.4448 25.58463, 87.4448 25.58445, ...",raster_labels_1.tif,1501.126890,0.150113
4,15,"POLYGON ((87.43941 25.58139, 87.43941 25.58112...",raster_labels_1.tif,2401.805213,0.240181
...,...,...,...,...,...
4670,4,"POLYGON ((83.85544 19.1127, 83.85544 19.11261,...",raster_labels_998.tif,4593.419836,0.459342
4671,1,"POLYGON ((81.59046 27.1224, 81.59046 27.12231,...",raster_labels_999.tif,3002.147434,0.300215
4672,1,"POLYGON ((81.5842 27.12044, 81.5842 27.12035, ...",raster_labels_999.tif,2701.937347,0.270194
4673,1,"POLYGON ((81.58459 27.11818, 81.58459 27.118, ...",raster_labels_999.tif,3202.296001,0.320230


In [21]:
# pip install rasterio geopandas shapely fiona pyproj
from pathlib import Path
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape, Polygon, LineString

def polygonize_labels(arr, transform, crs, nodata=None, keep_classes=None):
    """Polygonize integer labels into a GeoDataFrame with ['class','geometry'].

    Parameters
    ----------
    arr : 2-D label array, passed unchanged to ``rasterio.features.shapes``.
    transform : affine transform handed to ``shapes`` to georeference the polygons.
    crs : CRS assigned to the resulting GeoDataFrame.
    nodata : pixel value to exclude from polygonization; ``None`` keeps every pixel.
    keep_classes : iterable of label values to keep; ``None`` keeps all labels.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per retained shape, with empty or invalid geometries dropped.
        Both columns are always present, even when nothing was retained.
    """
    mask = None if nodata is None else (arr != nodata)
    # Build the membership set once instead of scanning keep_classes per shape.
    wanted = None if keep_classes is None else set(keep_classes)
    classes, geoms = [], []
    for geom, val in shapes(arr, mask=mask, transform=transform):
        val = int(val)
        if wanted is not None and val not in wanted:
            continue
        classes.append(val)
        geoms.append(shape(geom))
    # Build from explicit columns: a frame built from an empty list of records
    # has no "geometry" column, so GeoDataFrame(..., geometry="geometry") would
    # raise instead of returning the empty frame that callers test with `.empty`.
    gdf = gpd.GeoDataFrame({"class": classes, "geometry": geoms}, geometry="geometry", crs=crs)
    if not gdf.empty:
        gdf = gdf[~gdf.geometry.is_empty & gdf.geometry.is_valid]
    return gdf

def boundaries_from_polygons(poly_gdf, include_interiors=True):
    """Create boundary lines from polygon exteriors (and hole rings if requested).

    Parameters
    ----------
    poly_gdf : GeoDataFrame with a ``class`` column and polygon geometries.
    include_interiors : when True, hole rings are emitted as well as the
        exterior rings; when False, only exterior rings are emitted.

    Returns
    -------
    geopandas.GeoDataFrame of line geometries in ``poly_gdf.crs``, dissolved
    per ``class`` and exploded back into single parts. For an empty input the
    result is an empty frame with columns ['class', 'geometry'].
    """
    if poly_gdf.empty:
        return gpd.GeoDataFrame(columns=["class", "geometry"], geometry="geometry", crs=poly_gdf.crs)
    line_geoms, classes = [], []
    for cls, geom in zip(poly_gdf["class"].to_numpy(), poly_gdf.geometry):
        if isinstance(geom, Polygon):
            parts = [geom]
        elif getattr(geom, "geom_type", None) == "MultiPolygon":
            parts = list(geom.geoms)
        else:
            # Not polygonal: fall back to the geometry's own boundary.
            line_geoms.append(geom.boundary)
            classes.append(cls)
            continue
        for part in parts:
            # Use the exterior ring explicitly. `Polygon.boundary` already
            # contains the hole rings, which made `include_interiors=False`
            # ineffective and duplicated every hole when it was True.
            rings = [part.exterior]
            if include_interiors:
                rings.extend(part.interiors)
            for ring in rings:
                line_geoms.append(LineString(ring.coords))
                classes.append(cls)
    lines = gpd.GeoDataFrame({"class": classes, "geometry": line_geoms}, geometry="geometry", crs=poly_gdf.crs)
    # Dissolve per class then explode for cleaner boundaries
    lines = lines.dissolve(by="class").explode(index_parts=False).reset_index()
    return lines

def add_area(poly_gdf, area_crs="EPSG:6933"):
    """Attach ``area_m2`` and ``area_ha`` columns to ``poly_gdf`` and return it.

    Areas are measured on a copy reprojected to ``area_crs`` (global default
    EPSG:6933). The columns are written onto the frame that was passed in, and
    that same object is returned. An empty frame simply receives two empty
    columns.
    """
    if not poly_gdf.empty:
        equal_area = poly_gdf.to_crs(area_crs)
        poly_gdf["area_m2"] = equal_area.area.values
        # 1 ha = 10,000 m2
        poly_gdf["area_ha"] = poly_gdf["area_m2"] / 10_000.0
    else:
        poly_gdf["area_m2"] = []
        poly_gdf["area_ha"] = []
    return poly_gdf

def load_test_field_ids_to_dataframe(
    root_folder,
    target_crs="EPSG:4326",   # common CRS for merging (e.g., EPSG:4326)
    treat_zero_as_nodata=True, # many field-id rasters use 0 as background
    keep_classes=None,         # e.g., [1,2,...] if you want to filter ids; None keeps all >0
    compute_area=False,
    area_crs="EPSG:6933",
    return_boundaries=False
):
    """
    Recursively finds **/field_ids.tif under `root_folder`, polygonizes each,
    reprojects to `target_crs`, and concatenates into a single GeoDataFrame.

    Parameters
    ----------
    root_folder : directory searched recursively for ``field_ids.tif``.
    target_crs : CRS every per-raster result is reprojected to before merging.
    treat_zero_as_nodata : when True, pixels with value 0 never become
        polygons, whether or not the raster declares its own nodata value.
    keep_classes : iterable of ids to keep; ``None`` keeps every id.
    compute_area : when True, add ``area_m2`` / ``area_ha`` via ``add_area``.
    area_crs : CRS used by ``add_area``.
    return_boundaries : when True, also return boundary lines built by
        ``boundaries_from_polygons``.

    Returns
    -------
    polygons_gdf, or ``(polygons_gdf, boundaries_gdf)`` if ``return_boundaries``
    is True. ``source_file`` holds each raster's path relative to the root.

    Raises
    ------
    FileNotFoundError
        If no ``field_ids.tif`` exists under ``root_folder``.
    """
    import warnings

    root = Path(root_folder)
    files = sorted(root.rglob("field_ids.tif"))
    if not files:
        raise FileNotFoundError(f"No field_ids.tif found under: {root_folder}")

    empty_columns = ["class", "source_file", "geometry"]

    all_polys = []
    for f in files:
        # Read what is needed, then release the file handle before processing.
        with rasterio.open(f) as src:
            arr = src.read(1)
            nodata = src.nodata
            src_transform, src_crs = src.transform, src.crs

        # Use 0 as the mask value when the raster declares no nodata of its own.
        if treat_zero_as_nodata and (nodata is None):
            nodata = 0

        gdf = polygonize_labels(arr, src_transform, src_crs, nodata=nodata, keep_classes=keep_classes)

        # If the raster declares a different nodata value, the mask above does
        # not remove 0, so drop background polygons explicitly.
        if treat_zero_as_nodata and not gdf.empty:
            gdf = gdf[gdf["class"] != 0]
        if gdf.empty:
            continue

        # Reproject per-raster output to a single CRS before merging
        gdf = gdf.to_crs(target_crs)
        gdf["source_file"] = str(f.relative_to(root))  # track origin
        all_polys.append(gdf)

    if not all_polys:
        polygons = gpd.GeoDataFrame(columns=empty_columns, geometry="geometry", crs=target_crs)
        if return_boundaries:
            # Separate object so the two results can be modified independently.
            boundaries = gpd.GeoDataFrame(columns=empty_columns, geometry="geometry", crs=target_crs)
            return polygons, boundaries
        return polygons

    polygons = gpd.GeoDataFrame(pd.concat(all_polys, ignore_index=True), geometry="geometry", crs=target_crs)

    if compute_area:
        polygons = add_area(polygons, area_crs=area_crs)

    if return_boundaries:
        boundaries = boundaries_from_polygons(polygons, include_interiors=True)
        # Carry source_file via a nearest-neighbour join (boundaries may mix
        # sources after the per-class dissolve, so this is an approximation).
        try:
            joined = gpd.sjoin_nearest(boundaries, polygons[["source_file", "geometry"]], how="left")
            # Equidistant polygons yield several rows per boundary; keep the
            # first match so the result lines up one-to-one with `boundaries`.
            joined = joined[~joined.index.duplicated(keep="first")]
            boundaries["source_file"] = joined["source_file"].reindex(boundaries.index).values
        except Exception as exc:
            # Still best-effort (e.g. sjoin_nearest unavailable), but no longer silent.
            warnings.warn(
                f"Could not propagate source_file to boundaries: {exc}",
                RuntimeWarning,
            )
        return polygons, boundaries

    return polygons

# ---------------- Example usage ----------------
# Roots of the test and train label directories (each is searched recursively):
root_test = "/media/disk3/raman/code/Scrubland-Field-Delineation/analysis/data/AgriFieldNet/ref_agrifieldnet_competition_v1_labels_test"
root_train = "/media/disk3/raman/code/Scrubland-Field-Delineation/analysis/data/AgriFieldNet/ref_agrifieldnet_competition_v1_labels_train"


# 1) Get polygons only

# Each split is bound to its own name only. The previous chained assignment
# (`train_polys = test_polys = ...`) left `test_polys` pointing at the train
# frame until the second call replaced it.
train_polys = load_test_field_ids_to_dataframe(
    root_folder=root_train,
    target_crs="EPSG:4326",
    treat_zero_as_nodata=True,
    keep_classes=None,       # keep all non-zero IDs
    compute_area=True,       # add area_m2/ha using equal-area CRS
    area_crs="EPSG:6933",
    return_boundaries=False
)

test_polys = load_test_field_ids_to_dataframe(
    root_folder=root_test,
    target_crs="EPSG:4326",
    treat_zero_as_nodata=True,
    keep_classes=None,       # keep all non-zero IDs
    compute_area=True,       # add area_m2/ha using equal-area CRS
    area_crs="EPSG:6933",
    return_boundaries=False
)



In [29]:
# Stack the train and test polygons into one frame with a fresh 0..n-1 index.
# The CRS is taken from `train_polys`; `test_polys` must be in the same CRS.
df_merged = gpd.GeoDataFrame(
    pd.concat((train_polys, test_polys), ignore_index=True),
    crs=train_polys.crs,
)

In [31]:
import geopandas as gpd
import pandas as pd

def gdf_to_csv_for_gee(gdf, out_csv="gee_upload.csv", centroid_crs="EPSG:6933"):
    """
    Convert a GeoDataFrame of polygons to CSV (lat/lon + attributes) for GEE.
    Uses centroids of polygons.

    Parameters
    ----------
    gdf : GeoDataFrame with a CRS set; it is not modified.
    out_csv : destination path (or buffer) passed to ``DataFrame.to_csv``.
        When it is a path, missing parent directories are created.
    centroid_crs : projected CRS in which centroids are computed before being
        converted to lon/lat.

    The CSV has ``lon`` and ``lat`` first, followed by every other column
    except ``geometry``.
    """
    import os

    # Ensure everything is in WGS84 (EPSG:4326)
    gdf_wgs = gdf.to_crs(epsg=4326)

    # Centroids are a planar computation: take them once in a projected CRS
    # and convert the resulting points to lon/lat, rather than computing them
    # (twice) directly on geographic coordinates.
    centroids = gdf.geometry.to_crs(centroid_crs).centroid.to_crs(epsg=4326)
    gdf_wgs["lon"] = centroids.x.to_numpy()
    gdf_wgs["lat"] = centroids.y.to_numpy()

    # Drop geometry and save to CSV
    cols = ["lon", "lat"] + [c for c in gdf_wgs.columns if c not in ["geometry", "lon", "lat"]]
    if isinstance(out_csv, (str, os.PathLike)):
        # A missing output folder would otherwise make to_csv fail.
        os.makedirs(os.path.dirname(os.fspath(out_csv)) or ".", exist_ok=True)
    gdf_wgs[cols].to_csv(out_csv, index=False)

    print(f"Saved {len(gdf_wgs)} features to {out_csv}")

# Export the merged polygons (`df_merged`) as a centroid CSV
gdf_to_csv_for_gee(df_merged, out_csv="data/agrifieldnet.csv")

Saved 7572 features to data/agrifieldnet.csv


In [19]:
import geemap
import geopandas as gpd

# Uses `test_polys` (GeoDataFrame of polygons) created by the loader cell above.
# The earlier reference to `test_polys2` pointed at a name that no cell in this
# notebook defines, so the cell failed on a fresh kernel.

# Create an interactive map centered on India (adjust center as needed)
m = geemap.Map(center=[22.5, 79], zoom=5)

url = 'https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}'
m.layout.height = '1000px'
m.add_tile_layer(url, name="Google Map", attribution="Google")
# Convert GeoDataFrame → Earth Engine FeatureCollection
fc = geemap.geopandas_to_ee(test_polys)

# Add polygons to map with style
m.addLayer(fc, {"color": "blue", "fillColor": "00000000"}, "Field Polygons")

# Display map (works inside Jupyter Notebook / JupyterLab / Colab)
m


Map(center=[22.5, 79], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright', tr…

In [33]:
# Convert the combined train+test polygons to an Earth Engine FeatureCollection (rebinds `fc`).
fc = geemap.geopandas_to_ee(df_merged)

In [36]:
# Write the merged polygons to a shapefile.
# NOTE(review): the Shapefile format limits field names to 10 characters, so
# `source_file` is presumably truncated on write — confirm.
df_merged.to_file("data/agrifieldnet_all.shp")

In [2]:
import re, ee
ee.Authenticate() # Active as written, so it runs on every execution; per the original note, re-authentication is usually not needed for 1-2 days once done, so it can be commented out in between
# Initialise the Earth Engine client against this Cloud project.
ee.Initialize(project='raman-461708')

# Asset folder that is listed below and used as the prefix for every asset ID.
PARENT = 'projects/raman-461708/assets'

# ---- List all image assets once (fast follow-up lookups) ----
def _basename(asset_name: str) -> str:
    """Return the text after the last '/' in ``asset_name`` (the whole string if there is no '/')."""
    return asset_name.split('/')[-1]

# One listing call up front; every later lookup is answered from `basenames`.
all_assets = ee.data.listAssets({'parent': PARENT}).get('assets', [])
# Keep only entries whose 'type' is "image" (case-insensitive).
image_assets = list(filter(lambda asset: asset.get('type', '').lower() == 'image', all_assets))
# Set of the last path components of the image asset names.
basenames = set(map(_basename, (asset['name'] for asset in image_assets)))

def asset_exists_base(base: str) -> bool:
    """Return True if ``base`` is in the module-level ``basenames`` set (no new Earth Engine call is made)."""
    return base in basenames

def fullpath(base: str) -> str:
    """Return the full asset ID for ``base`` by prefixing it with ``PARENT`` and a '/'."""
    return f'{PARENT}/{base}'

def list_part_bases_for_aez(aez_no: int):
    """Return the basenames named AEZ_{n}_v4_local_partK (K numeric), ordered by K ascending."""
    pattern = re.compile(rf'^AEZ_{aez_no}_v4_local_part(\d+)$')
    candidates = ((name, pattern.match(name)) for name in basenames)
    numbered = [(int(hit.group(1)), name) for name, hit in candidates if hit]
    # Order by the numeric suffix (so part2 precedes part10); mosaic does not
    # need this, it just keeps the order deterministic.
    return [name for _, name in sorted(numbered, key=lambda pair: pair[0])]

def choose_assets_for_aez(aez_no: int):
    """
    Pick the asset(s) to use for one AEZ, trying in this order:
      1) AEZ_{n}_v4
      2) AEZ_{n}_v4_local_part*
      3) AEZ_{n}_v4_local

    Returns a tuple ``(asset_ids, meta)``: ``asset_ids`` is a list of full
    asset IDs (several when parts are used, empty when nothing matches) and
    ``meta`` is ``{'source': ..., 'parts': [...]}`` where ``source`` is one of
    'v4', 'v4_local_parts', 'v4_local' or 'missing'.
    """
    whole = f'AEZ_{aez_no}_v4'
    if asset_exists_base(whole):
        return [fullpath(whole)], {'source': 'v4', 'parts': []}

    parts = list_part_bases_for_aez(aez_no)
    if parts:
        return list(map(fullpath, parts)), {'source': 'v4_local_parts', 'parts': parts}

    local = f'AEZ_{aez_no}_v4_local'
    if asset_exists_base(local):
        return [fullpath(local)], {'source': 'v4_local', 'parts': []}

    return [], {'source': 'missing', 'parts': []}

# ---- Gather the images for AEZ 1..19 ----
# images : ee.Image objects in AEZ order (parts in the order returned)
# report : aez_no -> {'source':..., 'parts':[...]}
# missing: AEZ numbers for which no asset was found
images, report, missing = [], {}, []

for aez_no in range(1, 20):
    asset_ids, meta = choose_assets_for_aez(aez_no)
    report[aez_no] = meta
    if not asset_ids:
        missing.append(aez_no)
        continue
    # An AEZ split into parts contributes every part; the mosaic merges them.
    for aid in asset_ids:
        images.append(ee.Image(aid))

if len(images) == 0:
    raise RuntimeError("No AEZ images found across v4 / v4_local_parts / v4_local")

# NOTE(review): `roi` is not referenced again in the visible cells.
roi = ee.FeatureCollection("users/mtpictd/agro_eco_regions")

# ---- Final mosaic across all gathered images ----
# Order: AEZ 1..19; within an AEZ, parts in ascending numeric order.
# ImageCollection.mosaic() draws later images on top of earlier ones.
lulc_v4 = ee.ImageCollection(images).mosaic()
# Only the 'predicted_label' band of the v3 image is kept.
lulc_v3 = ee.Image('projects/corestack-datasets/assets/datasets/LULC_v3_river_basin/pan_india_lulc_v3_2023_2024').select('predicted_label')
# NOTE: `farm_boundaries` is rebound to a different asset in the next cell.
farm_boundaries = ee.FeatureCollection('projects/raman-461708/assets/india_10k_fields')

# Dynamic World images for the given date range that intersect the farm boundaries ...
dw = ee.ImageCollection('GOOGLE/DYNAMICWORLD/V1').filterDate('2024-07-01', '2025-07-01').filterBounds(farm_boundaries)
# ... reduced to the per-pixel mode of the 'label' band and clipped to the boundaries.
dw = dw.select('label').mode().clip(farm_boundaries)

In [None]:
farm_boundaries = ee.FeatureCollection('projects/raman-461708/assets/agrifieldnet_all')

# Inputs:
# lulc: ee.Image from which band `band` is selected (here the v4 mosaic, used as-is)
# farm_boundaries: ee.FeatureCollection
lulc = lulc_v4
band    = 'predicted_label'
# Label values whose pixels count towards the area. The earlier full list
# (0..13) was overwritten on the very next line and never used, so only the
# effective selection is kept.
classes = [9, 10, 11, 12]

# 1 where the label is one of `classes`, 0 elsewhere; zeros are masked out below.
mask = lulc.select(band).remap(classes, [1]*len(classes), 0)
# Per-pixel area in m2, kept only where `mask` is non-zero.
area_img = ee.Image.pixelArea().rename('area_m2').updateMask(mask)

def get_area(feat):
    """Return, as an ee.Number, the summed 'area_m2' band of `area_img` within the feature's geometry.

    The sum is evaluated with reduceRegion at scale 10, so only pixels left
    unmasked in `area_img` contribute to the total (in m2).
    """
    totals = area_img.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=feat.geometry(),
        scale=10,
        maxPixels=1e13,
        tileScale=4,
    )
    return ee.Number(totals.get('area_m2'))

# Attach each feature's summed area as property 'area_m2' (rebinds `farm_boundaries`).
farm_boundaries = farm_boundaries.map(lambda f: f.set('area_m2', get_area(f)))
# Pull the per-feature values to the client as a Python list ...
area_inside = farm_boundaries.aggregate_array('area_m2').getInfo()
# ... and print their total in m2.
print(sum(area_inside))

20569063.869873703


In [5]:
# Ratio of two area totals expressed as a percentage.
# NOTE(review): both literals were presumably pasted from earlier runs of the area cell — confirm which class selections they correspond to.
17966017.655689754/ 23742731.206060678 * 100

75.66954913385729

In [7]:
# Numerator and denominator are the same literal (the total printed by the area cell), so this is 100 by construction.
20569063.869873703/ 20569063.869873703 * 100

100.0