In [None]:
import geopandas as gpd
import geemap
import ee

import concurrent.futures
import numpy as np
from tqdm.std import tqdm

In [None]:
# Location of the Iowa delineation layer (Parquet), relative to the notebook.
IOWA_PARQUET_PATH = "data/usa_delineation/iowa.parquet"
gdf = gpd.read_parquet(IOWA_PARQUET_PATH)
# For a quick trial run, uncomment to keep only the first 1000 rows:
# gdf = gdf.iloc[0:1000]

In [None]:
def calculate_mode_and_mean(geometry):
    """Summarise the USDA CDL raster over a single geometry.

    The first ``USDA/NASS/CDL`` image dated 2023-01-01 is clipped to the
    geometry and reduced at a 30 m scale.

    Args:
        geometry: Any object exposing ``__geo_interface__`` (for example the
            geometries stored in a GeoDataFrame).

    Returns:
        A tuple ``(crop_class, mean_confidence, class_fraction)``:

        * ``crop_class`` -- rounded modal value of the ``cropland`` band.
        * ``mean_confidence`` -- mean of the ``confidence`` band over the
          pixels belonging to the modal class, divided by 100 (the band is
          presumably a 0-100 percentage -- confirm against the dataset docs).
        * ``class_fraction`` -- mean of the modal-class mask, i.e. the share
          of pixels inside the geometry that belong to the modal class.

        If anything fails (invalid geometry, Earth Engine error, empty
        reduction result) the sentinel ``(-1, 0, 0)`` is returned instead of
        raising, so one bad geometry does not abort a batch run.
    """
    try:
        region = ee.Geometry(geometry.__geo_interface__)

        cdl = (
            ee.ImageCollection('USDA/NASS/CDL')
            .filter(ee.Filter.date('2023-01-01', '2023-01-02'))
            .first()
            .select(["cropland", "confidence"])
            .clip(region)
        )
        cropland = cdl.select('cropland')

        # Every reduction below runs over the same region with the same settings.
        reduce_args = dict(geometry=region, scale=30, maxPixels=1e13)

        mode = cropland.reduceRegion(
            reducer=ee.Reducer.mode(), **reduce_args
        ).get('cropland')
        class_number = ee.Number(mode).round()

        # 1 where the pixel belongs to the modal class, 0 elsewhere.
        modal_mask = cropland.eq(class_number)

        mean_confidence = (
            cdl.select('confidence')
            .updateMask(modal_mask)
            .reduceRegion(reducer=ee.Reducer.mean(), **reduce_args)
            .get('confidence')
        )
        percent_class = modal_mask.reduceRegion(
            reducer=ee.Reducer.mean(), **reduce_args
        ).get('cropland')

        # Fetch the three values in ONE round trip instead of three separate
        # blocking getInfo() calls per geometry.
        crop_class, confidence, class_fraction = ee.List(
            [class_number, ee.Number(mean_confidence), ee.Number(percent_class)]
        ).getInfo()

        return crop_class, confidence / 100, class_fraction
    except Exception:
        # Deliberate best-effort fallback. `Exception` (not a bare `except`)
        # keeps KeyboardInterrupt / SystemExit working so a long run can still
        # be interrupted.
        return -1, 0, 0

In [None]:
# (label, hex colour) pairs; only the colours are kept in `palette`,
# in exactly this order.
_PALETTE_ENTRIES = (
    ("corn", '#ffff00'),
    ("soybean", '#00ff00'),
    ("wheat", '#006400'),
    ("hay", '#8b4513'),
    ("cotton", '#ff0000'),
    ("sweet corn", '#ff69b4'),
    ("sorghum", '#ee82ee'),
    ("alfalfa", '#a52a2a'),
    ("sunflower", '#d2691e'),
    ("other crops", '#32cd32'),
    ("shrubland", '#ff8c00'),
    ("water", '#0000ff'),
    ("developed areas", '#708090'),
)
palette = [hex_colour for _label, hex_colour in _PALETTE_ENTRIES]

In [None]:
# Initialise the Earth Engine client; this must run before any cell that
# evaluates an Earth Engine request.
# NOTE(review): presumably relies on credentials saved by a prior
# ee.Authenticate() -- confirm for the target environment.
ee.Initialize()

# Optional visual sanity check (disabled). When uncommented it loads the same
# CDL image used by calculate_mode_and_mean, prints the statistics for one
# geometry of `gdf`, and shows the clipped raster on an interactive geemap map.
# cdl = ee.ImageCollection('USDA/NASS/CDL') \
#           .filter(ee.Filter.date('2023-01-01', '2023-01-02')) \
#           .first() \
#           .select(["cropland", "confidence"])


# region = gdf.iloc[4].geometry

# print(calculate_mode_and_mean(region))

# region = ee.Geometry(region.__geo_interface__).buffer(-30)

# cropped_image = cdl.clip(region)

# Map = geemap.Map()
# Map.addLayer(cropped_image, {'bands': ['cropland'], 'palette': palette, 'min': 1, 'max': 254}, 'CDL Cropped')
# Map.centerObject(region)

# Map

In [None]:
# Run calculate_mode_and_mean for every geometry in parallel. Each call mostly
# waits on Earth Engine, so a thread pool is sufficient.
n_rows = len(gdf)

# Signed dtype pre-filled with the failure sentinel: calculate_mode_and_mean
# returns -1 on error, which does not fit in uint8 (it wraps to 255 on older
# NumPy and raises OverflowError on NumPy 2).
croplands = np.full(n_rows, -1, dtype=np.int16)
confidences = np.zeros(n_rows, dtype=np.half)
percentages = np.zeros(n_rows, dtype=np.half)

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Key each future by the row's POSITION. The labels yielded by
    # gdf.iterrows() are index labels, which are only valid array offsets when
    # the index happens to be 0..n-1 (not the case after filtering/subsetting).
    future_to_position = {
        executor.submit(calculate_mode_and_mean, geometry): position
        for position, geometry in enumerate(gdf.geometry)
    }

    # Store results as they complete; the position restores row order.
    for future in tqdm(concurrent.futures.as_completed(future_to_position), total=n_rows):
        position = future_to_position[future]
        crop_class, confidence, class_fraction = future.result()
        croplands[position] = crop_class
        confidences[position] = confidence
        percentages[position] = class_fraction

In [None]:
# Attach the per-row results to the GeoDataFrame as three new columns.
for column_name, column_values in (
    ("crop_class", croplands),
    ("crop_confidence", confidences),
    ("crop_percentage", percentages),
):
    gdf[column_name] = column_values

In [None]:
# Number of rows per crop class, restricted to rows whose confidence and
# modal-class share both exceed 0.9.
is_reliable = gdf["crop_confidence"].gt(0.9) & gdf["crop_percentage"].gt(0.9)
gdf[is_reliable].groupby("crop_class").size()