In [None]:
import concurrent.futures

import ee
import geemap
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.std import tqdm

In [None]:
# Load the field polygons and keep only the slice handled by this run.
# Rows before `chunk_start` are skipped; the index is rebuilt so it runs 0..n-1.
chunk_start = 100000
all_fields = gpd.read_parquet("data/usa_delineation/california.parquet")
gdf = all_fields.iloc[chunk_start:].reset_index(drop=True)
# gdf = gdf.sample(50000, random_state=42).reset_index(drop=True)

In [None]:
# Connect to Earth Engine; if initialisation fails, run the interactive
# authentication flow once and retry.
try:
    ee.Initialize()
except Exception:
    # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still propagate.
    ee.Authenticate()
    ee.Initialize()

In [None]:
def calculate_mode_and_mean(geometry):
    """Summarise the USDA/NASS CDL raster over a single polygon.

    Parameters
    ----------
    geometry
        Any object exposing ``__geo_interface__`` (e.g. a shapely geometry).

    Returns
    -------
    tuple
        ``(crop_class, mean_confidence, class_fraction)`` where

        * ``crop_class`` is the modal ``cropland`` value inside the polygon,
        * ``mean_confidence`` is the mean of the ``confidence`` band over the
          pixels of that modal class, divided by 100,
        * ``class_fraction`` is the mean of the 0/1 "pixel equals modal
          class" mask, i.e. the share of pixels belonging to the modal class.

        ``(0, 0, 0)`` is returned when anything goes wrong (invalid geometry,
        Earth Engine error, no data in the region, ...). This is a deliberate
        best-effort fallback so one bad polygon does not abort a batch run.
    """
    try:
        region = ee.Geometry(geometry.__geo_interface__)

        # The one-day date window selects the image dated 2023-01-01.
        cdl = ee.ImageCollection('USDA/NASS/CDL') \
            .filter(ee.Filter.date('2023-01-01', '2023-01-02')) \
            .first() \
            .select(["cropland", "confidence"]) \
            .clip(region)
        cropland = cdl.select('cropland')

        # Shared arguments for every region reduction below.
        reduce_args = dict(geometry=region, scale=30, maxPixels=1e13)

        mode = cropland.reduceRegion(reducer=ee.Reducer.mode(), **reduce_args) \
                       .get('cropland')
        class_number = ee.Number(mode).round()

        # 1 where the pixel belongs to the modal class, 0 elsewhere.
        modal_mask = cropland.eq(class_number)

        mean_confidence = cdl.select('confidence') \
            .updateMask(modal_mask) \
            .reduceRegion(reducer=ee.Reducer.mean(), **reduce_args) \
            .get('confidence')

        percent_class = modal_mask \
            .reduceRegion(reducer=ee.Reducer.mean(), **reduce_args) \
            .get('cropland')

        # Fetch all three values in ONE server round trip instead of three
        # separate blocking getInfo() calls.
        crop_class, confidence, fraction = ee.List(
            [class_number, mean_confidence, percent_class]
        ).getInfo()

        # A missing (None) confidence raises TypeError here and falls through
        # to the fallback below, as before.
        return crop_class, confidence / 100, fraction
    except Exception:
        # Narrower than a bare `except:` so KeyboardInterrupt / SystemExit
        # are not swallowed.
        return 0, 0, 0

In [None]:
# Lookup table from integer class code to a human-readable label. It is applied
# to the `crop_class` column (the modal value of the CDL `cropland` band).
# NOTE(review): code 0 has no entry, and calculate_mode_and_mean() returns 0
# for polygons it could not process, so those rows get no label when mapped.
crop_names = {
    1: "Corn",
    2: "Cotton",
    3: "Rice",
    4: "Sorghum",
    5: "Soybeans",
    6: "Sunflower",
    10: "Peanuts",
    11: "Tobacco",
    12: "Sweet Corn",
    13: "Pop or Orn Corn",
    14: "Mint",
    21: "Barley",
    22: "Durum Wheat",
    23: "Spring Wheat",
    24: "Winter Wheat",
    25: "Other Small Grains",
    26: "Dbl Crop WinWht/Soybeans",
    27: "Rye",
    28: "Oats",
    29: "Millet",
    30: "Speltz",
    31: "Canola",
    32: "Flaxseed",
    33: "Safflower",
    34: "Rape Seed",
    35: "Mustard",
    36: "Alfalfa",
    37: "Other Hay/Non Alfalfa",
    38: "Camelina",
    39: "Buckwheat",
    41: "Sugarbeets",
    42: "Dry Beans",
    43: "Potatoes",
    44: "Other Crops",
    45: "Sugarcane",
    46: "Sweet Potatoes",
    47: "Misc Vegs & Fruits",
    48: "Watermelons",
    49: "Onions",
    50: "Cucumbers",
    51: "Chick Peas",
    52: "Lentils",
    53: "Peas",
    54: "Tomatoes",
    55: "Caneberries",
    56: "Hops",
    57: "Herbs",
    58: "Clover/Wildflowers",
    59: "Sod/Grass Seed",
    60: "Switchgrass",
    61: "Fallow/Idle Cropland",
    63: "Forest",
    64: "Shrubland",
    65: "Barren",
    66: "Cherries",
    67: "Peaches",
    68: "Apples",
    69: "Grapes",
    70: "Christmas Trees",
    71: "Other Tree Crops",
    72: "Citrus",
    74: "Pecans",
    75: "Almonds",
    76: "Walnuts",
    77: "Pears",
    81: "Clouds/No Data",
    82: "Developed",
    83: "Water",
    87: "Wetlands",
    88: "Nonag/Undefined",
    92: "Aquaculture",
    111: "Open Water",
    112: "Perennial Ice/Snow",
    121: "Developed/Open Space",
    122: "Developed/Low Intensity",
    123: "Developed/Med Intensity",
    124: "Developed/High Intensity",
    131: "Barren",
    141: "Deciduous Forest",
    142: "Evergreen Forest",
    143: "Mixed Forest",
    152: "Shrubland",
    176: "Grassland/Pasture",
    190: "Woody Wetlands",
    195: "Herbaceous Wetlands",
    204: "Pistachios",
    205: "Triticale",
    206: "Carrots",
    207: "Asparagus",
    208: "Garlic",
    209: "Cantaloupes",
    210: "Prunes",
    211: "Olives",
    212: "Oranges",
    213: "Honeydew Melons",
    214: "Broccoli",
    215: "Avocados",
    216: "Peppers",
    217: "Pomegranates",
    218: "Nectarines",
    219: "Greens",
    220: "Plums",
    221: "Strawberries",
    222: "Squash",
    223: "Apricots",
    224: "Vetch",
    225: "Dbl Crop WinWht/Corn",
    226: "Dbl Crop Oats/Corn",
    227: "Lettuce",
    228: "Dbl Crop Triticale/Corn",
    229: "Pumpkins",
    230: "Dbl Crop Lettuce/Durum Wht",
    231: "Dbl Crop Lettuce/Cantaloupe",
    232: "Dbl Crop Lettuce/Cotton",
    233: "Dbl Crop Lettuce/Barley",
    234: "Dbl Crop Durum Wht/Sorghum",
    235: "Dbl Crop Barley/Sorghum",
    236: "Dbl Crop WinWht/Sorghum",
    237: "Dbl Crop Barley/Corn",
    238: "Dbl Crop WinWht/Cotton",
    239: "Dbl Crop Soybeans/Cotton",
    240: "Dbl Crop Soybeans/Oats",
    241: "Dbl Crop Corn/Soybeans",
    242: "Blueberries",
    243: "Cabbage",
    244: "Cauliflower",
    245: "Celery",
    246: "Radishes",
    247: "Turnips",
    248: "Eggplants",
    249: "Gourds",
    250: "Cranberries",
    254: "Dbl Crop Barley/Soybeans",
}

In [None]:
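# NOTE: disabled debugging cell (previews the CDL raster for a single polygon).
# `palette`, used in Map.addLayer below, is not defined in the visible cells;
# define it before re-enabling this code.
# cdl = ee.ImageCollection('USDA/NASS/CDL') \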
#           .filter(ee.Filter.date('2023-01-01', '2023-01-02')) \
#           .first() \
#           .select(["cropland", "confidence"])


# region = gdf.iloc[4].geometry

# print(calculate_mode_and_mean(region))

# region = ee.Geometry(region.__geo_interface__).buffer(-30)

# cropped_image = cdl.clip(region)

# Map = geemap.Map()
# Map.addLayer(cropped_image, {'bands': ['cropland'], 'palette': palette, 'min': 1, 'max': 254}, 'CDL Cropped')
# Map.centerObject(region)

# Map

In [None]:
# Run calculate_mode_and_mean for every polygon in parallel threads (the work
# is dominated by waiting on Earth Engine requests).
# Output buffers, one slot per row of gdf; rows that failed keep the value 0.
croplands = np.zeros(len(gdf), dtype=np.uint8)
confidences = np.zeros(len(gdf), dtype=np.half)  # 16-bit floats
percentages = np.zeros(len(gdf), dtype=np.half)

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each future to the row POSITION of its polygon. Using enumerate()
    # instead of the DataFrame index label keeps results aligned with the
    # arrays even when gdf's index is not 0..n-1.
    future_to_position = {
        executor.submit(calculate_mode_and_mean, geometry): position
        for position, geometry in enumerate(gdf.geometry)
    }

    # Store results as they complete; completion order is arbitrary, so the
    # recorded position decides where each result goes.
    for future in tqdm(concurrent.futures.as_completed(future_to_position), total=len(future_to_position)):
        position = future_to_position[future]
        crop_class, confidence, percentage = future.result()
        croplands[position] = crop_class
        confidences[position] = confidence
        percentages[position] = percentage

In [None]:
# Attach the per-polygon results to the frame as three new columns.
computed_columns = {
    "crop_class": croplands,
    "crop_confidence": confidences,
    "crop_percentage": percentages,
}
for column_name, column_values in computed_columns.items():
    gdf[column_name] = column_values

In [None]:
# Write this run's labelled polygons to their own Parquet file.
chunk_output = "2_california.parquet"
gdf.to_parquet(chunk_output, compression="brotli")

In [None]:
# Stitch the per-chunk result files back into a single frame.
# NOTE(review): only "2_california.parquet" is written by the visible cells; the
# other two are presumably produced by earlier runs with a different slice.
chunk_paths = [
    "0_california.parquet",
    "1_california.parquet",
    "2_california.parquet",
]

# ignore_index=True gives the merged frame a fresh 0..n-1 index.
gdf = pd.concat([gpd.read_parquet(path) for path in chunk_paths], ignore_index=True)

In [None]:
# Translate numeric class codes into readable labels via the crop_names lookup.
class_codes = gdf["crop_class"]
gdf["crop_names"] = class_codes.map(crop_names)

In [None]:
# Bar chart: number of polygons per crop label, all rows.
_, ax = plt.subplots(figsize=(15, 5))

all_labels = gdf["crop_names"]
polygons_per_crop = all_labels.groupby(all_labels).count()
polygons_per_crop.plot.bar(ax=ax)

plt.show()

In [None]:
# Keep only polygons whose modal class is both confidently classified and
# covers more than 90% of the polygon's pixels.
is_confident = gdf["crop_confidence"] > 0.9
is_homogeneous = gdf["crop_percentage"] > 0.9
filtered_gdf = gdf[is_confident & is_homogeneous].reset_index(drop=True)

In [None]:
# Bar chart: number of polygons per crop label, high-confidence subset only.
_, ax = plt.subplots(figsize=(15, 5))

kept_labels = filtered_gdf["crop_names"]
kept_per_crop = kept_labels.groupby(kept_labels).count()
kept_per_crop.plot.bar(ax=ax, color="purple")

plt.show()

In [None]:
# The 50 most frequent crop labels in the filtered set, largest first.
filtered_labels = filtered_gdf["crop_names"]
filtered_counts = filtered_labels.groupby(filtered_labels).count()
filtered_counts.sort_values(ascending=False).head(50)