In [None]:
import ee
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from datetime import datetime
from datetime import date
from concurrent.futures import ThreadPoolExecutor
import geopandas as gpd
import glob
import random
from datetime import datetime, timedelta

# Run the Earth Engine authentication flow up front; the next cell calls ee.Initialize().
ee.Authenticate()

In [None]:
# Initialize Earth Engine against the project used by this notebook
# (ee.Authenticate() is called at the end of the import cell).
ee.Initialize(project='ee-mkmitchellducks')
log_file = "background_log.txt"
# -------------------------
# CONFIG
# -------------------------
asset_folder = "projects/ee-mkmitchellducks/assets/gbif"
aoifile = '/mnt/e/gis/BaseData/MAV_Boundary_4326_wkb.parquet'

scale = 100  # Adjust based on your imagery resolution
aoi_gdf = gpd.read_parquet(aoifile)
# Dissolve every AOI row and hand Earth Engine the complete GeoJSON geometry.
# Going through __geo_interface__ (instead of .exterior.coords) keeps interior
# rings and also works when the dissolved result is a MultiPolygon, which has
# no .exterior attribute.
aoi_geom = ee.Geometry(aoi_gdf.geometry.union_all().__geo_interface__)
# Buffer by 5000 m for focal stats
aoi_buffered = aoi_geom.buffer(5000)  # 5 km buffer

In [None]:
# -------------------------
# Logging
# -------------------------
def log(message, path=None):
    """Append a timestamped line to the log file and echo the message to stdout.

    Args:
        message: Text to record. It is written as ``[YYYY-MM-DD HH:MM:SS] message``.
        path: Optional log-file path. When omitted, the module-level
            ``log_file`` is used.
    """
    target = log_file if path is None else path
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Force UTF-8: callers log symbols such as "✔" and "❌", which a
    # non-UTF-8 platform default encoding (e.g. cp1252) cannot encode.
    with open(target, "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] {message}\n")
    print(message)

# -------------------------
# Split FeatureCollection into subsets
# -------------------------
def split_fc(fc, n_subsets=10):
    """Split a FeatureCollection into at most ``n_subsets`` contiguous chunks.

    The collection size is fetched with a client-side ``getInfo()`` call, the
    collection is converted to a list, and the list is sliced into
    equally sized pieces (the last piece may be shorter).

    Args:
        fc: The ``ee.FeatureCollection`` to split.
        n_subsets: Maximum number of chunks to produce; must be >= 1.

    Returns:
        A Python list of ``ee.FeatureCollection`` objects that together cover
        every element of ``fc`` once, in order. Empty when ``fc`` is empty.

    Raises:
        ValueError: If ``n_subsets`` is less than 1.
    """
    if n_subsets < 1:
        raise ValueError("n_subsets must be at least 1")
    print('splitting fc')
    n_points = fc.size().getInfo()
    subsets = []
    if n_points > 0:
        # Ceiling division: the smallest chunk size that fits everything into
        # n_subsets chunks. (The former "// n_subsets + 1" made chunks one
        # element too large whenever n_points divided evenly, e.g. 20 points
        # in 10 subsets produced only 7 chunks.)
        step = -(-n_points // n_subsets)
        points_list = fc.toList(n_points)
        subsets = [
            ee.FeatureCollection(points_list.slice(start, start + step))
            for start in range(0, n_points, step)
        ]
    print('split')
    return subsets
    
# -------------------------
# Dynamic World mode image
# -------------------------
def get_dw_mode_image(obs_date):
    """Return the per-pixel modal Dynamic World label for the 3 months before a date.

    Args:
        obs_date: Any value accepted by ``ee.Date`` (the caller passes the
            ``obs_date`` property of a feature).

    Returns:
        The result of reducing the ``label`` band of
        ``GOOGLE/DYNAMICWORLD/V1`` with ``ee.Reducer.mode()`` over the window
        from ``obs_date`` minus three months to ``obs_date``.
    """
    window_end = ee.Date(obs_date)
    window_start = window_end.advance(-3, 'month')
    return (
        ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1")
        .select("label")
        .filterDate(window_start, window_end)
        .reduce(ee.Reducer.mode())
    )

# -------------------------
# Percent cover for DW classes
# -------------------------
def compute_dw_percent_cover(dw_img, radius_m):
    """Build one focal percent-cover band per label value 0-8.

    For each label value, the image is turned into an equality mask, averaged
    over a normalized circular kernel of ``radius_m`` meters, and scaled by 100.

    Args:
        dw_img: Label image to analyse (the caller passes the output of
            ``get_dw_mode_image``).
        radius_m: Kernel radius in meters; also embedded in the band names.

    Returns:
        ``ee.Image.cat`` of nine bands named
        ``dw_class_<value>_pct_<radius_m>m``.
    """
    neighborhood = ee.Kernel.circle(radius=radius_m, units='meters', normalize=True)

    def class_share(label_value):
        # Focal mean of the 0/1 mask, expressed as a percentage.
        band_name = f'dw_class_{label_value}_pct_{radius_m}m'
        is_class = dw_img.eq(label_value)
        focal_mean = is_class.reduceNeighborhood(ee.Reducer.mean(), neighborhood)
        return focal_mean.multiply(100).rename(band_name)

    return ee.Image.cat([class_share(label_value) for label_value in range(9)])

# -------------------------
# Forest edge and core
# -------------------------
def compute_forest_metrics(dw_img, radius_m):
    """Derive a focal forest-edge sum and a forest-core mask from a label image.

    Pixels equal to 1 are treated as forest. Edges of the forest mask are
    found with the Canny detector and summed inside a circular kernel of
    ``radius_m`` meters. Core forest is forest that does not fall inside a
    100 m ``focal_max`` dilation of the non-forest mask.

    Args:
        dw_img: Label image to analyse.
        radius_m: Radius in meters of the kernel used for the edge sum.

    Returns:
        ``ee.Image.cat`` of the bands ``forest_edge_length`` and ``forest_core``.
    """
    # NOTE(review): the band names carry no radius suffix, so calling this for
    # two radii and concatenating the results yields duplicate names - confirm
    # how the export labels them.
    # NOTE(review): the core buffer is a fixed 100 m and ignores radius_m -
    # confirm that is intended.
    is_forest = dw_img.eq(1)
    not_forest = dw_img.neq(1)

    edges = ee.Algorithms.CannyEdgeDetector(image=is_forest, threshold=0.5, sigma=1)
    window = ee.Kernel.circle(radius=radius_m, units='meters')
    edge_band = edges.reduceNeighborhood(ee.Reducer.sum(), window).rename('forest_edge_length')

    near_non_forest = not_forest.focal_max(radius=100, units='meters')
    core_band = is_forest.And(near_non_forest.Not()).rename('forest_core')

    return ee.Image.cat([edge_band, core_band])
    
# -------------------------
# Assign random date between 2017-01-01 and 2024-12-31
# -------------------------
def assign_random_date(feature):
    """Set ``obs_date`` on a feature to a random day in 2017-01-01..2024-12-31.

    Both end dates are inclusive and the value is stored as a ``YYYY-MM-DD``
    string.

    WARNING: the date is drawn on the client with Python's ``random`` module.
    When this function is handed to ``ee.FeatureCollection.map()``, Earth
    Engine calls it a single time to build the server-side expression, so
    every feature in the collection receives the same date. Use a server-side
    random source (e.g. ``randomColumn``) when per-feature dates are needed.

    Args:
        feature: Object exposing ``set(name, value)`` (an ``ee.Feature``).

    Returns:
        Whatever ``feature.set('obs_date', ...)`` returns.
    """
    first_day = datetime.strptime("2017-01-01", "%Y-%m-%d")
    last_day = datetime.strptime("2024-12-31", "%Y-%m-%d")
    span_days = (last_day - first_day).days
    # randint is inclusive on both ends, so offsets 0..span_days map exactly
    # onto first_day..last_day. (The previous 1..span_days+1 range could never
    # return first_day and could return the day after last_day.)
    offset = random.randint(0, span_days)
    picked = first_day + timedelta(days=offset)
    return feature.set('obs_date', picked.strftime("%Y-%m-%d"))
    
# -------------------------
# Export Task Function
# -------------------------
def export_subset(sub_fc, species='background', subset_index=None):
    """Sample land-cover metrics at every point and start a Drive table export.

    For each feature, a Dynamic World mode image is built from the feature's
    ``obs_date``, percent-cover and forest metrics are computed at 100 m and
    10 km, and the stacked image is sampled at the feature's location.

    Args:
        sub_fc: ``ee.FeatureCollection`` of points carrying an ``obs_date``
            property.
        species: Label used in the log messages only.
        subset_index: Optional suffix for the export description so several
            subsets can be exported under distinct names. When omitted the
            description is ``background_pts``.

    Returns:
        The started export task, or ``None`` when setting up or starting the
        export raised. Failures are logged, not re-raised.
    """
    def process_feature(f):
        # Each feature gets its own image stack because the Dynamic World
        # window depends on that feature's obs_date.
        obs_date = f.get('obs_date')
        dw_img = get_dw_mode_image(obs_date)
        cover_100m = compute_dw_percent_cover(dw_img, radius_m=100)
        cover_10km = compute_dw_percent_cover(dw_img, radius_m=10000)
        forest_metrics_100m = compute_forest_metrics(dw_img, 100)
        forest_metrics_10km = compute_forest_metrics(dw_img, 10000)

        full_img = ee.Image.cat([cover_100m, cover_10km, forest_metrics_100m, forest_metrics_10km])
        sampled = full_img.sampleRegions(
            collection=ee.FeatureCollection([f]),
            scale=100,
            geometries=True,
            tileScale=4
        )
        return sampled

    export_desc = "background_pts" if subset_index is None else f"background_pts_{subset_index}"
    # NOTE(review): an asset-style path is passed as the Drive fileNamePrefix;
    # confirm whether Export.table.toAsset was intended here.
    asset_id = f"{asset_folder}/{export_desc}"

    # A single try block: a failure while building the collection is reported
    # with its message instead of surfacing later as an undefined-name error.
    try:
        print('Setting up exports')
        # map() yields one sampled collection per feature; flatten() merges them.
        sampled_fc = sub_fc.map(process_feature).flatten()
        task = ee.batch.Export.table.toDrive(
            collection=sampled_fc,
            description=export_desc,
            fileNamePrefix=asset_id
        )
        log("✔ Starting task")
        task.start()
        log(f"✔ Export started for {species}")
        return task
    except Exception as e:
        # Best-effort by design: record the reason and let the caller continue.
        log(f"❌ Export failed for {species}: {e}")
        return None


In [None]:
# -------------------------
# Generate random points
# -------------------------
n_points = 10000

# Convert GeoPandas geometry to GeoJSON
# NOTE(review): only the first AOI row is used here, while the config cell
# dissolves all rows - confirm the parquet holds a single boundary feature.
aoi_geojson = aoi_gdf.geometry.iloc[0].__geo_interface__
# Create EE Geometry from GeoJSON
aoi = ee.Geometry(aoi_geojson)

random_points = ee.FeatureCollection.randomPoints(region=aoi, points=n_points, seed=42)

# Observation dates must be drawn on the server. A function passed to
# FeatureCollection.map() is invoked once on the client to build the
# expression, so Python's random module would stamp the same date on every
# point. randomColumn() provides a deterministic per-feature uniform value
# in [0, 1), which is scaled to a whole-day offset inside the window.
obs_window_start = date(2017, 1, 1)
obs_window_end = date(2024, 12, 31)
obs_window_days = (obs_window_end - obs_window_start).days + 1  # both ends inclusive
ee_window_start = ee.Date(obs_window_start.isoformat())


def _set_random_obs_date(feature):
    """Server-side: turn the feature's 'date_rand' value into an obs_date string."""
    day_offset = ee.Number(feature.get('date_rand')).multiply(obs_window_days).floor()
    obs_day = ee_window_start.advance(day_offset, 'day')
    return feature.set('obs_date', obs_day.format('YYYY-MM-dd'))


# The helper column 'date_rand' stays on the features alongside 'obs_date'.
points_with_dates = random_points.randomColumn('date_rand', seed=42).map(_set_random_obs_date)
# The subsets feed only the disabled parallel-export path below; the export
# that actually runs takes the full collection.
subsets = split_fc(points_with_dates, n_subsets=10)
export_subset(points_with_dates)

#with ThreadPoolExecutor(max_workers=1) as executor:
#    for i, sub_fc in enumerate(subsets):
#        executor.submit(export_subset, sub_fc, 'background', i)


In [None]:
# Spot check: fetch the obs_date of the first point back to the client for display.
points_with_dates.first().get('obs_date').getInfo()