In [10]:
import ee
import time

ee.Initialize(project='kochias2',opt_url='https://earthengine-highvolume.googleapis.com')
out_folder = 'projects/kochias2/assets/CropMask_embedding'

In [9]:
# ------------------------------------------------------------------
# 0) PREP GRID, DATASET, MODEL, YEARS (same as before)
# ------------------------------------------------------------------
# grid = ee.FeatureCollection('projects/ee-thuan-ha/assets/grid_SK/grid_SK')
# grid = ee.FeatureCollection('projects/ee-thuan-ha/assets/grid_SK/grid_AB')
grid = ee.FeatureCollection('projects/ee-thuan-ha/assets/grid_SK/grid_MB')
n = grid.size()
grid_list = grid.toList(n)
ids = ee.List.sequence(1, n)

In [11]:
# shpID is your FeatureCollection with ID 1..n
shpID = ee.FeatureCollection(ids.zip(grid_list).map(lambda pair: ee.Feature(ee.List(pair).get(1)).set('ID', ee.List(pair).get(0))))
total_tiles = shpID.size().getInfo()
# print("Total tiles:", total_tiles)

# Server-side list of features (STAYS server-side)
shp_list = shpID.toList(total_tiles)

# On the Python side, we just track indices 0..total_tiles-1
indices = list(range(total_tiles))  # or subset if you want
print("Number of tiles to export:", len(indices))

Number of tiles to export: 239


In [12]:
def start_export_for_tile(idx):
    # idx is a Python int, used as index into server-side list
    fea  = ee.Feature(shp_list.get(idx))   # still server-side
    geom = fea.geometry()                  # server-side
    gid  = fea.get('ID').getInfo()         # one getInfo per task is OK

    per_year_coll = ee.ImageCollection(years.map(lambda y: classify_year(geom, y)))
    crop_sum = per_year_coll.sum().rename('crop_sum_2017_2024')

    desc     = f'CropSum2017_2024_SK_gridID_{gid}'
    asset_id = f'{out_folder}/{desc}'

    task = ee.batch.Export.image.toAsset(image=crop_sum, description=desc, assetId=asset_id, region=geom, scale=10, maxPixels=1e13)
    task.start()
    print(f"Started export for grid ID {gid}")
    return task

def start_export_for_tile(idx):
    fea  = ee.Feature(shp_list.get(idx))
    geom = fea.geometry()
    gid  = idx + 1  # matches ID sequence you set earlier

    per_year_coll = ee.ImageCollection(years.map(lambda y: classify_year(geom, y))    )
    crop_sum = per_year_coll.sum().rename('crop_sum_2017_2024')
    desc     = f'CropSum2017_2024_AB_gridID_{gid}'
    asset_id = f'{out_folder}/{desc}'
    
    # download task
    task = ee.batch.Export.image.toAsset(
        image=crop_sum, description=desc, assetId=asset_id,
        region=geom, scale=10, maxPixels=1e13    )
    task.start()
    print(f"Started export for grid ID {gid}")
    return task

def classify_year(geom, year):
    year = ee.Number(year)
    start = ee.Date.fromYMD(year, 1, 1)
    end_  = start.advance(1, 'year')
    img_year = (dataset.filterDate(start, end_).filterBounds(geom).mosaic().clip(geom))
    classified = (img_year.select(feature_bands).classify(rf_model).rename('crop'))  # 0/1 band

    return classified

In [13]:
# Input image frol Alpha Earth
batch_size   = 20
total_tiles  = len(indices)
years = ee.List.sequence(2017, 2024)
dataset = ee.ImageCollection('GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL')
feature_bands = dataset.first().bandNames()

rf_model = ee.Classifier.load('projects/just-amp-296821/assets/CropMask_embedding_saveModel/RF_CropMask_embedding_best_T200')


# Loop each grid tile and download
for start_idx in range(0, total_tiles, batch_size):
    batch_indices = indices[start_idx:start_idx + batch_size]
    print(f"\n=== Starting batch {start_idx}–{start_idx + len(batch_indices) - 1} ===")

    tasks = [start_export_for_tile(idx) for idx in batch_indices]
    # Poll until ALL tasks in this batch are finished
    while True:
        states = [t.status()['state'] for t in tasks]
        n_ready   = states.count('READY')
        n_running = states.count('RUNNING')
        n_done    = states.count('COMPLETED')
        n_failed  = states.count('FAILED')
        n_canc    = states.count('CANCELLED')

        print(f'Batch {start_idx}: READY={n_ready}, RUNNING={n_running}, '
              f'COMPLETED={n_done}, FAILED={n_failed}, CANCELLED={n_canc}')

        # Exit loop when no task is still pending
        if n_ready == 0 and n_running == 0:
            print(f'Batch {start_idx} finished.')
            break

        time.sleep(60)  # wait 60 seconds before checking again
        





=== Starting batch 0–19 ===
Started export for grid ID 1
Started export for grid ID 2
Started export for grid ID 3
Started export for grid ID 4
Started export for grid ID 5
Started export for grid ID 6
Started export for grid ID 7
Started export for grid ID 8
Started export for grid ID 9
Started export for grid ID 10
Started export for grid ID 11
Started export for grid ID 12
Started export for grid ID 13
Started export for grid ID 14
Started export for grid ID 15
Started export for grid ID 16
Started export for grid ID 17
Started export for grid ID 18
Started export for grid ID 19
Started export for grid ID 20
Batch 0: READY=18, RUNNING=2, COMPLETED=0, FAILED=0, CANCELLED=0
Batch 0: READY=8, RUNNING=0, COMPLETED=0, FAILED=12, CANCELLED=0
Batch 0: READY=0, RUNNING=0, COMPLETED=0, FAILED=20, CANCELLED=0
Batch 0 finished.

=== Starting batch 20–39 ===
Started export for grid ID 21
Started export for grid ID 22
Started export for grid ID 23
Started export for grid ID 24
Started export fo