In [46]:
import ee
import geopandas as gpd
from shapely import box
import time
import glob
import pandas as pd

ee.Initialize()

In [31]:
# read kebele data
gdf = gpd.read_file("data/Ethiopia_AdminBoundaries.shp").to_crs(32637)
study_regions = ["Tigray", "Amhara"]
kebele_gdf = gdf[gdf["R_NAME"].isin(study_regions)]

In [None]:
# Build uniform 10km grid for study area bounds
min_x, min_y, max_x, max_y = [round(coord) for coord in kebele_gdf.total_bounds.tolist()]
cell_size = 25000
x_cells = int((max_x - min_x) / cell_size)
y_cells = int((max_y - min_y) / cell_size)

polygons = []
tile_ids = []
for i in range(x_cells):
    for j in range(y_cells):

        x1 = min_x + i * cell_size
        y1 = min_y + j * cell_size

        poly = box(x1, y1, x1 + cell_size, y1 + cell_size)
        tile_id = f"{x1}_{y1 + cell_size}"
        
        polygons.append(poly)
        tile_ids.append(tile_id)

tiles = gpd.GeoDataFrame({"tile_id": tile_ids, "geometry": polygons}, crs="EPSG:32637")

# Filter tiles to only keep those that intersect with actual study regions
tiles = tiles[tiles.intersects(kebele_gdf.union_all())]
tiles = tiles.reset_index(drop=True)

In [52]:
tiles

Unnamed: 0,tile_id,geometry
0,93946_988825,"POLYGON ((35.53796 8.70358, 35.53586 8.9293, 3..."
1,93946_1013825,"POLYGON ((35.53586 8.9293, 35.5337 9.15501, 35..."
2,93946_1038825,"POLYGON ((35.5337 9.15501, 35.53148 9.38071, 3..."
3,93946_1063825,"POLYGON ((35.53148 9.38071, 35.5292 9.60642, 3..."
4,93946_1088825,"POLYGON ((35.5292 9.60642, 35.52687 9.83212, 3..."
...,...,...
562,593946_1538825,"POLYGON ((40.09993 13.69102, 40.10099 13.91703..."
563,593946_1563825,"POLYGON ((40.10099 13.91703, 40.10207 14.14304..."
564,593946_1588825,"POLYGON ((40.10207 14.14304, 40.10317 14.36904..."
565,593946_1613825,"POLYGON ((40.10317 14.36904, 40.10429 14.59504..."


In [34]:
def retrieve_crop_cover_statistics(tile_geometry, year):
    """
    Calculate crop cover statistics for a tile in a given year.
    
    Args:
        tile_geometry: ee.Geometry of the tile
        year: Year to analyze
    
    Returns:
        dict with area_ha as ee.Number
    """
    start = ee.Date.fromYMD(year, 1, 1)
    end = start.advance(1, 'year')
    
    # Load Dynamic World collection
    dw = ee.ImageCollection('GOOGLE/DYNAMICWORLD/V1')
    
    # Filter by date and bounds
    dw_yoi = dw.filterDate(start, end).filterBounds(tile_geometry)
    
    # Get mode (most common classification)
    dw_image = ee.Image(dw_yoi.mode())
    
    # Select label band, clip to tile, and identify crop pixels (class 4)
    crop_image = dw_image.select('label').clip(tile_geometry).eq(4)
    
    # Calculate sum of crop pixels
    sumDict = crop_image.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=tile_geometry,
        scale=10,
        maxPixels=1e15
    )
    
    crop_pixel_count = ee.Number(sumDict.get('label'))
    crop_ha = crop_pixel_count.multiply(0.01)  # Each 10m pixel = 0.01 ha
    
    return {'area_ha': crop_ha}


In [35]:
def export_tiles_batch(tiles_gdf, years, batch_number=0, batch_size=30, folder='tiles'):
    """
    Create server-side Earth Engine export task for a batch of tiles.
    
    Args:
        tiles_gdf: GeoDataFrame with tiles
        years: List of years to process
        batch_number: Which batch to process (0-indexed)
        batch_size: Number of tiles per batch
        folder: Google Drive folder name
    
    Returns:
        Earth Engine Task object
    """
    # Calculate batch indices
    start_idx = batch_number * batch_size
    end_idx = min(start_idx + batch_size, len(tiles_gdf))
    tiles_batch = tiles_gdf.iloc[start_idx:end_idx]
    
    print(f'Creating export task for tiles {start_idx} to {end_idx - 1}')
    print(f'Batch contains {len(tiles_batch)} tiles')
    
    # Create features list
    features = []
    
    for idx, tile in tiles_batch.iterrows():
        tile_id = tile['tile_id']
        
        # Convert GeoPandas geometry to Earth Engine geometry
        tile_geojson = tile.geometry.__geo_interface__
        tile_ee_geom = ee.Geometry(tile_geojson)
        
        for year in years:
            # Calculate crop cover statistics
            result = retrieve_crop_cover_statistics(tile_ee_geom, year)
            
            # Create Earth Engine Feature (server-side object)
            feature = ee.Feature(None, {
                'tile_id': str(tile_id),
                'year': year,
                'area_ha': result['area_ha']
            })
            
            features.append(feature)
    
    # Convert to FeatureCollection
    results_collection = ee.FeatureCollection(features)
    
    # Create export task
    task = ee.batch.Export.table.toDrive(
        collection=results_collection,
        description=f'tile_crop_batch_{batch_number}',
        folder=folder,
        fileFormat='CSV',
        selectors=['tile_id', 'year', 'area_ha']
    )
    
    # Start the task
    task.start()
    
    print(f'Export task started: tile_crop_batch_{batch_number}')
    print(f'Task ID: {task.id}')
    
    return task


In [36]:
def process_all_batches(tiles_gdf, years, batch_size=30, folder='tiles'):
    """
    Process all tiles in batches and export to Google Drive.
    
    Args:
        tiles_gdf: GeoDataFrame with tiles
        years: List of years to process
        batch_size: Number of tiles per batch
        folder: Google Drive folder name
    
    Returns:
        List of Task objects
    """
    # Calculate total number of batches
    total_tiles = len(tiles_gdf)
    total_batches = (total_tiles + batch_size - 1) // batch_size
    
    print(f'Total tiles: {total_tiles}')
    print(f'Total batches: {total_batches}')
    print(f'Years: {years}')
    print(f'Total export tasks to create: {total_batches}\n')
    
    tasks = []
    
    # Create export task for each batch
    for batch_num in range(total_batches):
        print(f'\n=== Batch {batch_num + 1}/{total_batches} ===')
        
        task = export_tiles_batch(
            tiles_gdf=tiles_gdf,
            years=years,
            batch_number=batch_num,
            batch_size=batch_size,
            folder=folder
        )
        
        tasks.append(task)
        
        # Small delay to avoid overwhelming the API
        time.sleep(2)
    
    print(f'\n✓ All {len(tasks)} export tasks created!')
    print(f'Files will be saved to Google Drive folder: {folder}')
    
    return tasks

In [37]:
def monitor_tasks(tasks, check_interval=60):
    """
    Monitor the status of export tasks.
    
    Args:
        tasks: List of Task objects
        check_interval: Seconds between status checks
    """
    print(f'\nMonitoring {len(tasks)} tasks...')
    
    while True:
        statuses = [task.status() for task in tasks]
        
        completed = sum(1 for s in statuses if s['state'] == 'COMPLETED')
        failed = sum(1 for s in statuses if s['state'] == 'FAILED')
        running = sum(1 for s in statuses if s['state'] == 'RUNNING')
        ready = sum(1 for s in statuses if s['state'] == 'READY')
        
        print(f'\rCompleted: {completed}, Running: {running}, Ready: {ready}, Failed: {failed}', end='')
        
        if completed + failed == len(tasks):
            print('\n\n✓ All tasks finished!')
            
            if failed > 0:
                print(f'\n⚠ {failed} tasks failed:')
                for i, s in enumerate(statuses):
                    if s['state'] == 'FAILED':
                        print(f"  Batch {i}: {s.get('error_message', 'Unknown error')}")
            
            break
        
        time.sleep(check_interval)

In [44]:
# ====================
# FULL PIPELINE USAGE
# ====================

# Define parameters
years = [2019, 2020, 2021, 2022, 2023, 2024]
batch_size = 50
folder = 'tiles'  # Google Drive folder name

# Process all batches
tasks = process_all_batches(
    tiles_gdf=tiles,
    years=years,
    batch_size=batch_size,
    folder=folder
)

# Monitor progress (optional)
monitor_tasks(tasks, check_interval=60)

# Or check status manually later:
# ee.batch.Task.list()  # Shows all recent tasks

Total tiles: 567
Total batches: 12
Years: [2019, 2020, 2021, 2022, 2023, 2024]
Total export tasks to create: 12


=== Batch 1/12 ===
Creating export task for tiles 0 to 49
Batch contains 50 tiles
Export task started: tile_crop_batch_0
Task ID: IIHJENWXAQ5M4QAJMADFWDTF

=== Batch 2/12 ===
Creating export task for tiles 50 to 99
Batch contains 50 tiles
Export task started: tile_crop_batch_1
Task ID: D5ZCC6YFYKRFU4DEMF7VFHAF

=== Batch 3/12 ===
Creating export task for tiles 100 to 149
Batch contains 50 tiles
Export task started: tile_crop_batch_2
Task ID: CSSFSRKYSHGYF4DFTORHFFEN

=== Batch 4/12 ===
Creating export task for tiles 150 to 199
Batch contains 50 tiles
Export task started: tile_crop_batch_3
Task ID: KYV6X3XJZKDGLHQGWHUUF5J4

=== Batch 5/12 ===
Creating export task for tiles 200 to 249
Batch contains 50 tiles
Export task started: tile_crop_batch_4
Task ID: PT22HJWM7JIYC677DGKK7YBF

=== Batch 6/12 ===
Creating export task for tiles 250 to 299
Batch contains 50 tiles
Export task

In [47]:
# Read all batch CSV files from Google Drive
csv_files = glob.glob('data/crop_cover_results/tile_crop_batch_*.csv')

# Combine into single DataFrame
all_data = []
for file in csv_files:
    df = pd.read_csv(file)
    all_data.append(df)

crop_cover_df = pd.concat(all_data, ignore_index=True)

# Sort by tile_id and year
crop_cover_df = crop_cover_df.sort_values(['tile_id', 'year'])

# Save combined results
crop_cover_df.to_csv('tile_crop_cover_all_years.csv', index=False)

print(f'Total records: {len(crop_cover_df)}')
print(crop_cover_df.head())

Total records: 3402
            tile_id  year      area_ha
168  118946_1013825  2019  6688.191255
169  118946_1013825  2020  4637.463686
170  118946_1013825  2021  5145.763020
171  118946_1013825  2022  3978.541451
172  118946_1013825  2023  4400.650510


In [50]:
crop_cover_df

Unnamed: 0,tile_id,year,area_ha
168,118946_1013825,2019,6688.191255
169,118946_1013825,2020,4637.463686
170,118946_1013825,2021,5145.763020
171,118946_1013825,2022,3978.541451
172,118946_1013825,2023,4400.650510
...,...,...,...
1,93946_988825,2020,2233.924039
2,93946_988825,2021,2873.754902
3,93946_988825,2022,2264.106627
4,93946_988825,2023,1468.365137
