# Download Crop Mask

1 - General setup

In [1]:
# import libraries
import ee
import geemap
# Authenticate with Earth Engine, then initialize the client session that the
# ee.* calls in the rest of the notebook rely on.
ee.Authenticate()
ee.Initialize()
# Process count cap; only referenced by the (commented-out) parallel download cell,
# where it is combined with the grid size via min().
cpus_ = 7





2 - Select a province to download

In [2]:
from pathlib import Path

# Project root = parent of the current working directory. This assumes the kernel's
# working directory is a first-level subfolder of the project (the original note names
# 2_RGB_download — NOTE(review): confirm that is where this notebook actually lives).
project_root = Path.cwd().parent

def get_out_dir(provName):
    """Return the mask download folder for a province code, as a string.

    The folder is ``project_root / '5_Data' / 'Mask_download' / <province name>``.

    Args:
        provName: Province code, one of 'AB', 'SK' or 'MB'.

    Returns:
        The folder path converted with ``str()``.

    Raises:
        ValueError: If ``provName`` is not one of the three supported codes.
    """
    # (code, folder name) pairs, checked in the same order as the supported list.
    known_provinces = (
        ('AB', 'Alberta'),
        ('SK', 'Saskatchewan'),
        ('MB', 'Manitoba'),
    )
    for code, folder_name in known_provinces:
        if provName == code:
            return str(project_root / '5_Data' / 'Mask_download' / folder_name)
    raise ValueError("Invalid province name. Use 'AB', 'SK', or 'MB'.")

# Province to process: 'AB', 'SK' or 'MB'
provName = 'SK'

# Per-province settings: (abbreviation prefix matched against the boundary
# collection's 'PRFABBR' property, Earth Engine asset id kept in `asset_path`)
_PROVINCE_SETTINGS = {
    'AB': ('Alb.', 'projects/ee-aafc-annimation/assets/alberta_highway'),
    'SK': ('Sask.', "projects/just-amp-296821/assets/road_sk_utm13"),
    'MB': ('Man.', 'projects/just-amp-296821/assets/road_mb_utm14'),
}

if provName not in _PROVINCE_SETTINGS:
    raise ValueError("Invalid province name. Use 'AB', 'SK', or 'MB'.")
prov, asset_path = _PROVINCE_SETTINGS[provName]

# Local folder that receives the downloaded mask tiles for this province
out_dir = get_out_dir(provName)

3 - Get tiles

In [3]:
# Provincial boundaries; keep the feature(s) whose 'PRFABBR' starts with `prov`
allProvince = ee.FeatureCollection('projects/ee-aafc-annimation/assets/provincialBoundary')
pruid_list = allProvince.aggregate_array('PRFABBR')
prov_filter = ee.Filter.stringStartsWith('PRFABBR', prov)
selectProv = allProvince.filter(prov_filter)

# Prairie grid restricted to the selected province, turned into a server-side list
full_grid = ee.FeatureCollection('projects/ee-download-canada/assets/Grid_prairies')
grid = full_grid.filterBounds(selectProv)
list_roi_all = grid.toList(grid.size())

# Fetch the tile count to the client; the bare name displays it as the cell output
grid_size = list_roi_all.size().getInfo()
grid_size

253

4 - Setup downloader

In [4]:
from multiprocessing import Pool
from tqdm import tqdm
import time

# Define function to download cropland mask

def build_cropland_mask(geom: ee.Geometry) -> ee.Image:
    """
    Build a binary cropland mask image for a region.

    A pixel is 1 when at least one AAFC ACI image in the date filter carries one
    of the selected class codes AND ESA WorldCover v100 equals 40 there (the ESA
    cropland class, per the original note). All other pixels, including pixels
    masked out by the ESA test, are set to 0.

    Args:
        geom: Region used to clip the ESA and AAFC inputs.

    Returns:
        Single-band image named "mask", clipped to ``geom`` buffered by 30
        (meters, assuming Earth Engine's default buffer units).
    """
    # AAFC ACI class codes that count as cropland for this mask.
    crop_codes = ee.List([
        132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 145, 146, 147, 148, 149,
        150, 151, 152, 153, 154, 155, 156, 157,
        158, 160, 162, 167, 174
    ])
    ones = ee.List.repeat(1, crop_codes.size())

    def _flag_selected_classes(img):
        # First band: selected codes -> 1, every other value -> 0.
        flagged = img.select(0).remap(crop_codes, ones, 0)
        return flagged.rename("cropSel").toUint8().clip(geom)

    # ESA WorldCover v100: true where the land-cover value is 40.
    worldcover = ee.ImageCollection("ESA/WorldCover/v100").first()
    esa_is_crop = worldcover.clip(geom).eq(40)

    # AAFC ACI images inside the date filter, each reduced to a 0/1 band.
    aci_in_window = ee.ImageCollection("AAFC/ACI").filterDate("2018-01-01", "2024-12-31")
    yearly_flags = aci_in_window.map(_flag_selected_classes)

    # Per-pixel max across images: 1 if any image flagged the pixel.
    any_year_crop = yearly_flags.max().rename("aafcMask")

    # Keep AAFC hits only where ESA agrees, then fill masked pixels with 0.
    combined = ee.Image(any_year_crop).updateMask(esa_is_crop)
    combined = combined.rename("mask").unmask(0)
    return combined.clip(geom.buffer(30))
def get_crp_mask(download_dir, tile_id, tile):
    """Download the cropland mask GeoTIFF for one grid tile unless it already exists.

    The expected output is ``<download_dir>/crop_mask_<tile_id>_1.tif``. When that
    file is present the tile is skipped; otherwise the mask from
    ``build_cropland_mask`` is downloaded through
    ``geemap.download_ee_image_tiles`` at scale 10 in EPSG:4326.

    Any exception is caught and printed rather than raised, so a failing tile
    does not stop a loop over many tiles. Nothing is returned.

    Args:
        download_dir: Folder that receives the GeoTIFF.
        tile_id: Identifier embedded in the file name.
        tile: Feature whose geometry defines the tile (wrapped in an
            ``ee.FeatureCollection`` for the download call).
    """
    try:
        # Local imports; geemap itself is taken from the notebook's global scope,
        # and no ee.Initialize() call is made here.
        import ee, os
        from pathlib import Path

        tif_name = f'crop_mask_{tile_id}_1.tif'
        output_tif = os.path.join(download_dir, tif_name)

        # Guard: never re-download a tile that is already on disk.
        if os.path.exists(output_tif):
            print(f"Crop mask exists for file {Path(output_tif).stem}, skipping...")
            return

        geemap.download_ee_image_tiles(
            build_cropland_mask(tile.geometry()),
            ee.FeatureCollection(tile),
            download_dir,
            prefix=f'crop_mask_{tile_id}_',
            crs="EPSG:4326",
            scale=10,
        )
    except Exception as e:
        print(f"Error in get_crp_mask for tile {tile_id}: {e}")

def parallelize_download(func, argument_list, num_processes, timeout=120, delay=5):
    """Run ``func`` over ``argument_list`` in a process pool and collect the results.

    Every argument tuple is submitted up front with ``apply_async``; results are
    then collected in submission order. A job that raises, or that does not
    deliver a result within ``timeout`` seconds of being waited on, is reported
    and skipped, so the returned list can be shorter than ``argument_list``.

    The pool is always shut down before returning (also on error or keyboard
    interrupt), so no worker processes are left behind in the kernel. Jobs that
    are still running at that point are terminated.
    NOTE(review): a job killed mid-download may leave a partial output file,
    which a later exists-check would treat as done — confirm how func handles it.

    Args:
        func: Callable executed in the worker processes.
        argument_list: Iterable of positional-argument tuples, one per job.
        num_processes: Number of worker processes in the pool.
        timeout: Seconds to wait for each job's result once its turn comes.
        delay: Seconds to pause after each job is collected. All jobs are
            already submitted by then, so this only paces result collection.

    Returns:
        List with the return values of the jobs that completed in time.
    """
    # multiprocessing's own timeout exception; its str() is empty, so it gets a
    # dedicated message instead of the generic handler below.
    from multiprocessing import TimeoutError as JobTimeoutError

    pool = Pool(processes=num_processes)
    result_list_tqdm = []
    try:
        jobs = [pool.apply_async(func=func, args=argument) for argument in argument_list]
        pool.close()  # no further submissions

        print("Starting parallel download...")
        for index, job in enumerate(tqdm(jobs)):
            print(f"Job {index} started.")
            try:
                result_list_tqdm.append(job.get(timeout=timeout))
                print(f"Job {index} completed.")
            except JobTimeoutError:
                print(f"Job {index} failed: no result within {timeout} seconds")
            except Exception as e:
                print(f"Job {index} failed: {type(e).__name__}: {e}")
            time.sleep(delay)  # Short delay to avoid overloading
    finally:
        # Deterministic cleanup: stop any workers still running and reap them.
        pool.terminate()
        pool.join()

    return result_list_tqdm

In [5]:
# Build one (output folder, tile id string, tile feature) tuple per grid tile;
# the tuple order matches the parameters of get_crp_mask

argument_list_config_all = []
for local_idx in range(grid_size):
    tile_feature = ee.Feature(list_roi_all.get(local_idx))
    argument_list_config_all.append((out_dir, str(local_idx), tile_feature))


5 - Download mask
    * Single Tile Download
    * Sequential Download
    * Parallel Download

In [None]:
# Single tile download
# Uncomment the following lines to enable single tile downloading

# tile_index = 2
# download_dir, tile_id, tile_shp = argument_list_config_all[tile_index]
# get_crp_mask(download_dir, tile_id, tile_shp)

In [6]:
# Sequential download
# Downloads the listed tile indices one after another in this process

tiles = [10, 11, 12]
for download_dir, tile_id, tile_shp in (argument_list_config_all[idx] for idx in tiles):
    get_crp_mask(download_dir, tile_id, tile_shp)


Downloading 1/1: c:\Users\spn733\Work\CSA_Field_Boundary_Segmentation_V2\CSA_Field_Boundary_Segmentation\5_Data\Mask_download\Saskatchewan\crop_mask_10_1.tif


crop_mask_10_1.tif: |          | 0.00/25.0M (raw) [  0.0%] in 00:00 (eta:     ?)

There is no STAC entry for: None


Downloaded 1 tiles in 75.01012420654297 seconds.
Downloading 1/1: c:\Users\spn733\Work\CSA_Field_Boundary_Segmentation_V2\CSA_Field_Boundary_Segmentation\5_Data\Mask_download\Saskatchewan\crop_mask_11_1.tif
Downloading 1/1: c:\Users\spn733\Work\CSA_Field_Boundary_Segmentation_V2\CSA_Field_Boundary_Segmentation\5_Data\Mask_download\Saskatchewan\crop_mask_11_1.tif


crop_mask_11_1.tif: |          | 0.00/25.0M (raw) [  0.0%] in 00:00 (eta:     ?)

Downloaded 1 tiles in 70.42436838150024 seconds.
Downloading 1/1: c:\Users\spn733\Work\CSA_Field_Boundary_Segmentation_V2\CSA_Field_Boundary_Segmentation\5_Data\Mask_download\Saskatchewan\crop_mask_12_1.tif
Downloading 1/1: c:\Users\spn733\Work\CSA_Field_Boundary_Segmentation_V2\CSA_Field_Boundary_Segmentation\5_Data\Mask_download\Saskatchewan\crop_mask_12_1.tif


crop_mask_12_1.tif: |          | 0.00/25.0M (raw) [  0.0%] in 00:00 (eta:     ?)

Downloaded 1 tiles in 67.91438698768616 seconds.


In [None]:
# Parallel download
# uncomment the following lines to enable parallel downloading

# num_processes = min(cpus_, grid_size)  
# result_list = parallelize_download(
#     func=get_crp_mask,
#     argument_list=argument_list_config_all[0:2],
#     num_processes=num_processes
# )

In [None]:
def export_mask_to_drive(tile_id, tile):
    """Start an Earth Engine batch task that exports one tile's cropland mask to Drive.

    The mask comes from ``build_cropland_mask``; the export region is the
    bounding box of the tile geometry, fetched to the client as a coordinate
    list. Progress and failures are printed; exceptions are not re-raised and
    nothing is returned.

    Args:
        tile_id: Identifier used in the task description and file name prefix.
        tile: Feature whose geometry defines the tile.
    """
    print(f"Starting export for tile {tile_id}...")

    try:
        tile_geom = tile.geometry()
        export_name = f"crop_mask_{tile_id}"

        export_args = {
            "image": build_cropland_mask(tile_geom),
            "description": export_name,
            "folder": "EarthEngineExports",  # You can change this folder name
            "fileNamePrefix": export_name,
            # Bounding box coordinates, retrieved from the server with getInfo()
            "region": tile_geom.bounds().getInfo()['coordinates'],
            "scale": 10,
            "crs": "EPSG:4326",
            "maxPixels": 1e13,
        }

        ee.batch.Export.image.toDrive(**export_args).start()
        print(f"✓ Export task started for tile {tile_id}")
    except Exception as e:
        print(f"❌ Failed to export tile {tile_id}: {e}")

In [None]:
# Export a single tile to Drive: choose its position in the grid list, wrap that
# list element as a Feature, and start the export with the index as the tile id.
tile_index = 0  # or any tile you want
tile = ee.Feature(list_roi_all.get(tile_index))
export_mask_to_drive(str(tile_index), tile)

Starting export for tile 0...
✓ Export task started for tile 0
✓ Export task started for tile 0
