In [None]:
import os
import json
import time
import random
from pathlib import Path
from multiprocessing import Pool

import ee
import geemap

# ----------------------------------------------------------------------
# 1) Authentication and configuration
# ----------------------------------------------------------------------
# Service account and JSON key file used to initialise the Earth Engine client.
# NOTE(review): KEY_FILE and DOWNLOAD_DIR are absolute, machine-specific
# Windows paths; they must be edited before running on another machine.
SERVICE_ACCOUNT = "soilec-service@soiladf2025.iam.gserviceaccount.com"
KEY_FILE = r"C:\HA\ADF_soilEC_2025\pythoncode\soiladf2025-9f8e370a9642.json"
ee.Initialize(ee.ServiceAccountCredentials(SERVICE_ACCOUNT, KEY_FILE))

# Folder that receives the downloaded tiles and per-tile error files;
# it is created (including missing parents) if it does not exist yet.
DOWNLOAD_DIR = r"C:\HA\zTest\CropmMask"
Path(DOWNLOAD_DIR).mkdir(parents=True, exist_ok=True)

# Earth Engine asset holding the tile grid, and the land-cover collection IDs.
GRID_ASSET = "projects/ee-download-canada/assets/Grid_prairies"
ESA_WORLDCOVER = "ESA/WorldCover/v100"  # v100 release; class 40 is used as cropland
AAFC_ACI = "AAFC/ACI"                   # multi-year crop inventory

# Parallelism and retry policy (kept small to avoid Earth Engine throttling).
NUM_PROCESSES = 5     # worker processes used by the driver cell
RETRY_MAX = 3         # download attempts per tile before giving up
RETRY_BASE_WAIT = 8   # seconds; multiplied by the attempt number between retries


In [None]:

def _get_grid_feature(idx):
    """Return the grid feature stored at position ``idx`` of ``GRID_ASSET``.

    Args:
        idx: Zero-based position of the feature within the grid collection.

    Returns:
        ee.Feature: A server-side reference to the requested feature.

    Raises:
        IndexError: If ``idx`` is negative or not smaller than the collection size.
    """
    fc = ee.FeatureCollection(GRID_ASSET)
    # One round-trip to learn the collection size so the index can be validated
    # client-side with a clear error message.
    size = fc.size().getInfo()
    if idx < 0 or idx >= size:
        raise IndexError(f"Grid index {idx} is out of range [0..{size-1}].")
    # Ask for a one-element list starting at offset ``idx`` instead of
    # converting the whole collection to a list just to read one entry.
    return ee.Feature(fc.toList(1, idx).get(0))


def _region_from_geometry(geom: ee.Geometry):
    """Turn an Earth Engine geometry into a coordinate list for a download region.

    The result is intended to be passed as ``region=...`` to
    ``geemap.download_ee_image``.

    Args:
        geom: Geometry whose extent defines the download region.

    Returns:
        The GeoJSON ``coordinates`` of ``geom`` itself when it is a Polygon;
        for every other geometry type (MultiPolygon included) the
        ``coordinates`` of its bounding box.
    """
    geojson = geom.getInfo()  # client-side GeoJSON dict

    # A plain polygon can be used directly.
    if geojson["type"] == "Polygon":
        return geojson["coordinates"]

    # Anything else (e.g. a possibly large MultiPolygon) is reduced to its
    # bounding box so the region stays a single simple ring.
    bounds = ee.Geometry(geojson).bounds().getInfo()
    return bounds["coordinates"]



# The AAFC ACI class codes that are mapped to 1 are listed as TARGET_CLASSES inside build_cropland_mask().


def build_cropland_mask(geom: ee.Geometry) -> ee.Image:
    """Build a binary cropland mask for ``geom``.

    A pixel is 1 when, in at least one AAFC ACI image of the filtered period,
    its class is one of ``TARGET_CLASSES`` AND the ESA WorldCover pixel equals
    40 (cropland); every other pixel is 0.

    The collection IDs come from the module-level ``ESA_WORLDCOVER`` and
    ``AAFC_ACI`` constants so that editing the configuration actually takes
    effect here.

    Args:
        geom: Area of interest; the result is clipped to ``geom`` buffered by 30
            (units as interpreted by ``ee.Geometry.buffer``).

    Returns:
        ee.Image: Single-band image named ``"mask"`` holding 0/1 values.
    """
    TARGET_CLASSES = ee.List([132, 133, 134, 135, 136, 137, 138, 139,
    140, 141, 142, 145, 146, 147, 148, 149,150, 151, 152, 153, 154, 155, 156, 157,
    158, 160, 162, 167, 174])
    # Replacement values for remap(): one "1" per target class. Built once
    # here instead of being rebuilt inside the mapped function.
    ones = ee.List.repeat(1, TARGET_CLASSES.size())

    # ESA WorldCover cropland (class 40)
    esa = ee.ImageCollection(ESA_WORLDCOVER).first().clip(geom)
    esa_crop = esa.eq(40)

    def _to_binary(img):
        """Map one ACI image to 1 for target classes and 0 for everything else."""
        return (
            img.select(0)
               .remap(TARGET_CLASSES, ones, 0)
               .rename("cropSel")
               .toUint8()
               .clip(geom)
        )

    # Restrict AAFC ACI to the time window and binarise every image.
    aci = (
        ee.ImageCollection(AAFC_ACI)
        .filterDate("2018-01-01", "2024-12-31")
        .map(_to_binary)
    )

    # Collapse multi-year into one mask (any year with target crop = 1)
    aafc_mask = aci.max().rename("aafcMask")

    # Keep only pixels ESA also calls cropland, then fill everything that was
    # masked out with 0 so the output has no nodata inside the clip region.
    mask = (
        ee.Image(aafc_mask)
        .updateMask(esa_crop)
        .rename("mask")
        .unmask(0)
        .clip(geom.buffer(30))
    )

    return mask



# Per-process flag: Earth Engine only needs to be initialised once per worker.
_EE_INITIALIZED = False


def _init_ee():
    """Initialise the Earth Engine client in the current process (once)."""
    global _EE_INITIALIZED
    if not _EE_INITIALIZED:
        ee.Initialize(ee.ServiceAccountCredentials(SERVICE_ACCOUNT, KEY_FILE))
        _EE_INITIALIZED = True


def _download_one(idx: int):
    """Worker body: build the mask image for one grid tile and download it.

    Args:
        idx: Zero-based index of the tile in the grid asset.

    Returns:
        str: A one-line status message (skipped / downloaded / failed).

    Raises:
        IndexError: Propagated from ``_get_grid_feature`` when ``idx`` is out
            of range. Errors raised while preparing the geometry are not
            retried; only the download call is.
    """
    _init_ee()

    name = f"mask_zone_{idx}"
    out_path = os.path.join(DOWNLOAD_DIR, f"{name}.tif")

    # Tiles already on disk are not downloaded again.
    if os.path.exists(out_path):
        return f"✔️ {name}: exists, skipped."

    feat = _get_grid_feature(idx)
    geom = feat.geometry()
    region = _region_from_geometry(geom)

    img = build_cropland_mask(geom)

    # retries for occasional EE throttling
    for attempt in range(1, RETRY_MAX + 1):
        try:
            geemap.download_ee_image(
                image=img,
                filename=out_path,
                scale=10,          # meters; rely on source native projection
                region=region,
                # crs=None  # let EE decide; setting wrong CRS with 10m in degrees breaks things
            )
            return f"✅ {name}: downloaded."
        except Exception as e:
            # A failed attempt may leave a partial file behind; remove it so the
            # "exists, skipped" check above never mistakes it for a finished tile.
            if os.path.exists(out_path):
                try:
                    os.remove(out_path)
                except OSError:
                    pass  # best-effort cleanup; the original error is reported below
            if attempt == RETRY_MAX:
                # write per-tile error file
                err_file = os.path.join(DOWNLOAD_DIR, f"error_{name}.txt")
                with open(err_file, "w", encoding="utf-8") as f:
                    f.write(f"Index {idx} failed:\n{repr(e)}\n")
                return f"❌ {name}: failed after {RETRY_MAX} attempts. See {err_file}"
            # jittered backoff: wait grows linearly with the attempt number
            wait_s = RETRY_BASE_WAIT * attempt + random.uniform(0, 4)
            time.sleep(wait_s)


def parallelize_download(index_list, num_processes=4):
    """Download the given grid tiles in parallel worker processes.

    Args:
        index_list: Iterable of grid indices; each is passed to ``_download_one``.
        num_processes: Upper bound on worker processes. ``None`` lets
            ``multiprocessing.Pool`` pick its default.

    Returns:
        list: One result per index, in input order. Empty when ``index_list``
        is empty (no pool is started in that case).

    NOTE(review): with the "spawn" start method (the Windows default) the
    worker function must be importable by child processes; functions defined
    interactively in a notebook cell may not be. Confirm in the target
    environment.
    """
    # Materialise once so generators/ranges can be measured and reused.
    indices = list(index_list)
    if not indices:
        return []

    # Never start more workers than there are tiles to process.
    workers = num_processes if num_processes is None else min(num_processes, len(indices))

    # maxtasksperchild recycles workers periodically; chunksize=1 hands out
    # tiles one at a time so slow tiles do not hold up a whole chunk.
    with Pool(processes=workers, maxtasksperchild=10) as pool:
        return pool.map(_download_one, indices, chunksize=1)

In [None]:
if __name__ == "__main__":
    # Earth Engine is already initialised by the configuration cell, so the
    # grid asset can be queried directly for its number of tiles.
    grid_size = ee.FeatureCollection(GRID_ASSET).size().getInfo()
    print("Grid features:", grid_size)

    # Tiles to process: currently the first five (or fewer if the grid is
    # smaller). Widen the range or substitute a custom list to run more.
    indices = [*range(min(5, grid_size))]

    print(f"Starting {len(indices)} tiles with {NUM_PROCESSES} processes …")
    out = parallelize_download(indices, num_processes=NUM_PROCESSES)

    # One status line per tile, in the same order as ``indices``.
    for status in out:
        print(status)
    print("All done.")

In [None]:
# import ee
# import geemap
# from multiprocessing import Pool
# # ---- Authenticate with service account ----
# service_account ='soilec-service@soiladf2025.iam.gserviceaccount.com'
# key_file = r"C:\HA\ADF_soilEC_2025\pythoncode\soiladf2025-9f8e370a9642.json"
# credentials = ee.ServiceAccountCredentials(service_account, key_file)
# ee.Initialize(credentials)
# #https://console.cloud.google.com/home/dashboard?project=kochias2; realfieldca.gmail.com

In [None]:
# def start_export(a):
#     import ee, os 
#     import geemap.geemap as geemap
#     from pathlib import Path

#     ee.Initialize(project='pythonconvertedcode')
#     ee.Authenticate()
#     # Change to your appropriate folder
#     download_dir = r'C:\Users\hvt632\CSA_Field_Boundary_Segmentation-main\CropmMask'
#     try: 
#         name = 'mask_zone' + '_' + str(a) # use toByte()/toFloat() if more appropriate
#         out_path = os.path.join(download_dir, f"{name}" + '.tif')
#         if os.path.exists(out_path):
#             return f"File {out_path} already exists. Skipping export for {name}."
#         else: 
#             gridBlack = ee.FeatureCollection('projects/ee-download-canada/assets/Grid_prairies')
#             gridList = gridBlack.toList(gridBlack.size())
#             shp = ee.Feature(gridList.get(a)).geometry()
#             # Prepare image (rename + fill nodata if needed; adjust dtype to your data)
#             esaMask = ee.ImageCollection("ESA/WorldCover/v100").first().rename('esaMask').clip(shp).eq(40)
#             crop = ee.ImageCollection('AAFC/ACI').filter(ee.Filter.date('2018-01-01', '2024-12-31'))
#             oldVal = ee.List([10, 20, 30, 34, 35, 50, 80, 110, 120, 122, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 162, 167, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 185, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 210, 220, 230])
#             newVal = ee.List([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 162, 167, 174, 1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1, 1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1])
#             aafcMask = crop.map(lambda img: img.remap(oldVal, newVal, 0, 'landcover')
#                                 .rename('landcover')
#                                 .toUint32()
#                                 .resample('bilinear')
#                                 .reproject(crs='EPSG:4326', scale=10)
#                                 .gt(1).clip(shp)).sum().focalMean(5).rename('aafcMask').multiply(esaMask)
#             img = ee.Image(aafcMask).clip(shp.buffer(30)).rename('mask').unmask(0).toByte() 
#             try: 
#                 geemap.download_ee_image(
#                         img,
#                         filename=out_path,
#                         scale=10,
#                         crs="EPSG:4326",
#                     )
#                 print(f"Finished exporting {name} to {out_path}")
#             except Exception as e:
#                 import traceback
#                 error_msg = f"Error in worker {a}: {e}\n{traceback.format_exc()}"
#                 with open(os.path.join(download_dir, f"error_{name}.txt"), 'w') as f:
#                     f.write(error_msg)  
#                 return 2
#     except Exception as e:
#         import traceback
#         error_msg = f"Error in worker {a}: {e}\n{traceback.format_exc()}"
#         with open(os.path.join(download_dir, f"error_{name}.txt"), 'w') as f:
#             f.write(error_msg)  
#         return f"Error in worker {a}: {e}\n{traceback.format_exc()}"
    
# def parallelize_download(func, argument_list, num_processes):
 
#     pool = Pool(processes=num_processes)
#     try: 
#         jobs = [pool.apply_async(func=func, args=(*argument,)) 
#                 if isinstance(argument, tuple) 
#                 else pool.apply_async(func=func, args=(argument,)) 
#                 for argument in argument_list]
#         print(f"Number of jobs: {len(jobs)}")
#         pool.close()
#         print("Waiting for all subprocesses done...")
#         result_list_tqdm = []
#         for job in jobs:
#             try: 
#                 #result_list_tqdm.append(job.get())
#                 res = job.get()
#                 if isinstance(res, str):
#                     result_list_tqdm.append(res)
                    
#             except Exception as e:
#                 res = None 
#                 result_list_tqdm.append(f"Error in job: {e}, {res}")
#         return result_list_tqdm
#     except KeyboardInterrupt: 
#         print("Terminating all processes.....!")
#         pool.terminate()
#         pool.join()

In [None]:
# if __name__ == "__main__":
#     # ee.Authenticate()
#     # ee.Initialize(project='pythonconvertedcode')
#     gridBlack = ee.FeatureCollection('projects/ee-download-canada/assets/Grid_prairies') 
#     try:
#         from IPython import get_ipython
#         shell = get_ipython()
#         if shell is None:
#             print("Running in standard Python interpreter")
#         else:
#             shell_name = shell.__class__.__name__
#             if shell_name == 'ZMQInteractiveShell':
#                 print("Running in Jupyter Notebook or JupyterLab. This is what we want.")
#             elif shell_name == 'TerminalInteractiveShell':
#                 print("Running in IPython terminal")
#             else:
#                 print("Running in unknown IPython shell:", shell_name)
#     except ImportError:
#         print("Running in standard Python interpreter (IPython not installed)")
#     argument_list = list(range(gridBlack.size().getInfo()))  # Example argument list
#     num_processes = 4  # Number of parallel processes
#     results = parallelize_download(start_export, argument_list, num_processes)
#     for res in results:
#         print(res)