# Parallel Sentinel-2 Downloader (Clean)

This notebook contains a minimal, production-ready version of the working parallel downloader. It uses a subprocess-per-file approach with a 4-minute timeout to avoid DLL issues on Windows and prevent hangs.

Key characteristics:
- Subprocess isolation per tile-year
- Uses geemap to download locally
- 4-minute timeout per file
- Skips files that already exist
- Simple, reliable, and proven in testing

In [1]:
# Imports
import os
import sys
import time
import tempfile
import subprocess
from datetime import datetime

In [None]:
# Fixed, proven subprocess script generator
def create_fixed_subprocess_script(tile_idx, year, download_dir, project_id="n20project-458916"):
    """
    Generates a standalone Python script (as a string) that downloads a single
    Sentinel-2 image for a given tile index and year using geemap, in an isolated subprocess.
    This is the exact working structure validated previously.
    """
    script_content = f'''
import sys
import os
import ee
import geemap
from datetime import datetime

TILE_IDX = {tile_idx}
YEAR = {year}
DOWNLOAD_DIR = r"{download_dir}"
PROJECT_ID = "{project_id}"


def log(msg):
    ts = datetime.now().strftime("%H:%M:%S")
    print(f"[{{ts}}] Tile {{TILE_IDX}} Year {{YEAR}}: {{msg}}")
    sys.stdout.flush()

try:
    log("Starting subprocess")
    ee.Initialize(project=PROJECT_ID)

    # Study area and tile grid (same as validated working version)
    test_area = ee.Geometry.Rectangle([-104.1644, 48.4134, -102.8347, 49.0295])
    projection = ee.Projection('EPSG:32614')
    grid = test_area.coveringGrid(projection, 10000)
    tile_geometry = ee.Feature(grid.toList(10).get(TILE_IDX)).geometry()

    def get_s2_simple(roi, year):
        s2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        csPlus = ee.ImageCollection('GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED')

        def join_cloudscore(s2_col, cs_col):
            join = ee.Join.saveFirst('csplus')
            flt = ee.Filter.equals(leftField='system:index', rightField='system:index')
            return ee.ImageCollection(join.apply(s2_col, cs_col, flt))

        def apply_cloud_mask(image):
            cs_img = ee.Image(image.get('csplus')).select('cs_cdf')
            return image.updateMask(cs_img.gte(0.55))

        s2_f = s2.filterBounds(roi).filterDate(f'{{year}}-04-01', f'{{year}}-10-01')
        cs_f = csPlus.filterBounds(roi).filterDate(f'{{year}}-04-01', f'{{year}}-10-01')

        joined = join_cloudscore(s2_f, cs_f)
        col = joined.map(apply_cloud_mask)

        band_names = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
        band_aliases = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']
        return col.select(band_names, band_aliases).median().clip(roi)

    # Skip if already exists
    output_file = os.path.join(DOWNLOAD_DIR, f"subprocess_{{YEAR}}_{{TILE_IDX}}_1.tif")
    if os.path.exists(output_file):
        log("File already exists")
        sys.exit(0)

    log("Processing S2")
    s2_img = get_s2_simple(tile_geometry, YEAR)

    log("Downloading with geemap")
    geemap.download_ee_image_tiles(
        s2_img,
        ee.FeatureCollection([ee.Feature(tile_geometry)]),
        DOWNLOAD_DIR,
        prefix=f'subprocess_{{YEAR}}_{{TILE_IDX}}_',
        crs="EPSG:4326",
        scale=10
    )

    log("Done")

except Exception as e:
    log(f"Error: {{str(e)}}")
    sys.exit(1)
'''

    return script_content

In [5]:
# Runner: dispatch subprocess per tile-year with 4-minute timeout
def run_parallel_s2_download(tiles, years, download_dir, timeout_minutes=4):
    """
    Runs the fixed subprocess script for each tile-year combination.
    - Skips files that already exist
    - Times out any single run after timeout_minutes
    - Tracks basic success/error counts
    """
    os.makedirs(download_dir, exist_ok=True)
    combos = [(t, y) for t in tiles for y in years]
    results = {"total": len(combos), "success": 0, "timeout": 0, "error": 0, "files": []}
    start = time.time()
    
    for i, (tile, year) in enumerate(combos, 1):
        print(f"\n[{i}/{len(combos)}] Tile {tile}, Year {year}")
        expected = os.path.join(download_dir, f"subprocess_{year}_{tile}_1.tif")
        if os.path.exists(expected):
            sz = os.path.getsize(expected) / (1024*1024)
            print(f"  ‚úÖ Exists: {sz:.1f} MB")
            results["success"] += 1
            results["files"].append(expected)
            continue
    
        try:
            script = create_fixed_subprocess_script(tile, year, download_dir)
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
                f.write(script)
                script_path = f.name
    
            proc = subprocess.run([sys.executable, script_path], capture_output=True, text=True, timeout=timeout_minutes*60)
            os.remove(script_path)
    
            if proc.returncode == 0 and os.path.exists(expected):
                sz = os.path.getsize(expected) / (1024*1024)
                print(f"  ‚úÖ Success: {sz:.1f} MB")
                results["success"] += 1
                results["files"].append(expected)
            else:
                print("  ‚ùå Failed")
                if proc.stderr:
                    print("    stderr:", proc.stderr.splitlines()[-1][:200])
                results["error"] += 1
    
        except subprocess.TimeoutExpired:
            print("  ‚è±Ô∏è Timeout")
            results["timeout"] += 1
        except Exception as e:
            print(f"  üí• Error: {e}")
            results["error"] += 1
    
    dur = time.time() - start
    print(f"\nüìä Results: {results['success']}/{results['total']} success ({results['success']/max(1,results['total'])*100:.1f}%) in {dur/60:.1f} min")
    return results

In [7]:
run_parallel_s2_download(tiles=[0,1,2], years=[2023], download_dir="./s2_downloads")


[1/3] Tile 0, Year 2023
  ‚ùå Failed

[2/3] Tile 1, Year 2023
  ‚ùå Failed

[2/3] Tile 1, Year 2023
  ‚ùå Failed

[3/3] Tile 2, Year 2023
  ‚ùå Failed

[3/3] Tile 2, Year 2023
  ‚ùå Failed

üìä Results: 0/3 success (0.0%) in 0.3 min
  ‚ùå Failed

üìä Results: 0/3 success (0.0%) in 0.3 min


{'total': 3, 'success': 0, 'timeout': 0, 'error': 3, 'files': []}

In [4]:
# Notes
# - Requires Earth Engine auth and project access (n20project-458916 by default)
# - Uses a simple 10-tile grid derived from a fixed study area (as in prior work)
# - Safe to re-run: existing files are skipped
# - Per-file timeout default is 4 minutes; adjust via timeout_minutes
# - Output filenames: subprocess_<YEAR>_<TILE>_1.tif