# Sentinel-2 Downloader Notebook (Self-Contained for ArcGIS Pro)

This notebook contains everything needed to search and download Sentinel-2 SAFE products from the Copernicus Data Space Ecosystem (CDSE) by MGRS tile.

Notes for ArcGIS Pro:
- You can run this notebook directly inside ArcGIS Pro's Notebook environment.
- If a package is missing, run the install cell below. ArcGIS Pro uses a conda-based Python; `%pip` in a notebook will install into the active environment.
- If behind a corporate proxy, you may need to configure environment variables (HTTP_PROXY/HTTPS_PROXY) in your session before installing/connecting.


In [None]:
# Optional: Install dependencies (uncomment and run if needed)
# %pip install --upgrade pip
# %pip install sentinelsat python-dateutil requests tqdm pandas


In [None]:
# Imports and utilities
from __future__ import annotations

import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict

from dateutil.parser import isoparse
from sentinelsat import SentinelAPI
from tqdm import tqdm

# Helper functions

def parse_date(s: str) -> datetime:
    """Turn a date string into a ``datetime``.

    Plain ``YYYY-MM-DD`` input is handled by ``datetime.strptime``; any other
    text is handed to ``dateutil``'s ``isoparse``.

    Raises:
        ValueError: if the value cannot be parsed (the underlying error is
            chained as the cause).
    """
    try:
        is_plain_date = len(s) == 10 and s[4] == '-' and s[7] == '-'
        if is_plain_date:
            parsed = datetime.strptime(s, "%Y-%m-%d")
        else:
            parsed = isoparse(s)
    except Exception as e:
        # Any failure (bad type, bad format) is reported uniformly.
        raise ValueError(f"Invalid date: {s}") from e
    return parsed


def ensure_output_dir(path: str) -> Path:
    """Return *path* as a ``Path``, creating the directory tree if needed.

    Missing parent directories are created as well, and an already existing
    directory is not treated as an error.
    """
    target = Path(path)
    target.mkdir(parents=True, exist_ok=True)
    return target


def connect(username: str, password: str,
            api_url: str = "https://apihub.copernicus.eu/apihub") -> SentinelAPI:
    """Create a ``SentinelAPI`` client for the given account.

    Args:
        username: Account name passed to ``SentinelAPI``.
        password: Account password passed to ``SentinelAPI``.
        api_url: Base URL of the hub to talk to. Override it to target
            another hub that speaks the API ``sentinelsat`` expects.

    Returns:
        The constructed ``SentinelAPI`` instance.

    NOTE(review): the default URL is the legacy Copernicus Open Access Hub
    ("SciHub") endpoint, which is also sentinelsat's built-in default. It is
    not a Copernicus Data Space Ecosystem (CDSE) address - confirm the
    service you intend to use and pass ``api_url`` accordingly.
    """
    return SentinelAPI(username, password, api_url)


def query_products(api: SentinelAPI, tiles: List[str], date_start: datetime, date_end: datetime,
                   product_type: str, max_cloud: int) -> Dict:
    """Query Sentinel-2 products for each tile and merge the results.

    One ``api.query`` call is issued per distinct tile, filtering on the
    date range, product type, cloud cover ``(0, max_cloud)`` and a filename
    wildcard ``*_<tile>_*``.

    Args:
        api: Client exposing a ``query(**filters)`` method returning a dict.
        tiles: Tile identifiers as they appear in product file names
            (e.g. ``"T35LKC"``). A single string is accepted and treated as
            one tile rather than being iterated character by character.
        date_start: Lower bound of the ``date`` filter.
        date_end: Upper bound of the ``date`` filter.
        product_type: Value for the ``producttype`` filter.
        max_cloud: Upper bound of the cloud cover percentage filter.

    Returns:
        A dict merging every per-tile result; later tiles overwrite equal keys.
    """
    if isinstance(tiles, str):
        # A bare string would otherwise be looped over letter by letter.
        tiles = [tiles]

    products_total: Dict = {}
    # dict.fromkeys drops repeated tiles (keeping order) to avoid issuing the
    # same query twice; the merged result is unaffected.
    for tile in dict.fromkeys(tiles):
        products_total.update(
            api.query(
                date=(date_start, date_end),
                platformname='Sentinel-2',
                producttype=product_type,
                cloudcoverpercentage=(0, max_cloud),
                filename=f"*_{tile}_*",
            )
        )
    return products_total


def _wkt_to_geometry(wkt: str):
    """Convert POLYGON / MULTIPOLYGON WKT text to a GeoJSON geometry dict.

    Returns ``None`` for any other geometry type, for empty geometries and
    for text whose coordinates are not numeric.
    """
    import re  # local import: ``re`` is not part of the module-level imports

    header = re.match(r"\s*(MULTIPOLYGON|POLYGON)\b(.*)$", wkt, re.IGNORECASE | re.DOTALL)
    if header is None:
        return None
    kind, body = header.group(1).upper(), header.group(2)

    # A polygon is a parenthesised, comma-separated list of rings; a ring is
    # a parenthesised list of points with no nested parentheses.
    polygon_pattern = r"\(\s*(\([^()]*\)(?:\s*,\s*\([^()]*\))*)\s*\)"
    ring_pattern = r"\(([^()]*)\)"
    try:
        polygons = [
            [
                [[float(number) for number in point.split()] for point in ring.split(",")]
                for ring in re.findall(ring_pattern, rings_text)
            ]
            for rings_text in re.findall(polygon_pattern, body)
        ]
    except ValueError:
        return None
    if not polygons:
        return None
    if kind == "POLYGON":
        return {"type": "Polygon", "coordinates": polygons[0]}
    return {"type": "MultiPolygon", "coordinates": polygons}


def to_geojson(products: Dict) -> Dict:
    """Build a GeoJSON ``FeatureCollection`` from a products mapping.

    Each value of *products* is read with ``.get`` and becomes one feature.
    The ``footprint`` entry supplies the geometry:

    * a non-string value is used as-is;
    * a string holding a JSON object (single quotes tolerated) is decoded;
    * otherwise the string is parsed as POLYGON / MULTIPOLYGON WKT;
    * anything unparseable yields a ``null`` geometry.

    ``datetime`` property values are emitted as ISO-8601 strings so the
    returned structure can be passed straight to ``json.dumps``.

    Args:
        products: Mapping of product id to a dict of product attributes.

    Returns:
        A dict with ``"type": "FeatureCollection"`` and a ``"features"`` list.
    """
    property_keys = (
        'title', 'beginposition', 'endposition', 'ingestiondate', 'size', 'producttype', 'cloudcoverpercentage'
    )
    features = []
    for product in products.values():
        geometry = product.get('footprint')
        if isinstance(geometry, str):
            footprint_text = geometry
            try:
                geometry = json.loads(footprint_text.replace("'", '"'))
            except ValueError:
                geometry = None
            if not isinstance(geometry, dict):
                # Not a JSON object: fall back to WKT parsing.
                geometry = _wkt_to_geometry(footprint_text)

        properties = {}
        for key in property_keys:
            value = product.get(key)
            # json cannot serialise datetime objects; store them as text.
            properties[key] = value.isoformat() if isinstance(value, datetime) else value

        features.append({
            "type": "Feature",
            "geometry": geometry,
            "properties": properties,
        })
    return {"type": "FeatureCollection", "features": features}


def _is_date_only(text: str) -> bool:
    """Return True when *text* has no time-of-day part (e.g. ``2025-09-08``)."""
    return not any(marker in text for marker in ("T", "t", " ", ":"))


def _json_default(value):
    """``json.dumps`` fallback: datetimes become ISO-8601 text, others are rejected."""
    if isinstance(value, datetime):
        return value.isoformat()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def run_download(tiles: List[str], username: str, password: str,
                 start: str, end: str,
                 product_type: str = "S2MSI2A",
                 max_cloud: int = 20,
                 output: str = "downloads",
                 unzip: bool = False,
                 footprints: str | None = None,
                 limit: int | None = None,
                 ascending: bool = False) -> Dict:
    """
    Download Sentinel-2 products matching criteria and return a summary dict:
    {"products": dict, "uuids": list[str], "output_dir": Path}

    Steps performed:
      1. Parse ``start`` / ``end`` and create the output directory.
      2. Connect and query every tile.
      3. Sort the hits by ``beginposition`` and keep the first ``limit`` rows.
      4. Download each selected product, skipping ``<title>.zip`` files that
         already exist in the output directory.
      5. Optionally extract the archives and write a footprints GeoJSON file.

    Args:
        tiles: Tile identifiers forwarded to ``query_products``.
        username, password: Credentials forwarded to ``connect``.
        start: Start of the search window (see ``parse_date`` for formats).
        end: End of the search window. A value without a time of day is
            treated as inclusive: the search runs to 23:59:59 of that day.
        product_type: Product type filter.
        max_cloud: Maximum cloud cover percentage.
        output: Directory that receives the downloads (created if missing).
        unzip: Extract each downloaded archive into ``output`` unless
            ``<title>.SAFE`` already exists there.
        footprints: Optional path of a GeoJSON file to write. It covers every
            product returned by the query, not only the downloaded subset.
        limit: Keep at most this many products after sorting. ``None`` and
            ``0`` both mean "no limit".
        ascending: Sort oldest first when True, newest first otherwise.

    Returns:
        ``{"products": <all query hits>, "uuids": <selected ids in download
        order>, "output_dir": <Path>}``. Failed downloads are reported on
        screen but their ids remain in ``uuids``.

    Raises:
        ValueError: if ``start`` or ``end`` cannot be parsed.
    """
    start_dt = parse_date(start)
    end_dt = parse_date(end)
    if _is_date_only(end):
        # A bare date parses to midnight, which would cut off everything
        # acquired during the end day; widen the bound to the end of the day.
        end_dt = end_dt.replace(hour=23, minute=59, second=59)
    out_dir = ensure_output_dir(output)
    api = connect(username, password)

    print(f"Querying products for tiles {tiles} between {start_dt.date()} and {end_dt.date()} ...")
    products = query_products(api, tiles, start_dt, end_dt, product_type, max_cloud)

    if not products:
        print("No products found matching criteria.")
        return {"products": {}, "uuids": [], "output_dir": out_dir}

    # The DataFrame is only used to order (and optionally truncate) the hits.
    df = api.to_dataframe(products).sort_values('beginposition', ascending=ascending)
    if limit:
        df = df.head(limit)
    uuids = list(df.index)

    print(f"Found {len(uuids)} product(s). Starting downloads to {out_dir} ...")
    for uuid in tqdm(uuids, desc="Downloading"):
        title = products[uuid]['title']
        zip_path = out_dir / f"{title}.zip"
        if zip_path.exists():
            tqdm.write(f"Skip existing: {zip_path.name}")
            continue
        try:
            api.download(uuid, directory_path=str(out_dir), checksum=True)
        except Exception as e:
            # Best effort: one failed product must not abort the whole batch.
            tqdm.write(f"Failed {title}: {e}")

    if unzip:
        import zipfile
        for uuid in uuids:
            title = products[uuid]['title']
            zip_path = out_dir / f"{title}.zip"
            if not zip_path.exists():
                # Download failed or was never attempted; nothing to extract.
                continue
            if (out_dir / f"{title}.SAFE").exists():
                continue
            try:
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    zf.extractall(out_dir)
            except Exception as e:
                print(f"Unzip failed for {zip_path.name}: {e}")

    if footprints:
        gj = to_geojson(products)
        fp = Path(footprints)
        fp.parent.mkdir(parents=True, exist_ok=True)
        # Date properties may be datetime objects, which json cannot encode
        # without help; write UTF-8 explicitly instead of the locale default.
        fp.write_text(json.dumps(gj, default=_json_default), encoding="utf-8")
        print(f"Footprints written: {fp}")

    print("Done.")
    return {"products": products, "uuids": uuids, "output_dir": out_dir}


In [None]:
# Configure your variables here
import os

TILES = ["T35LKC", "T35LKD", "T35KKB", "T35KLB", "T35LLC", "T35LLD"]
START_DATE = "2025-08-01"  # inclusive
END_DATE = "2025-09-08"    # inclusive

# Credentials: prefer the CDSE_USERNAME / CDSE_PASSWORD environment variables
# so that secrets are not saved inside the notebook. The placeholder strings
# are only used when the corresponding variable is not set.
USERNAME = os.environ.get("CDSE_USERNAME", "your_cdse_username")
PASSWORD = os.environ.get("CDSE_PASSWORD", "your_cdse_password")

# Optional parameters
PRODUCT_TYPE = "S2MSI2A"   # or "S2MSI1C"
MAX_CLOUD = 20              # percent
OUTPUT_DIR = "downloads"   # a relative path resolves against the current working directory
UNZIP = False               # set True to extract .SAFE folders
FOOTPRINTS = None           # e.g., "footprints.geojson"
LIMIT = None                # e.g., 5 to limit number of products
ASCENDING = False           # True sorts by beginposition ascending (oldest first)


In [None]:
# Kick off the search + download using the values from the configuration cell.
# The five required arguments are passed positionally in signature order
# (tiles, username, password, start, end); the tunables stay keyword-based.
result = run_download(
    TILES, USERNAME, PASSWORD, START_DATE, END_DATE,
    product_type=PRODUCT_TYPE,
    max_cloud=MAX_CLOUD,
    output=OUTPUT_DIR,
    unzip=UNZIP,
    footprints=FOOTPRINTS,
    limit=LIMIT,
    ascending=ASCENDING,
)

# Short summary of what was selected and where it was stored.
print("Products found:", len(result["uuids"]))
print("Output dir:", result["output_dir"])