<a href="https://colab.research.google.com/github/mianyumifen-bot/codePublic/blob/main/%E7%AC%AC%E4%B8%89%E5%A4%A9sential%E5%BE%88%E5%A4%9A%E6%B3%A2%E6%AE%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required packages (only needs to be run once)
!pip install --quiet earthengine-api geemap rasterio rioxarray geopandas shapely pyproj


In [None]:
# Mount Google Drive, then authorize and initialize Earth Engine
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, time, json, math
import geopandas as gpd
import pandas as pd
import ee
import geemap

# Initialize Earth Engine. If initialization fails, run the interactive
# authorization flow (it prompts with a link) and then initialize again.
try:
    ee.Initialize(project="ee-mianyumifen")
    print("Earth Engine 已初始化。")
except Exception as e:
    print("需要 EE 授权，开始交互式授权流程...")
    ee.Authenticate()
    ee.Initialize(project="ee-mianyumifen")
    print("Earth Engine 已授权并初始化。")

# Print the working directory and mount point as a sanity check
print("当前工作目录：", os.getcwd())
print("Drive 挂载路径：/content/drive")


Mounted at /content/drive
Earth Engine 已初始化。
当前工作目录： /content
Drive 挂载路径：/content/drive


In [None]:
# ========== Edit the two paths below to match the actual locations in your Drive ==========
DRIVE_INPUT_DIR = '/content/drive/MyDrive/allRoi'   # directory that contains allRoi.shp
OUT_DRIVE_FOLDER = 'sential2_exports_final'                        # Drive folder name for the exports (under MyDrive)
LOG_PATH = os.path.join(DRIVE_INPUT_DIR, 'sential2_export_log.json')  # path of the export log

# Other control parameters
HLS_COLLECTION = 'COPERNICUS/S2_SR_HARMONIZED'  # Sentinel-2 SR Harmonized collection ID (despite the HLS_ variable name)

PREFERRED_BANDS = ['B4', 'B3', 'B2', 'B11'] # Red, Green, Blue, and SWIR1 (B11 in Sentinel-2 band naming)

EXPORT_SCALE = 10 # metres; NOTE(review): Sentinel-2 B11 (SWIR1) is natively 20 m, so it is resampled at this scale -- confirm this is intended
MAX_BANDS_WARN = 12000  # stacked-band count above which the export should switch to monthly composites to avoid an oversized single file
SLEEP_BETWEEN_START = 1  # seconds to wait between starting export tasks (avoids too much concurrency)


In [None]:
# Load the ROI shapefile (the .shp/.dbf/.shx/.prj files must sit in the same directory)
roi_shp_path = os.path.join(DRIVE_INPUT_DIR, 'allRoi.shp')
if not os.path.exists(roi_shp_path):
    raise FileNotFoundError(f"找不到 {roi_shp_path}，请把 allRoi.shp 放到 DRIVE_INPUT_DIR 指定的目录下。")

rois_gdf = gpd.read_file(roi_shp_path)
print("读取 ROI 数量：", len(rois_gdf))
print("字段名示例：", rois_gdf.columns.tolist())

# Pick the column holding the site name: 'site' wins over 'name'; when neither
# exists, synthesize a 'site' column from the row position.
name_field = next(
    (candidate for candidate in ('site', 'name') if candidate in rois_gdf.columns),
    None,
)
if name_field is None:
    name_field = 'site'
    rois_gdf[name_field] = [f"roi_{i}" for i in range(len(rois_gdf))]
    print("未找到 name/site 字段，已自动生成 'site' 字段。")

# Lookup table: site name (as str) -> shapely geometry
roi_dict = dict(
    (str(record[name_field]), record.geometry)
    for _, record in rois_gdf.iterrows()
)
print("示例 site keys:", list(roi_dict.keys())[:10])


读取 ROI 数量： 11
字段名示例： ['name', 'Shape_Leng', 'Shape_Le_1', 'Shape_Area', 'geometry']
示例 site keys: ['northinletsaltmarsh', 'norriepoint', 'mayberry', 'hillslough', 'gcesapelo', 'cmarshhighsaltmarsh', 'brackishimpoundment', 'bnzrichfen', 'richmondbrackishmarsh', 'siwetland']


In [None]:
# Schedule supplied by the user: year -> list of site names to export for that year
schedule_dict = {
    2014: ["mayberry"],
    2015: ["gcesapelo", "mayberry", "siwetland"],
    2016: ["mayberry", "gcesapelo", "siwetland"],
    2017: ["siwetland", "mayberry", "gcesapelo"],
    2018: ["siwetland", "mayberry", "gcesapelo", "northinletsaltmarsh"],
    2019: ["siwetland", "mayberry", "gcesapelo"],
    2020: ["gcesapelo", "northinletsaltmarsh", "siwetland", "brackishimpoundment", "mayberry"],
    2021: ["brackishimpoundment", "northinletsaltmarsh", "hillslough", "mayberry", "siwetland", "gcesapelo"],
    2022: ["brackishimpoundment", "gcesapelo", "hillslough", "northinletsaltmarsh", "bnzrichfen"],
    2023: ["brackishimpoundment", "gcesapelo", "hillslough", "northinletsaltmarsh", "vancouversaltmarsh"],
    2024: ["cmarshhighsaltmarsh", "norriepoint"]
}

# Flatten the schedule into (site, year) pairs; the year is stored as a string
pairs = [
    (site_name, str(yr))
    for yr, site_names in schedule_dict.items()
    for site_name in site_names
]
print("要处理的 site-year 对数量：", len(pairs))
print(pairs)


要处理的 site-year 对数量： 40
[('mayberry', '2014'), ('gcesapelo', '2015'), ('mayberry', '2015'), ('siwetland', '2015'), ('mayberry', '2016'), ('gcesapelo', '2016'), ('siwetland', '2016'), ('siwetland', '2017'), ('mayberry', '2017'), ('gcesapelo', '2017'), ('siwetland', '2018'), ('mayberry', '2018'), ('gcesapelo', '2018'), ('northinletsaltmarsh', '2018'), ('siwetland', '2019'), ('mayberry', '2019'), ('gcesapelo', '2019'), ('gcesapelo', '2020'), ('northinletsaltmarsh', '2020'), ('siwetland', '2020'), ('brackishimpoundment', '2020'), ('mayberry', '2020'), ('brackishimpoundment', '2021'), ('northinletsaltmarsh', '2021'), ('hillslough', '2021'), ('mayberry', '2021'), ('siwetland', '2021'), ('gcesapelo', '2021'), ('brackishimpoundment', '2022'), ('gcesapelo', '2022'), ('hillslough', '2022'), ('northinletsaltmarsh', '2022'), ('bnzrichfen', '2022'), ('brackishimpoundment', '2023'), ('gcesapelo', '2023'), ('hillslough', '2023'), ('northinletsaltmarsh', '2023'), ('vancouversaltmarsh', '2023'), ('cmars

In [None]:
# ---------- Helper: convert a shapely geometry into an ee.Geometry ----------
from shapely.geometry import mapping
def shapely_to_ee(geom):
    """Return an ee.Geometry built from the GeoJSON-like mapping of ``geom``."""
    geojson_like = mapping(geom)
    return ee.Geometry(geojson_like)

# ---------- Build the mask function (QA60 first, then Fmask, then SCL, else empirical thresholds) ----------
def make_mask_function(bandnames_list):
    """
    Return a per-image mask function chosen from the available band names.

    The strategy is decided once, client-side, from ``bandnames_list``:

    1. QA60 present  -> drop pixels with bit 10 (opaque cloud) or bit 11 (cirrus) set.
       Where QA60 itself is masked, fall back per pixel to the SCL rule when
       SCL is present, otherwise keep the pixel.
    2. Fmask present -> drop cloud, cloud shadow, snow and water bits.
    3. SCL present   -> drop classes 3, 8, 9, 10 and 11.
    4. Otherwise     -> empirical brightness / MNDWI thresholds on reflectance.

    Args:
        bandnames_list: band names (list of str) of a representative image.

    Returns:
        A function ``mask_fn(img)`` that returns ``img`` with its mask updated.
    """
    bandset = set(bandnames_list)
    qa_key = next((k for k in ('QA60', 'qa60', 'Qa60') if k in bandset), None)
    fmask_key = next((k for k in ('Fmask', 'fmask') if k in bandset), None)
    use_scl = 'SCL' in bandset  # scene classification layer may exist
    # Earth Engine calls are lazy, so a missing band cannot be detected with
    # try/except around select(); decide availability from the band names.
    has_mndwi_bands = 'B3' in bandset and 'B11' in bandset

    def scl_clear(img):
        """Return a 0/1 image that is 1 where SCL is not a cloud/shadow/snow class."""
        scl = img.select('SCL')
        # SCL classes removed: 3=cloud shadow, 8=medium cloud, 9=high cloud, 10=thin cirrus, 11=snow
        return scl.neq(3).And(scl.neq(8)).And(scl.neq(9)).And(scl.neq(10)).And(scl.neq(11))

    def mask_fn(img):
        img = ee.Image(img)

        # 1) QA60: bitwise tests on the integer band
        if qa_key is not None:
            qa = img.select(qa_key)
            opaque_cloud_bit = 1 << 10
            cirrus_bit = 1 << 11
            no_opaque = qa.bitwiseAnd(opaque_cloud_bit).eq(0)
            no_cirrus = qa.bitwiseAnd(cirrus_bit).eq(0)
            qa_mask = no_opaque.And(no_cirrus)
            # NOTE(review): the Earth Engine catalog lists QA60 as masked out for
            # 2022-01-25..2024-02-28. A masked QA60 pixel would otherwise mask
            # the whole observation, so substitute SCL (or "keep") there.
            # Pixels with a valid QA60 value are treated exactly as before.
            fallback = scl_clear(img) if use_scl else ee.Image(1)
            return img.updateMask(qa_mask.unmask(fallback))

        # 2) Fmask: bitwise tests on the integer band
        if fmask_key is not None:
            fmask = img.select(fmask_key)
            cloud_bit = 1 << 1
            cloud_shadow_bit = 1 << 3
            snow_bit = 1 << 4
            water_bit = 1 << 5
            is_cloud = fmask.bitwiseAnd(cloud_bit).neq(0)
            is_cloud_shadow = fmask.bitwiseAnd(cloud_shadow_bit).neq(0)
            is_snow = fmask.bitwiseAnd(snow_bit).neq(0)
            is_water = fmask.bitwiseAnd(water_bit).neq(0)
            clear_mask = is_cloud.Not().And(is_cloud_shadow.Not()).And(is_snow.Not()).And(is_water.Not())
            return img.updateMask(clear_mask)

        # 3) SCL only
        if use_scl:
            return img.updateMask(scl_clear(img))

        # 4) Empirical fallback on reflectance (DN * 0.0001, cast to float)
        bnames = [b for b in PREFERRED_BANDS if b in bandset]
        if len(bnames) >= 3:
            brightness = img.select(bnames[:3]).toFloat().multiply(0.0001).reduce(ee.Reducer.mean()).rename('BRIGHT')
            # filter out extremely bright pixels (likely cloud)
            final_mask = brightness.lt(0.9)
        else:
            # Not enough bands for a brightness test: do not reject on brightness.
            final_mask = ee.Image(1)

        if has_mndwi_bands:
            mndwi = (
                img.select(['B3', 'B11']).toFloat().multiply(0.0001)
                .normalizedDifference(['B3', 'B11']).rename('MNDWI')
            )
            # keep pixels that are not water (conservative)
            final_mask = final_mask.And(mndwi.lt(-0.1))
        # When B3/B11 are missing the water test is skipped; a constant-0 MNDWI
        # would fail the `< -0.1` test everywhere and mask every pixel.
        return img.updateMask(final_mask)

    return mask_fn

# ---------- Process a single site-year ----------
def process_site_year(site_name, year, roi_geom, out_drive_folder=OUT_DRIVE_FOLDER, scale=EXPORT_SCALE):
    """
    Export every Sentinel-2 observation of one site-year as a single stacked image.

    Steps: filter the collection by date and ROI -> mask each image -> clip ->
    select bands -> cast to float and scale by 0.0001 -> rename bands with the
    acquisition date -> stack with ``toBands()`` -> start a Drive export task.

    If the stack would exceed ``MAX_BANDS_WARN`` bands, the work is delegated
    to ``process_site_year_monthly`` (one median composite per month).

    Args:
        site_name: site identifier, used in band names and the export name.
        year: calendar year (int or numeric str).
        roi_geom: shapely geometry of the region of interest.
        out_drive_folder: Drive folder name that receives the export.
        scale: export pixel size in metres.

    Returns:
        dict with 'status', 'site', 'year' and, when an export was started,
        'task_id'. Status is 'no_images' when the filtered collection is empty.
    """
    ee_roi = shapely_to_ee(roi_geom)
    yr = int(year)
    start = f'{yr}-01-01'
    # filterDate treats the end date as exclusive, so use Jan 1 of the next
    # year; an end of Dec 31 would silently drop that day's acquisitions.
    end = f'{yr + 1}-01-01'
    col = ee.ImageCollection(HLS_COLLECTION).filterDate(start, end).filterBounds(ee_roi)
    col_size = col.size().getInfo()
    print(f"[{site_name} {year}] images found:", col_size)
    if col_size == 0:
        return {'status':'no_images','site':site_name,'year':year}

    # Band names of the first image decide which bands and which mask are usable
    first = ee.Image(col.first())
    bandnames = first.bandNames().getInfo()
    print("Example bandnames:", bandnames[:30])

    # Preferred bands that actually exist; otherwise fall back to the first four bands
    use_bands = [b for b in PREFERRED_BANDS if b in bandnames]
    if not use_bands:
        use_bands = bandnames[:4]
    print("Using bands:", use_bands)

    # Mask function adapted to the available band names
    mask_fn = make_mask_function(bandnames)

    # Too many bands for one file: export monthly composites instead
    if col_size * len(use_bands) > MAX_BANDS_WARN:
        print(f"[{site_name} {year}] stacked band count exceeds {MAX_BANDS_WARN}; switching to monthly composites")
        return process_site_year_monthly(
            site_name, year, roi_geom, use_bands, mask_fn,
            out_drive_folder=out_drive_folder, scale=scale,
        )

    # Per-image preprocessing: mask -> clip -> select -> float reflectance -> date-suffixed band names
    def prep(img):
        img = ee.Image(img)
        img_masked = mask_fn(img)
        img_clip = img_masked.clip(ee_roi)
        date = ee.Date(img_clip.get('system:time_start')).format('YYYYMMdd')
        # select, cast to float and convert to reflectance (0..1)
        chosen = img_clip.select(use_bands).toFloat().multiply(0.0001)
        newnames = [ee.String(b).cat('_').cat(date) for b in use_bands]
        return chosen.rename(newnames).set({'system:index': ee.String(site_name).cat('_').cat(date)})

    prepared = col.map(prep)
    # map() never drops images, so the prepared count equals col_size (> 0 here);
    # a second server round trip to re-count is unnecessary.
    print(f"[{site_name} {year}] prepared (masked & clipped) images:", col_size)

    # Combine into a single image: every observation's bands become separate bands
    stacked = prepared.toBands().toFloat()   # ensure final image is float

    # Start export to Drive
    export_name = f"{site_name}_{year}"
    task = ee.batch.Export.image.toDrive(
        image=stacked,
        description=export_name,
        folder=out_drive_folder,
        fileNamePrefix=export_name,
        region=ee_roi,
        scale=scale,
        maxPixels=1e13
    )
    task.start()
    print("Export started:", export_name, " -> Drive folder:", out_drive_folder)
    return {'status':'export_started','site':site_name,'year':year,'task_id': getattr(task, 'id', None)}


# ---------- Alternative: monthly composites (for when the per-image stack has too many bands) ----------
def process_site_year_monthly(site_name, year, roi_geom, use_bands, mask_fn, out_drive_folder=OUT_DRIVE_FOLDER, scale=EXPORT_SCALE):
    """
    Export one median composite per calendar month for a site-year.

    Months with no imagery are skipped. Each composite's bands are renamed to
    ``<band>_<year>_<MM>`` and all composites are stacked into one image that
    is exported to Drive as ``<site_name>_<year>_monthly``.

    Args:
        site_name: site identifier, used in the export name.
        year: calendar year (int or numeric str).
        roi_geom: shapely geometry of the region of interest.
        use_bands: band names to select from every image.
        mask_fn: function applied to each image before compositing.
        out_drive_folder: Drive folder name that receives the export.
        scale: export pixel size in metres.

    Returns:
        dict with 'status', 'site', 'year' and, when an export was started,
        'task_id'. Status is 'no_clear_images_monthly' when no month has imagery.
    """
    ee_roi = shapely_to_ee(roi_geom)
    yr = int(year)
    # Collection for the whole year. filterDate's end is exclusive, so end on
    # Jan 1 of the next year to keep Dec 31 acquisitions in December.
    start = f'{yr}-01-01'
    end = f'{yr + 1}-01-01'
    col = ee.ImageCollection(HLS_COLLECTION).filterDate(start, end).filterBounds(ee_roi)

    # Per-image preprocessing (mask -> select -> float reflectance -> clip);
    # it does not depend on the month, so define it once outside the loop.
    def prep_month(img):
        img = ee.Image(img)
        img_masked = mask_fn(img)
        return img_masked.select(use_bands).toFloat().multiply(0.0001).clip(ee_roi)

    # One median composite per month
    monthly_images = []
    for m in range(1, 13):
        s = ee.Date.fromYMD(yr, m, 1)
        e = s.advance(1, 'month')
        sub = col.filterDate(s, e)
        # Skip months without imagery (map() keeps the count, so test the unmapped subset)
        if sub.size().getInfo() == 0:
            continue
        composite = sub.map(prep_month).median()
        # Rename bands to e.g. B4_YYYY_MM; year and month are client-side values,
        # so plain Python strings are enough.
        date_label = f"{year}_{m:02d}"
        newnames = [f"{b}_{date_label}" for b in use_bands]
        monthly_images.append(composite.rename(newnames))

    if not monthly_images:
        return {'status':'no_clear_images_monthly','site':site_name,'year':year}

    # Merge the monthly composites into a single multi-band image
    stacked_monthly = ee.ImageCollection(monthly_images).toBands().toFloat()

    export_name = f"{site_name}_{year}_monthly"
    task = ee.batch.Export.image.toDrive(
        image=stacked_monthly,
        description=export_name,
        folder=out_drive_folder,
        fileNamePrefix=export_name,
        region=ee_roi,
        scale=scale,
        maxPixels=1e13
    )
    task.start()
    print("Monthly-export started:", export_name)
    return {'status':'export_started_monthly','site':site_name,'year':year,'task_id': getattr(task, 'id', None)}


In [None]:
# Batch run over the (site, year) pairs built earlier
results = []
started = 0
for idx, (site, year) in enumerate(pairs, start=1):
    print(f"\n[{idx}/{len(pairs)}] 开始处理： {site} - {year}")
    if site not in roi_dict:
        print("  WARNING: site 在 shapefile 中未找到：", site)
        results.append({'site':site,'year':year,'status':'site_not_found'})
        continue
    try:
        res = process_site_year(site, year, roi_dict[site], out_drive_folder=OUT_DRIVE_FOLDER, scale=EXPORT_SCALE)
    except Exception as e:
        # Best effort: record the failure and carry on with the next pair
        print("  ERROR:", str(e))
        results.append({'site':site,'year':year,'status':'error','error_msg': str(e)})
    else:
        results.append(res)
        print("  结果：", res)
        # Count only results that actually started an export task; statuses
        # such as 'no_images' must not inflate the started-task count.
        if str(res.get('status', '')).startswith('export_started'):
            started += 1
    time.sleep(SLEEP_BETWEEN_START)

# Save the log to Drive
with open(LOG_PATH, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)
print("\n全部任务已提交（或尝试提交）。日志已保存到：", LOG_PATH)
print("本次启动的任务数量（近似）：", started)



[1/40] 开始处理： mayberry - 2014
[mayberry 2014] images found: 0
  结果： {'status': 'no_images', 'site': 'mayberry', 'year': '2014'}

[2/40] 开始处理： gcesapelo - 2015
[gcesapelo 2015] images found: 1
Example bandnames: ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'WVP', 'SCL', 'TCI_R', 'TCI_G', 'TCI_B', 'MSK_CLDPRB', 'MSK_SNWPRB', 'QA10', 'QA20', 'QA60', 'MSK_CLASSI_OPAQUE', 'MSK_CLASSI_CIRRUS', 'MSK_CLASSI_SNOW_ICE']
Using bands: ['B4', 'B3', 'B2', 'B11']
[gcesapelo 2015] prepared (masked & clipped) images: 1
Export started: gcesapelo_2015  -> Drive folder: sential2_exports_final
  结果： {'status': 'export_started', 'site': 'gcesapelo', 'year': '2015', 'task_id': '4NWZ6JRQQREYEJPUZYCX4RND'}

[3/40] 开始处理： mayberry - 2015
[mayberry 2015] images found: 9
Example bandnames: ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'WVP', 'SCL', 'TCI_R', 'TCI_G', 'TCI_B', 'MSK_CLDPRB', 'MSK_SNWPRB', 'QA10', 'QA20', 'QA60', 'MSK_CLASSI_OP