In [None]:
# @title 🛰️ 第一步：GEE 任务提交器 (V3.1 修复版)
# Step 1 (Colab form cell): settings for submitting Sentinel-2 export tasks to Earth Engine.
# The "# @title", "# @markdown" and "# @param" comments are Colab form directives; keep their format intact.
# @markdown #### 1. 基础设置
# Section 1 - basic settings.
# Cloud project passed to ee.Initialize(project=...).
project_id = "ee-mianyumifen"  # @param {type:"string"}
# Shapefile read with geopandas in main_submit(); each row becomes one export region.
shp_path = "/content/drive/MyDrive/doverdesaltmarsh/doverdesaltmarsh.shp"  # @param {type:"string"}
# Passed as the `folder` argument of ee.batch.Export.image.toDrive.
staging_folder = "S2_Staging_New_v1"  # @param {type:"string"}

# @markdown #### 2. 数据需求
# Section 2 - requested data.
# Comma-separated band / index names. Names that are keys of FORMULAS are computed
# from expressions; all requested names are then selected from each image.
bands_input = "B2, B3, B4, B8, B7, MNDWI"  # @param {type:"string"}

# @markdown #### 3. 时间与空间
# Section 3 - time range and spatial settings.
# Years are iterated with range(start_year, end_year + 1), i.e. both ends inclusive.
start_year = 2016  # @param {type:"integer"}
end_year = 2026  # @param {type:"integer"}
# Distance passed to ee.Geometry.buffer() around each site geometry (presumably meters - confirm).
buffer_radius = 700  # @param {type:"integer"}
# Passed as the export `scale` argument (presumably meters per pixel - confirm).
scale = 10  # @param {type:"integer"}
# ================= 核心逻辑 =================
import ee
import geemap
import geopandas as gpd
from shapely.geometry import mapping
from google.colab import drive
import os

# Mount Google Drive (forcing a remount) so files under /content/drive are reachable.
drive.mount('/content/drive', force_remount=True)

# Try to initialize Earth Engine directly; if that fails, run the interactive
# authentication flow and retry once. `except Exception` (rather than a bare
# `except`) lets KeyboardInterrupt / SystemExit propagate instead of silently
# dropping into the authentication prompt.
try:
    ee.Initialize(project=project_id)
except Exception:
    ee.Authenticate()
    ee.Initialize(project=project_id)

# --- 任务管理模块 ---
def check_and_ask_cancel():
    """Interactively offer to cancel Earth Engine tasks that are READY or RUNNING.

    Lists the account's tasks, prints up to three of the active ones, and asks
    on stdin whether to cancel them all. Answering ``y`` (case-insensitive,
    surrounding whitespace ignored) cancels every active task; any other answer
    keeps them. Cancellation is best-effort: a task that fails to cancel does
    not stop the loop, but the number of failures is reported instead of being
    silently swallowed.

    Any error while listing tasks or prompting is printed and the check is
    skipped, so the caller can continue submitting new tasks.

    Returns:
        None. When the queue is empty the function returns early, before the
        trailing separator line is printed.
    """
    print("\n🔍 正在检查 GEE 任务队列...")
    try:
        active_tasks = [
            t for t in ee.batch.Task.list() if t.state in ('RUNNING', 'READY')
        ]

        count = len(active_tasks)
        if count == 0:
            print("✅ 当前队列空闲，没有积压任务。")
            return

        print(f"⚠️ 发现 {count} 个正在运行或等待的任务！")
        # Preview only the first three tasks to keep the output short.
        for t in active_tasks[:3]:
            desc = (t.config or {}).get('description', '无描述')
            print(f"   - {desc} ({t.state})")
        if count > 3:
            print("   ... 等")

        ans = input(f"❓ 是否要【取消】这 {count} 个旧任务？(输入 y 确认取消，直接回车跳过): ")

        if ans.strip().lower() == 'y':
            print("🚫 正在批量取消...")
            failed = 0
            for t in active_tasks:
                try:
                    t.cancel()
                except Exception:
                    # Best-effort: keep cancelling the rest, but remember the failure.
                    failed += 1
            print("✅ 已发送取消指令。")
            if failed:
                print(f"⚠️ 其中 {failed} 个任务取消失败，请在 GEE Tasks 页面手动确认。")
        else:
            print("👌 保留旧任务，继续提交新任务。")

    except Exception as e:
        print(f"⚠️ 无法检查任务队列 (网络波动): {e}")
        print("   跳过检查，直接继续...")
    print("-" * 30)

# --- 公式库 ---
def _normalized_difference(first, second):
    """Return the expression string for (first - second) / (first + second)."""
    return f'(b("{first}") - b("{second}")) / (b("{first}") + b("{second}"))'


# Spectral-index expression strings keyed by the output band name.
# Each string is handed to Image.expression(); b("...") names an input band.
FORMULAS = {
    'NDVI': _normalized_difference("B8", "B4"),
    'MNDWI': _normalized_difference("B3", "B11"),
    'NDWI': _normalized_difference("B3", "B8"),
    'EVI': '2.5 * ((b("B8") - b("B4")) / (b("B8") + 6 * b("B4") - 7.5 * b("B2") + 1))',
}

def get_processed_collection(roi, year, needed_bands):
    """Build the cloud-masked, scaled Sentinel-2 collection for one region and year.

    Args:
        roi: Earth Engine geometry used with ``filterBounds``.
        year: Calendar year to cover (an int, or anything ``int()`` accepts).
        needed_bands: List of band / index names to keep. Names that are keys
            of ``FORMULAS`` are computed from the scaled bands; all names are
            then selected from the result.

    Returns:
        An ``ee.ImageCollection`` whose images contain exactly ``needed_bands``
        as float bands, masked by QA60 bits 10 and 11.
    """
    # filterDate() treats the end date as exclusive, so the range must end on
    # Jan 1 of the following year; ending on Dec 31 would drop that day's images.
    col = (
        ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
        .filterBounds(roi)
        .filterDate(f'{year}-01-01', f'{int(year) + 1}-01-01')
    )

    # Resolve which index formulas are requested once, not once per mapped call.
    requested_formulas = [
        (name, formula) for name, formula in FORMULAS.items() if name in needed_bands
    ]

    def process_img(img):
        # Keep pixels where QA60 bits 10 and 11 are both unset.
        # NOTE(review): QA60 availability varies across the archive according to
        # the dataset catalog - confirm the mask behaves as intended for every year.
        qa = img.select('QA60')
        mask = qa.bitwiseAnd(1 << 10).eq(0).And(qa.bitwiseAnd(1 << 11).eq(0))
        # Scale every band whose name starts with "B" by 0.0001.
        scaled = img.select(['B.*']).multiply(0.0001)
        final_img = scaled
        for name, formula in requested_formulas:
            final_img = final_img.addBands(scaled.expression(formula).rename(name))
        return final_img.select(needed_bands).toFloat().updateMask(mask)

    return col.map(process_img)

def main_submit():
    """Submit one Drive export task per (site, year) for every shapefile feature.

    Reads the module-level form settings (``bands_input``, ``shp_path``,
    ``start_year``, ``end_year``, ``buffer_radius``, ``scale``,
    ``staging_folder``). For each site and year the processed collection is
    flattened with ``toBands()`` into a single multi-band image and exported
    with ``ee.batch.Export.image.toDrive``. Site/year pairs without imagery
    and rows without a usable geometry are skipped with a message.

    Returns:
        None. Progress and the number of submitted tasks are printed.
    """
    # 1. Offer to cancel tasks that are still queued or running.
    check_and_ask_cancel()

    # 2. Parse the requested bands, dropping empty entries (e.g. a trailing comma).
    target_bands = [b.strip() for b in bands_input.split(',') if b.strip()]
    if not target_bands:
        print("❌ 未指定任何波段，请检查 bands_input。")
        return
    print(f"\n🎯 本次任务目标: {target_bands}")

    gdf = gpd.read_file(shp_path)
    # ee.Geometry() interprets plain GeoJSON coordinates as lon/lat (EPSG:4326),
    # so a shapefile stored in another CRS must be reprojected first.
    if gdf.crs is None:
        print("⚠️ shapefile 缺少 CRS 信息，将按 EPSG:4326 处理。")
    elif gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    # Use the 'site' column for naming when present, else 'name', else the row index.
    name_col = 'site' if 'site' in gdf.columns else ('name' if 'name' in gdf.columns else None)

    def add_index_tag(img):
        # Re-label each image as "<YYYYMMdd>_<MGRS tile>" so the band names
        # produced by toBands() carry the acquisition date and tile.
        date = ee.Date(img.get('system:time_start')).format('YYYYMMdd')
        tile = ee.String(img.get('MGRS_TILE'))
        return img.set('system:index', date.cat('_').cat(tile))

    tasks_count = 0
    print("🚀 正在构建任务队列...")

    for idx, row in gdf.iterrows():
        site_name = str(row[name_col]) if name_col else f"site_{idx}"

        geom = row.geometry
        if geom is None or geom.is_empty:
            print(f"  ⚠️ {site_name} 几何为空，跳过")
            continue
        roi = ee.Geometry(mapping(geom)).buffer(buffer_radius)

        for year in range(start_year, end_year + 1):
            col = get_processed_collection(roi, year, target_bands)

            # Blocking round-trip to the server: skip years with no imagery.
            if col.size().getInfo() == 0:
                print(f"  ⚠️ {site_name} - {year} 无影像，跳过")
                continue

            stack = col.map(add_index_tag).toBands()

            # The same string is used as task description and output file prefix;
            # step 2 recovers site and year by splitting on the last underscore.
            task_name = f"{site_name}_{year}"

            task = ee.batch.Export.image.toDrive(
                image=stack,
                description=task_name,
                folder=staging_folder,
                fileNamePrefix=task_name,
                region=roi,
                scale=scale,
                maxPixels=1e13
            )
            task.start()
            tasks_count += 1
            print(f"  ✅ 提交: {task_name}")

    print(f"\n🎉 完成！共提交 {tasks_count} 个任务。")
    print(f"结果将存入文件夹: {staging_folder}")
    print("请去 GEE 网页端 Tasks 页面等待任务变成绿色 (COMPLETED)。")

main_submit()

Mounted at /content/drive

🔍 正在检查 GEE 任务队列...
⚠️ 发现 11 个正在运行或等待的任务！
   - doverdesaltmarsh_2026 (READY)
   - doverdesaltmarsh_2025 (READY)
   - doverdesaltmarsh_2024 (READY)
   ... 等
❓ 是否要【取消】这 11 个旧任务？(输入 y 确认取消，直接回车跳过): y
🚫 正在批量取消...
✅ 已发送取消指令。
------------------------------

🎯 本次任务目标: ['B2', 'B3', 'B4', 'B8', 'B7', 'MNDWI']
🚀 正在构建任务队列...
  ✅ 提交: doverdesaltmarsh_2016
  ✅ 提交: doverdesaltmarsh_2017
  ✅ 提交: doverdesaltmarsh_2018
  ✅ 提交: doverdesaltmarsh_2019
  ✅ 提交: doverdesaltmarsh_2020
  ✅ 提交: doverdesaltmarsh_2021
  ✅ 提交: doverdesaltmarsh_2022
  ✅ 提交: doverdesaltmarsh_2023
  ✅ 提交: doverdesaltmarsh_2024
  ✅ 提交: doverdesaltmarsh_2025
  ✅ 提交: doverdesaltmarsh_2026

🎉 完成！共提交 11 个任务。
结果将存入文件夹: S2_Staging_New_v1
请去 GEE 网页端 Tasks 页面等待任务变成绿色 (COMPLETED)。


In [None]:
# @title 📂 第二步：云端拆箱与整理 (V2.1 终极正则修复版)
# Step 2 (Colab form cell): settings for splitting the exported multi-band GeoTIFFs.
# The "# @title", "# @markdown" and "# @param" comments are Colab form directives; keep their format intact.
# @markdown #### 1. 路径设置
# Section 1 - path settings.
# Must match the staging folder name used in step 1 exactly.
# NOTE(review): the value below ("S2_Staging_New_v1 (1)") differs from step 1's
# "S2_Staging_New_v1" - confirm which Drive folder actually holds the exports.
# This assignment also rebinds the global `staging_folder` set by the step-1 cell.
staging_folder = "S2_Staging_New_v1 (1)"  # @param {type:"string"}
# Folder name joined under /content/drive/MyDrive to form OUT_DIR.
final_output_folder = "S2_Final_Collection_doverdesaltmarsh"  # @param {type:"string"}

# When True, each source .tif is removed with os.remove() after it has been split.
delete_source_after_split = False  # @param {type:"boolean"}

import os
import re
import rasterio
import shutil
from google.colab import drive
from tqdm.notebook import tqdm

# Force a remount of Google Drive at /content/drive.
drive.mount('/content/drive', force_remount=True)

# Resolve the staging (input) and final (output) folders under the Drive root.
BASE_PATH = "/content/drive/MyDrive"
IN_DIR, OUT_DIR = (
    os.path.join(BASE_PATH, folder_name)
    for folder_name in (staging_folder, final_output_folder)
)

# Band-description patterns. The first expects an 8-digit date, then anything,
# then a tile id ("T" + 2 digits + 3 capitals), then "_<band>" at the end.
# The second is the fallback for descriptions without a tile id.
_DESC_WITH_TILE = re.compile(r'(\d{8}).*(T\d{2}[A-Z]{3})_([a-zA-Z0-9]+)$')
_DESC_NO_TILE = re.compile(r'(\d{8}).*_([a-zA-Z0-9]+)$')


def _group_band_descriptions(descriptions):
    """Map (date, tile) -> {band name: 1-based band index} from band descriptions."""
    grouped = {}
    for idx, desc in enumerate(descriptions):
        if not desc:
            continue

        match = _DESC_WITH_TILE.search(desc)
        if match:
            d_str, t_str, b_name = match.groups()
        else:
            match = _DESC_NO_TILE.search(desc)
            if not match:
                continue
            d_str, b_name = match.groups()
            t_str = "NoTile"

        # rasterio band indexes are 1-based, hence idx + 1.
        grouped.setdefault((d_str, t_str), {})[b_name] = idx + 1
    return grouped


def split_and_organize():
    """Split every stacked GeoTIFF in IN_DIR into one GeoTIFF per date and tile.

    Each input file is expected to be named ``<site>_<year>.tif``. Its band
    descriptions are grouped by acquisition date and tile id, and every group
    is written to ``OUT_DIR/<site>/<year>/<site>_<date>_<tile>.tif`` with the
    bands in alphabetical order of their names, LZW-compressed, and with the
    band name stored as the band description.

    Files whose name has no underscore, whose first band has no description,
    or whose descriptions cannot be parsed are skipped with a message. A
    failure while processing one file is reported and does not stop the run.
    When ``delete_source_after_split`` is true, a source file is removed only
    after it has been processed without error.

    Returns:
        None. Progress and problems are printed.
    """
    if not os.path.exists(IN_DIR):
        print(f"❌ 错误：找不到文件夹 {IN_DIR}")
        return

    # Sorted so the processing order is deterministic across runs.
    files = sorted(f for f in os.listdir(IN_DIR) if f.endswith('.tif'))
    print(f"📦 发现 {len(files)} 个文件，开始智能拆箱...\n")

    for filename in tqdm(files, desc="总进度"):
        src_path = os.path.join(IN_DIR, filename)

        # "<site>_<year>.tif": split on the last underscore of the stem.
        site_name, sep, year = filename[:-len('.tif')].rpartition('_')
        if not sep:
            print(f"⚠️ 跳过文件名不对的文件: {filename}")
            continue

        try:
            with rasterio.open(src_path) as src:
                descriptions = src.descriptions

                if not descriptions or descriptions[0] is None:
                    print(f"❌ {filename} 是空文件，跳过。")
                    continue

                grouped = _group_band_descriptions(descriptions)
                if not grouped:
                    print(f"⚠️ {filename} 依然无法解析，请检查波段名格式。")
                    continue

                save_dir = os.path.join(OUT_DIR, site_name, year)
                os.makedirs(save_dir, exist_ok=True)

                for (date_val, tile_val), bands_map in grouped.items():
                    out_name = f"{site_name}_{date_val}_{tile_val}.tif"
                    out_path = os.path.join(save_dir, out_name)

                    sorted_bands = sorted(bands_map)
                    # Reuse the source profile, changing only band count and output format.
                    meta = src.meta.copy()
                    meta.update({'count': len(sorted_bands), 'driver': 'GTiff', 'compress': 'lzw'})

                    with rasterio.open(out_path, 'w', **meta) as dst:
                        for new_idx, b_name in enumerate(sorted_bands, start=1):
                            dst.write(src.read(bands_map[b_name]), new_idx)
                            dst.set_band_description(new_idx, b_name)

        except Exception as e:
            print(f"❌ 处理 {filename} 失败: {e}")
            continue

        if delete_source_after_split:
            os.remove(src_path)

    print(f"\n✅ 全部完成！查看结果: {OUT_DIR}")

split_and_organize()

Mounted at /content/drive
📦 发现 11 个文件，开始智能拆箱...



总进度:   0%|          | 0/11 [00:00<?, ?it/s]


✅ 全部完成！查看结果: /content/drive/MyDrive/S2_Final_Collection_doverdesaltmarsh
