### === USER CONFIGURATION ===


In [None]:
# === USER CONFIGURATION ===
# Path to the downloaded L1B product folder.
# Later cells expect the folder name to start with S2A_/S2B_/S2C_ and end with
# .SAFE, the path to contain a "_V<YYYYMMDDTHHMMSS>_" timestamp, and the folder
# to hold a GRANULE sub-directory plus an "MTD" *.xml file.
PATH_L1B_DATA = "PATH/TO/L1B/DATA"

# Output folder chosen by the user
# NOTE(review): not referenced by any other cell visible in this notebook - confirm.
OUTPUT_FOLDER = "PATH/TO/OUTPUT/FOLDER" 

# Path to geoid file: OPTIONAL.
# When left empty, the config cell falls back to the geoid located in the
# sen2vm-core test resources.
GEOID_PATH = ""


# === SEN2VM OPTIONS ===

GRID_MODE = "direct"  # direct or inverse; written to config.json as "operation"
MOVE_GIPP = True   # True = move extracted GIPP files directly under S2A/S2B/S2C
                   # False = keep them inside GIP_* folders
IERS_TYPE = "A"    # "B" (any case) downloads Bulletin B; any other value downloads Bulletin A
UTM_EPSG = 23037  # UTM zone EPSG code for the ROI
# ROI corners. Used for the inverse-location block of config.json and as the
# gdalwarp output extent (passed as ul_x lr_y lr_x ul_y).
# NOTE(review): presumably expressed in the UTM_EPSG projection; the default
# values have ul_y < lr_y - confirm the intended ordering.
LOCATION = {
    "ul_x": -235763.5,
    "ul_y": 3344817.1,
    "lr_x": -234763.5,
    "lr_y": 3345817.1
}
# Copied key by key into the "steps" block of config.json.
STEPS = {
    "10m_bands": 10,
    "20m_bands": 20,
    "60m_bands": 60
}


# === GDAL ORTHO OPTIONS ===
# NOTE(review): the cells visible in this notebook only read "keep_bands" and
# "global_resolution"; "enabled" and "resolution_mode" do not appear to be
# consulted anywhere - confirm before relying on them.
ORTHO_SETTINGS = {
    "enabled": True,                 # True = do orthorectification
    "keep_bands": ["B01" , "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B10", "B11", "B12"],
    "resolution_mode": "global",     # "global" or "per_band"
    "global_resolution": 10,         # used if resolution_mode = "global"

}


In [None]:
# === GIPPs ===

import os
import shutil
import tarfile
import re
from datetime import datetime


import subprocess

# In this cell root_dir is the current working directory itself; both
# repositories are cloned directly into it.
root_dir = os.getcwd()
src_dir = root_dir


def _remove_if_exists(repo_path):
    """Delete a previous checkout at *repo_path*, if there is one."""
    if os.path.exists(repo_path):
        print(f"Removing existing repository: {repo_path}")
        shutil.rmtree(repo_path)


def _git_clone(url, repo_path):
    """Clone *url* into *repo_path*.

    Raises subprocess.CalledProcessError when git exits with a non-zero status,
    so a failed clone stops the cell instead of being reported as complete.
    Passing the arguments as a list keeps paths containing spaces intact.
    """
    subprocess.run(["git", "clone", url, repo_path], check=True)


# =====================================================================
# 1) CLONE sen2vm-gipp-database
# =====================================================================

gipp_repo_name = "sen2vm-gipp-database"
gipp_repo_path = os.path.join(src_dir, gipp_repo_name)

# Always start from a fresh checkout
_remove_if_exists(gipp_repo_path)

print("Cloning sen2vm-gipp-database...")
_git_clone("https://github.com/sen2vm/sen2vm-gipp-database.git", gipp_repo_path)
print("Clone complete.\n")

# =====================================================================
# 2) CLONE sen2vm-core
# =====================================================================

print("cloning sen2vm-core...")

core_repo_name = "sen2vm-core"
core_repo_path = os.path.join(src_dir, core_repo_name)

# Always start from a fresh checkout
_remove_if_exists(core_repo_path)

_git_clone("https://github.com/sen2vm/sen2vm-core.git", core_repo_path)
print("sen2vm-core clone complete.\n")



# =====================================================================
# 3) EXTRACT PRODUCT DATE
# =====================================================================

# The product timestamp is the "_V<YYYYMMDDTHHMMSS>_" token found in the path.
match = re.search(r"_V(\d{8}T\d{6})_", PATH_L1B_DATA)
if match is None:
    raise RuntimeError("Could not extract product date from L1B filename.")

(product_date_str,) = match.groups()
product_date = datetime.strptime(product_date_str, "%Y%m%dT%H%M%S")

print("Product date extracted:", product_date_str, "\n")

# =====================================================================
# 4) PARSE ALL GIPP FILES AND GROUP THEM
# =====================================================================

def parse_gipp_filename(filename):
    """Split a GIPP archive name into its components.

    The accepted shape is
    ``<S2A|S2B|S2C>_OPER_GIP_<type>_MPC__<gen>_V<from>_<to>_<band>.TGZ``
    where the three timestamps are written as ``YYYYMMDDTHHMMSS`` and the
    band token starts with ``B``.

    Returns a dict with the keys ``mission``, ``gip_type``, ``gen_date``,
    ``valid_from``, ``valid_to`` and ``band`` (timestamps as ``datetime``),
    or ``None`` when *filename* does not follow that shape.
    """
    name_regex = (
        r"^(S2[A-C])_OPER_GIP_(\w+)_MPC__"
        r"(\d{8}T\d{6})_V(\d{8}T\d{6})_(\d{8}T\d{6})_(B[\dA-Z]+)\.TGZ$"
    )
    found = re.match(name_regex, filename)
    if found is None:
        return None

    mission, gip_type, generated, starts, ends, band = found.groups()

    # All three timestamps share one layout; convert them in filename order.
    stamp_layout = "%Y%m%dT%H%M%S"
    gen_date, valid_from, valid_to = [
        datetime.strptime(stamp, stamp_layout)
        for stamp in (generated, starts, ends)
    ]

    return {
        "mission": mission,
        "gip_type": gip_type,
        "gen_date": gen_date,
        "valid_from": valid_from,
        "valid_to": valid_to,
        "band": band,
    }

# Maps (mission, gip_type, band) -> list of candidate archives for that slot.
grouped = {}

for mission in ("S2A", "S2B", "S2C"):
    mission_dir = os.path.join(gipp_repo_path, mission)

    for gip_dir in os.listdir(mission_dir):
        gip_path = os.path.join(mission_dir, gip_dir)

        # Only sub-directories of a mission folder are scanned for archives.
        if os.path.isdir(gip_path):
            archive_names = [
                name for name in os.listdir(gip_path) if name.endswith(".TGZ")
            ]

            for f in archive_names:
                info = parse_gipp_filename(f)

                if info:
                    key = (info["mission"], info["gip_type"], info["band"])
                    candidate = {
                        "filename": f,
                        "path": os.path.join(gip_path, f),
                        "valid_from": info["valid_from"]
                    }
                    grouped.setdefault(key, []).append(candidate)
                else:
                    print("Skipping unrecognized GIPP:", f)

print("GIPP files grouped.\n")

# =====================================================================
# 5) SELECT CORRECT GIPP FOR EACH GROUP
# =====================================================================

# For every group keep the archive with the latest validity start that is not
# after the product date; groups with no such archive contribute nothing.
selected = []

for candidates in grouped.values():
    applicable = [c for c in candidates if c["valid_from"] <= product_date]
    if applicable:
        selected.append(max(applicable, key=lambda c: c["valid_from"]))

print(f"Selected {len(selected)} GIPP files.\n")

# =====================================================================
# 6) REMOVE NON-SELECTED TGZ
# =====================================================================

# Paths of the archives that must survive the cleanup.
selected_files = {item["path"] for item in selected}

for mission in ("S2A", "S2B", "S2C"):
    mission_dir = os.path.join(gipp_repo_path, mission)

    for gip_dir in os.listdir(mission_dir):
        gip_path = os.path.join(mission_dir, gip_dir)

        # A plain file sitting in the mission folder cannot be listed; skip it
        # the same way the grouping and move steps do.
        if not os.path.isdir(gip_path):
            continue

        for f in os.listdir(gip_path):
            fpath = os.path.join(gip_path, f)

            if f.endswith(".TGZ") and fpath not in selected_files:
                print("Removing outdated GIPP:", fpath)
                os.remove(fpath)

print("Old GIPP removed.\n")

# =====================================================================
# 7) UNTAR SELECTED GIPP
# =====================================================================

# Unpack every selected archive next to itself, then delete the archive.
for item in selected:
    tgz_path = item["path"]
    dst_dir = os.path.dirname(tgz_path)

    print("Extracting:", tgz_path)
    with tarfile.open(tgz_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Interpreters that ship extraction filters: refuse members that
            # would escape dst_dir or create special files.
            tar.extractall(dst_dir, filter="data")
        else:
            # Older interpreters have no filter support; plain extraction.
            tar.extractall(dst_dir)

    os.remove(tgz_path)

print("\nTGZ extraction complete.\n")

# =====================================================================
# 8) OPTIONAL MOVE OF GIPP FILES
# =====================================================================

if MOVE_GIPP:
    print("Moving extracted GIPP into mission folders...\n")

    for mission in ("S2A", "S2B", "S2C"):
        mission_dir = os.path.join(gipp_repo_path, mission)

        # The listing is taken once, before anything is moved into mission_dir.
        for gip_dir in os.listdir(mission_dir):
            gip_path = os.path.join(mission_dir, gip_dir)

            if os.path.isdir(gip_path):
                # Only regular files are lifted one level up; nested
                # directories stay where they are.
                for entry in os.listdir(gip_path):
                    source = os.path.join(gip_path, entry)
                    if os.path.isfile(source):
                        shutil.move(source, mission_dir)

                # Drop the sub-folder once nothing is left in it.
                if len(os.listdir(gip_path)) == 0:
                    os.rmdir(gip_path)

    print("MOVE_GIPP complete.\n")

print("All GIPP processing finished successfully.")


In [None]:
# === DOWNLOAD IERS ===

import os
import re
import requests
from datetime import datetime


# The DATA folder sits next to the current working directory's parent.
root_dir = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(root_dir, "DATA")

data_dir_present = os.path.exists(DATA_DIR)
if not data_dir_present:
    raise RuntimeError(f"DATA directory not found: {DATA_DIR}")

print("Bulletin output directory:", DATA_DIR)

# =====================================================================
# 0. REMOVE EXISTING IERS BULLETINS
# =====================================================================

# File-name prefix of each bulletin family -> message printed on removal.
stale_bulletins = {
    "bulletina-": "Removed old bulletin A:",
    "bulletinb-": "Removed old bulletin B:",
}

for entry in os.listdir(DATA_DIR):
    if not entry.endswith(".txt"):
        continue
    for prefix, message in stale_bulletins.items():
        if entry.startswith(prefix):
            os.remove(os.path.join(DATA_DIR, entry))
            print(message, entry)

print("Cleanup of old bulletins complete.\n")

# =====================================================================
# 2. EXTRACT PRODUCT DATE
# =====================================================================

# Same "_V<YYYYMMDDTHHMMSS>_" token as in the GIPP cell, but only the calendar
# day is kept here.
match = re.search(r"_V(\d{8}T\d{6})_", PATH_L1B_DATA)
if match is None:
    raise RuntimeError("Could not extract product date from L1B filename.")

product_date_str = match.group(1)
year, month, day = [
    int(product_date_str[start:stop])
    for start, stop in ((0, 4), (4, 6), (6, 8))
]

product_date = datetime(year, month, day)
print("Product date:", product_date.date(), "\n")

# =====================================================================
# Roman conversion 
# =====================================================================

def int_to_roman(n):
    """Return *n* written as a lowercase Roman numeral.

    Values below 1 yield an empty string.
    """
    # Ordered from largest to smallest so each symbol is used greedily.
    numerals = (
        (1000, 'm'), (900, 'cm'), (500, 'd'), (400, 'cd'),
        (100, 'c'), (90, 'xc'), (50, 'l'), (40, 'xl'),
        (10, 'x'), (9, 'ix'), (5, 'v'), (4, 'iv'), (1, 'i'),
    )
    remaining = n
    pieces = []
    for value, symbol in numerals:
        while remaining >= value:
            pieces.append(symbol)
            remaining -= value
    return "".join(pieces)

# =====================================================================
# Bulletin B
# =====================================================================

# Upper bound (seconds) on each HTTP request. Without a timeout requests.get()
# can block indefinitely when the server stops responding.
IERS_HTTP_TIMEOUT = 60

if IERS_TYPE.upper() == "B":

    # Bulletin number derived from the product month:
    # 143 + 12 per year since 2000 + zero-based month index.
    bulletin_number = 143 + (year - 2000)*12 + (month - 1)
    url = f"https://datacenter.iers.org/data/207/bulletinb-{bulletin_number}.txt"
    dest_file = os.path.join(DATA_DIR, f"bulletinb-{bulletin_number}.txt")

    print("Using Bulletin B:", os.path.basename(dest_file))

    response = requests.get(url, timeout=IERS_HTTP_TIMEOUT)
    if response.status_code != 200:
        raise RuntimeError(f"Bulletin B {bulletin_number} not available (HTTP {response.status_code}).")

    with open(dest_file, "wb") as f:
        f.write(response.content)

    print("Downloaded:", dest_file)

# =====================================================================
# Bulletin A
# =====================================================================

else:

    # Volume is (year - 1987) in lowercase Roman numerals; the issue index
    # starts from the 7-day slot that contains the product's day of year.
    roman_year = int_to_roman(year - 1987)
    doy = product_date.timetuple().tm_yday
    index = (doy - 1) // 7 + 1

    print(f"Bulletin A Roman year: {roman_year}")
    print(f"Initial weekly index: {index}\n")

    found = False

    # Walk back one issue at a time until the server has one, stopping at
    # issue 1 of the same volume.
    while index > 0:
        index_str = f"{index:03d}"
        url = f"https://datacenter.iers.org/data/6/bulletina-{roman_year}-{index_str}.txt"
        print("Trying bulletin:", url)

        response = requests.get(url, timeout=IERS_HTTP_TIMEOUT)

        if response.status_code == 200:
            print("Bulletin found:", index_str)
            dest_file = os.path.join(DATA_DIR, f"bulletina-{roman_year}-{index_str}.txt")
            found = True
            break

        index -= 1

    if not found:
        raise RuntimeError("No Bulletin A available for current or previous weeks.")

    # response still holds the successful download from the loop above.
    with open(dest_file, "wb") as f:
        f.write(response.content)

    print("Downloaded:", dest_file)


In [None]:
# === GENERATE CONFIG.JSON  ===

import os
import json
import re

root_dir = os.path.dirname(os.getcwd())
USERCONF_DIR = os.path.join(root_dir, "UserConf")
os.makedirs(USERCONF_DIR, exist_ok=True)

print("UserConf directory:", USERCONF_DIR)

# =====================================================
# 1. Extract mission from PATH_L1B_DATA
# =====================================================
# Work on the normalised folder name so that a trailing slash or a path
# without any directory component is handled as well.
l1b_name = os.path.basename(os.path.normpath(PATH_L1B_DATA))

match = re.match(r"(S2[A-C])_.*\.SAFE$", l1b_name)
if not match:
    raise RuntimeError("Cannot extract mission (S2A/S2B/S2C) from PATH_L1B_DATA.")

mission = match.group(1)

# =====================================================
# 2. Docker paths inside /workspace
# =====================================================
# The product is addressed by folder name under /workspace/DATA.
docker_l1b  = "/workspace/DATA/" + l1b_name
docker_dem  = "/workspace/DATA/DEM"
docker_gipp = f"/workspace/src/sen2vm-gipp-database/{mission}"

# =====================================================
# 3. Resolve geoid path
# =====================================================
# Treat None, "" and whitespace-only values as "no geoid supplied". The None
# case is checked before any string method is called on the value.
if not (GEOID_PATH or "").strip():
    # Fall back to the geoid located in the sen2vm-core test resources.
    docker_geoid = (
        "/workspace/src/sen2vm-core/src/test/resources/DEM_GEOID/"
        "S2__OPER_DEM_GEOIDF_MPC__20200112T130120_S20190507T000000.gtx"
    )
else:
    # Convert host absolute path → docker relative under /workspace
    docker_geoid = "/workspace/" + GEOID_PATH.replace(root_dir + "/", "")

# =====================================================
# 4. Locate IERS bulletin on host
# =====================================================

DATA_DIR = os.path.join(root_dir, "DATA")

# The first directory entry whose name starts with "bulletin" is used.
bulletin_names = [
    name for name in os.listdir(DATA_DIR) if name.startswith("bulletin")
]
iers_host = os.path.join(DATA_DIR, bulletin_names[0]) if bulletin_names else None

if iers_host is None:
    raise RuntimeError("IERS bulletin not found inside DATA directory.")

docker_iers = "/workspace/DATA/" + os.path.basename(iers_host)

# =====================================================
# 5. Build config dictionary
# =====================================================

# Key order below is the order written to config.json.
config = {
    "l1b_product": docker_l1b,
    "gipp_folder": docker_gipp,
    "gipp_check": True,
    "grids_overwriting": True,
    "dem": docker_dem,
    "geoid": docker_geoid,
    "iers": docker_iers,
    "operation": GRID_MODE,
    "deactivate_available_refining": False,
    "steps": {
        name: STEPS[name] for name in ("10m_bands", "20m_bands", "60m_bands")
    },
    "export_alt": True,
}

# The inverse-location block is only present in inverse mode.
if GRID_MODE == "inverse":
    inverse_info = {
        corner: float(LOCATION[corner])
        for corner in ("ul_x", "ul_y", "lr_x", "lr_y")
    }
    inverse_info["referential"] = UTM_EPSG
    inverse_info["output_folder"] = "/workspace/DATA/Output"
    config["inverse_location_additional_info"] = inverse_info

# =====================================================
# Save config.json
# =====================================================

config_path = os.path.join(USERCONF_DIR, "config.json")

with open(config_path, "w") as config_file:
    json.dump(config, config_file, indent=4)

print("Configuration file generated:", config_path, sep="\n")


In [None]:
# === GENERATE PARAMS.JSON ===

import os
import json
import re

root_dir = os.path.dirname(os.getcwd())
USERCONF_DIR = os.path.join(root_dir, "UserConf")
os.makedirs(USERCONF_DIR, exist_ok=True)

print("UserConf directory:", USERCONF_DIR)

# -----------------------------------------------------
# Locate GRANULE folders
# -----------------------------------------------------
GR_TARGET_DIR = os.path.join(PATH_L1B_DATA, "GRANULE")

granule_dir_present = os.path.exists(GR_TARGET_DIR)
if not granule_dir_present:
    raise RuntimeError("GRANULE directory not found inside L1B SAFE.")

# Keep only the entries of GRANULE that are directories, as full paths.
granule_folders = []
for entry in os.listdir(GR_TARGET_DIR):
    candidate = os.path.join(GR_TARGET_DIR, entry)
    if os.path.isdir(candidate):
        granule_folders.append(candidate)

print("Found", len(granule_folders), "granule folders.")

# -----------------------------------------------------
# Extract detectors and bands from JP2
# -----------------------------------------------------
# Detector and band identifiers are read from the tail of each image name:
# "..._D<digits>_B<1-2 digits, optional A>.jp2".
pattern = r"_D(\d+)_B(\d{1,2}[A]?)\.jp2$"

name_hits = []
for granule in granule_folders:
    img_data_dir = os.path.join(granule, "IMG_DATA")

    # Granules without an IMG_DATA folder are ignored.
    if os.path.isdir(img_data_dir):
        for fname in os.listdir(img_data_dir):
            found = re.search(pattern, fname)
            if found:
                name_hits.append(found.groups())

# De-duplicate, then sort as strings.
detectors = sorted({detector for detector, _ in name_hits})
bands = sorted({f"B{band}" for _, band in name_hits})

print("Detected detectors:", detectors)
print("Detected bands:", bands)

# -----------------------------------------------------
# Write params.json
# -----------------------------------------------------
params = {"detectors": detectors, "bands": bands}

params_path = os.path.join(USERCONF_DIR, "params.json")

with open(params_path, "w") as params_file:
    json.dump(params, params_file, indent=4)

print("params.json written to:", params_path)


In [None]:
# === RUN SEN2VM (BUILD + RUN + CLEAN) ===

import os
import subprocess

# =====================================================
# Resolve paths
# =====================================================

# Define root_dir here as the other cells do, rather than relying on whichever
# cell last assigned it (the GIPP cell binds it to a different directory).
root_dir = os.path.dirname(os.getcwd())

dockerfile_dir = os.path.join(root_dir, "src", "sen2vm-core")

# Locations of the generated files as seen from inside the container, where
# root_dir is mounted as /workspace.
config_inside = "/workspace/UserConf/config.json"
params_inside = "/workspace/UserConf/params.json"

# NOTE(review): UID/GID are not passed to any docker command in the cells
# visible here - confirm whether they are still needed.
UID = str(os.getuid())
GID = str(os.getgid())

# =====================================================
# 1. BUILD DOCKER IMAGE
# =====================================================

print(f"Building Docker image 'sen2vm' from: {dockerfile_dir}")

cmd_build = ["docker", "build"] + ["-t", "sen2vm"] + [dockerfile_dir]
build_command_line = " ".join(cmd_build)

print("Command:", build_command_line, "\n")
subprocess.run(cmd_build, check=True)
print("Docker image built successfully.\n")

# =====================================================
# 2. RUN SEN2VM CONTAINER
# =====================================================

# root_dir is mounted as /workspace; the container is discarded afterwards.
container_options = ["--rm", "-v", f"{root_dir}:/workspace"]
sen2vm_arguments = ["-c", config_inside, "-p", params_inside]
cmd_run = ["docker", "run"] + container_options + ["sen2vm"] + sen2vm_arguments
run_command_line = " ".join(cmd_run)

print("Running Docker container...\n")
print("Command:", run_command_line, "\n")

subprocess.run(cmd_run, check=True)

print("\nDocker execution complete.\n")

# =====================================================
# 3. REMOVE DOCKER IMAGE
# =====================================================

print("Removing Docker image 'sen2vm'...")

cmd_remove_image = ["docker", "rmi", "-f", "sen2vm"]
subprocess.run(cmd_remove_image, check=True)

print("Docker image removed.\n")


In [None]:
import os
import subprocess

root_dir = os.path.dirname(os.getcwd())

# Locate the correct L1B MTD XML
# Entries are visited in sorted order so the choice does not depend on the
# order in which the file system lists them.
xml_path = None
for f in sorted(os.listdir(PATH_L1B_DATA)):
    if f.endswith(".xml") and "MTD" in f:
        xml_path = os.path.join(PATH_L1B_DATA, f)
        break

if xml_path is None:
    raise RuntimeError("Cannot find L1B MTD XML inside PATH_L1B_DATA.")

print("Using XML:", xml_path)

# root_dir is what gets mounted as /workspace, so the container path is the
# XML location relative to root_dir. Going through abspath/relpath also covers
# a relative PATH_L1B_DATA.
xml_relative = os.path.relpath(os.path.abspath(xml_path), root_dir)
if xml_relative == os.pardir or xml_relative.startswith(os.pardir + os.sep):
    # A file outside root_dir is not visible inside the container.
    raise RuntimeError(
        f"L1B product must be located under {root_dir} to be reachable "
        f"from the container: {xml_path}"
    )

xml_docker = "/workspace/" + xml_relative.replace(os.sep, "/")

# =======================================
# GDAL user parameters
# =======================================

epsg_code = UTM_EPSG

# The generated script passes these to gdalwarp as "-te ulx lry lrx uly",
# i.e. xmin ymin xmax ymax. Normalise the corners so that the minimum really
# comes first on each axis, whichever way LOCATION was filled in.
ulx = min(LOCATION["ul_x"], LOCATION["lr_x"])
lrx = max(LOCATION["ul_x"], LOCATION["lr_x"])
lry = min(LOCATION["ul_y"], LOCATION["lr_y"])
uly = max(LOCATION["ul_y"], LOCATION["lr_y"])

# =======================================
# Output folder: /DATA/GDAL_OUTPUT
# =======================================
OUTDIR = os.path.join(root_dir, "DATA", "GDAL_OUTPUT")
os.makedirs(OUTDIR, exist_ok=True)

# Same folder as seen from inside the container.
OUTDIR_DOCKER = "/workspace/DATA/GDAL_OUTPUT"


# =======================================
# 1. Build GDAL docker
# =======================================
# Build context for the GDAL image.
dockerfile_dir = os.path.join(root_dir, "src", "gdal-latest")

print("\n=== BUILDING GDAL LATEST CONTAINER ===\n")

build_options = ["--platform=linux/amd64", "-t", "gdal-latest"]
cmd_build = ["docker", "build"] + build_options + [dockerfile_dir]
build_command_line = " ".join(cmd_build)

print("Command:", build_command_line, "\n")
subprocess.run(cmd_build, check=True)

print("GDAL image built successfully.\n")


# ================================
# Generate script: gdal_ortho.sh
# ================================

# Host path of the generated script; the run step invokes it inside the
# container as /workspace/src/gdal_ortho.sh.
gdal_script_path = os.path.join(root_dir, "src", "gdal_ortho.sh")

# Two renderings of the band list for the shell script: a space-separated
# string used for the membership test, and a list of quoted words used as the
# items of the mosaic "for" loop.
# Only "keep_bands" and "global_resolution" are read from ORTHO_SETTINGS here.
bands_str = " ".join(ORTHO_SETTINGS["keep_bands"])
bands_array = " ".join([f'"{b}"' for b in ORTHO_SETTINGS["keep_bands"]])

# The script is an f-string template:
#   * {name} placeholders are filled in by Python when the file is written;
#   * doubled braces {{ }} come out as literal shell braces, e.g. ${{VAR}}
#     is written as ${VAR};
#   * the literal is not raw, so Python consumes every backslash-newline
#     pair: each multi-line gdalwarp call below is written to the file as one
#     long line, and "\\+" is written as "\+".
# Generated script, step 1: list the subdatasets reported by gdalinfo for the
# MTD XML, skip those whose band is not in KEEP_BANDS, and warp each remaining
# one to <OUT_ORTHO>/<name>_ortho.tif with the requested EPSG code, resolution
# and extent.
# Generated script, step 2: for each kept band, merge the matching
# *_<BAND>_ortho.tif files into <OUT_MOSAIC>/ORTHO_mosaic_<BAND>.tif.
with open(gdal_script_path, "w") as f:
    f.write(f"""#!/bin/bash

XML="{xml_docker}"
OUT_ORTHO="/workspace/DATA/GDAL_OUTPUT"
OUT_MOSAIC="/workspace/DATA/GDAL_MOSAIC"

mkdir -p "$OUT_ORTHO"
mkdir -p "$OUT_MOSAIC"

KEEP_BANDS="{bands_str}"
SDS_LIST_FILE="$OUT_ORTHO/sds_list.txt"
rm -f "$SDS_LIST_FILE"

echo "[1/2] Listing subdatasets..."
SDS_ALL=($(gdalinfo "$XML" | grep SUBDATASET_ | grep NAME= | cut -d= -f2))

echo "-> ${{#SDS_ALL[@]}} subdatasets found"
echo ""

for SDS in "${{SDS_ALL[@]}}"; do
    echo "$SDS" >> "$SDS_LIST_FILE"
done

echo ""
echo "=== ORTHORECTIFICATION ==="
echo ""

for SDS in "${{SDS_ALL[@]}}"; do

    NAME=$(echo "$SDS" | sed 's/.*://')
    BAND=$(echo "$NAME" | grep -o "B[0-9A]\\+")

    if [[ ! " $KEEP_BANDS " =~ " $BAND " ]]; then
        continue
    fi

    BASENAME=$(basename "$NAME")
    OUT="${{OUT_ORTHO}}/${{BASENAME}}_ortho.tif"

    FULL_SDS=SENTINEL2_L1B_WITH_GEOLOC:"$XML":$NAME

    echo "----------------------------------------"
    echo "Processing: $NAME  (Band = $BAND)"
    echo "----------------------------------------"

    rm -f "$OUT"

    gdalwarp "$FULL_SDS" "$OUT" \
        -t_srs EPSG:{epsg_code} \
        -tr {ORTHO_SETTINGS["global_resolution"]} {ORTHO_SETTINGS["global_resolution"]} \
        -te {ulx} {lry} {lrx} {uly} \
        -r bilinear \
        -co COMPRESS=LZW \
        -co TILED=YES \
        -overwrite

    echo ""
done


echo ""
echo "=== MOSAIC GENERATION ==="
echo ""

for BAND in {bands_array}; do
    echo "----------------------------------------"
    echo "Creating mosaic for band: $BAND"
    echo "----------------------------------------"

    INPUT_FILES=($(ls ${{OUT_ORTHO}}/*_${{BAND}}_ortho.tif 2>/dev/null))

    if [ ${{#INPUT_FILES[@]}} -eq 0 ]; then
        echo "No ortho images found for band $BAND"
        continue
    fi

    OUTPUT="${{OUT_MOSAIC}}/ORTHO_mosaic_${{BAND}}.tif"

    gdalwarp \
        "${{INPUT_FILES[@]}}" \
        "$OUTPUT" \
        -r bilinear \
        -dstnodata 0 \
        -srcnodata 0 \
        -multi \
        -wm 2048 \
        -overwrite \
        -co COMPRESS=LZW \
        -co TILED=YES \
        -ot UInt16

    echo " Mosaic written → $OUTPUT"
    echo ""
done

echo "=== GDAL processing complete ==="
""")

# Mark the script executable so the container can run it directly.
os.chmod(gdal_script_path, 0o755)

print("Generated:", gdal_script_path)


# =======================================
# 3. Run GDAL processing inside container
# =======================================

print("\n=== RUNNING GDAL PROCESSING ===\n")

# root_dir is mounted as /workspace, which is where the generated script and
# the data folders are found inside the container.
container_options = ["--rm", "-v", f"{root_dir}:/workspace"]
cmd_run = (
    ["docker", "run"]
    + container_options
    + ["gdal-latest", "/workspace/src/gdal_ortho.sh"]
)
run_command_line = " ".join(cmd_run)

print("Command:", run_command_line, "\n")
subprocess.run(cmd_run, check=True)

print("\nGDAL ortho + mosaic complete.\n")


# =======================================
# 4. Delete GDAL docker image
# =======================================
print("Removing gdal-latest image...\n")

cmd_remove_image = ["docker", "rmi", "-f", "gdal-latest"]
subprocess.run(cmd_remove_image, check=True)

print("GDAL image removed.\n")
