# === USER CONFIGURATION ===


In [None]:
# === USER CONFIGURATION ===
# /!\/!\/!\/!\/!\/!\/!\/!\/!\/!\/!\
# /!\/!\/!\ CAREFUL, please read the README_Notebooks.md file before going further
# /!\/!\/!\/!\/!\/!\/!\/!\/!\/!\/!\

# Working directory for sen2vm processing.
# /!\/!\/!\ CAREFUL, the structure of the WORKDIR shall respect the one
# /!\/!\/!\ described in README_Notebooks.md
WORKDIR = "PATH/TO/WORKDIR"

# Path to the downloaded L1B product.
# /!\/!\/!\ CAREFUL, for now PATH_L1B_DATA shall be inside WORKDIR/DATA and
# /!\/!\/!\ respect the structure described in README_Notebooks.md
PATH_L1B_DATA = "PATH/TO/L1B/PRODUCT"

# Output folder chosen by the user (used only if GRID_MODE = "inverse").
INVERSE_OUTPUT_FOLDER = "PATH/TO/OUTPUT/FOLDER"


# === SEN2VM OPTIONS ===

# Either "direct" or "inverse".
# /!\/!\/!\ CAREFUL, for now only "direct" shall be used with a full product
# /!\/!\/!\ (no missing granules)
GRID_MODE = "direct"

# UTM zone EPSG code for the ROI.
UTM_EPSG = 23037

# ROI corners: upper-left (ul_*) and lower-right (lr_*) coordinates.
LOCATION = dict(
    ul_x=-185299,
    ul_y=3363178,
    lr_x=-17915,
    lr_y=3352896,
)

# Step value per band resolution group.
STEPS = {
    "10m_bands": 10,
    "20m_bands": 20,
    "60m_bands": 60,
}


# === GDAL ORTHO OPTIONS ===

ORTHO_SETTINGS = {
    # List of bands to keep for orthorectification.
    "keep_bands": [
        "B01", "B02", "B03", "B04", "B05", "B06", "B07",
        "B08", "B8A", "B09", "B10", "B11", "B12",
    ],
}
 


# GIPP database download (Optional)

In [None]:
# === GIPPs ===
# This step is optional if the database was already downloaded or if you want to use your own GIPP.
# Please note that this notebook will search for a subfolder named after the mission (S2[A/B/C])
# inside the GIPP folder.

import os
import shutil
import subprocess
import tarfile
import re
from datetime import datetime


# The GIPP database is cloned under WORKDIR/DATA (created if missing).
gipp_dir = os.path.join(WORKDIR, "DATA")
os.makedirs(gipp_dir, exist_ok=True)

# =====================================================================
# 1) CLONE sen2vm-gipp-database
# =====================================================================

gipp_repo_name = "sen2vm-gipp-database"
gipp_repo_url = "https://github.com/sen2vm/sen2vm-gipp-database.git"
gipp_repo_path = os.path.join(gipp_dir, gipp_repo_name)

# Delete repository if exists, so that the clone below always starts from scratch
if os.path.exists(gipp_repo_path):
    print(f"Removing existing repository: {gipp_repo_path}")
    shutil.rmtree(gipp_repo_path)

# Clone repository fresh.
# The arguments are passed as a list (no shell) so a destination path containing
# spaces is handled correctly, and check=True raises CalledProcessError when git
# fails instead of silently reporting success.
print("Cloning sen2vm-gipp-database...")
subprocess.run(["git", "clone", gipp_repo_url, gipp_repo_path], check=True)
print("Clone complete.\n")



print("GIPP processing finished successfully.")


# IERS Download (optional)

In [None]:
# === DOWNLOAD IERS ===

import os
import re
import requests
from datetime import datetime

# Bulletins are stored directly in WORKDIR/DATA, which must already exist.
DATA_DIR = os.path.join(WORKDIR, "DATA")

if not os.path.exists(DATA_DIR):
    raise RuntimeError(f"DATA directory not found: {DATA_DIR}")

print("Bulletin output directory:", DATA_DIR)

# =====================================================================
# 1. REMOVE EXISTING IERS BULLETINS
# =====================================================================

# Only files named bulletina-*.txt / bulletinb-*.txt are deleted.
for f in os.listdir(DATA_DIR):
    if not f.endswith(".txt"):
        continue

    if f.startswith("bulletina-"):
        os.remove(os.path.join(DATA_DIR, f))
        print("Removed old bulletin A:", f)
    elif f.startswith("bulletinb-"):
        os.remove(os.path.join(DATA_DIR, f))
        print("Removed old bulletin B:", f)

print("Cleanup of old bulletins complete.\n")

# =====================================================================
# 2. EXTRACT PRODUCT DATE (FROM DATASTRIP)
# =====================================================================

datastrip_dir = os.path.join(PATH_L1B_DATA, "DATASTRIP")

if not os.path.isdir(datastrip_dir):
    raise RuntimeError(f"DATASTRIP directory not found: {datastrip_dir}")

# os.listdir() returns entries in arbitrary order: sort them so that the
# selection below is deterministic.
datastrip_entries = sorted(os.listdir(datastrip_dir))
if not datastrip_entries:
    raise RuntimeError(f"No DATASTRIP found in: {datastrip_dir}")

# Take the first DATASTRIP entry whose name contains a "_S<YYYYMMDD>T<HHMMSS>_"
# stamp; entries that do not match (e.g. hidden or stray files) are skipped.
date_pattern = re.compile(r"_S(\d{8})T\d{6}_")
for datastrip_name in datastrip_entries:
    match = date_pattern.search(datastrip_name)
    if match:
        break
else:
    raise RuntimeError("Could not extract product date from DATASTRIP name.")

product_date_str = match.group(1)

year = int(product_date_str[:4])
month = int(product_date_str[4:6])
day = int(product_date_str[6:8])

# Raises ValueError if the 8 digits do not form a valid calendar date.
product_date = datetime(year, month, day)

print("Product date extracted from DATASTRIP:", product_date.date(), "\n")

# =====================================================================
# Roman conversion 
# =====================================================================

def int_to_roman(n):
    """Return *n* written as a lowercase Roman numeral.

    Subtractive forms are used (4 -> "iv", 9 -> "ix", 900 -> "cm", ...).
    Zero and negative values yield an empty string.
    """
    numerals = (
        (1000, 'm'), (900, 'cm'), (500, 'd'), (400, 'cd'),
        (100, 'c'), (90, 'xc'), (50, 'l'), (40, 'xl'),
        (10, 'x'), (9, 'ix'), (5, 'v'), (4, 'iv'), (1, 'i'),
    )
    pieces = []
    remaining = n
    for value, symbol in numerals:
        if remaining < value:
            continue
        # Emit this symbol as many times as it fits, then carry the rest on.
        repeat = remaining // value
        pieces.append(symbol * repeat)
        remaining -= repeat * value
    return "".join(pieces)


# =====================================================================
# Bulletin A
# =====================================================================


# Year part of the bulletin file name: (year - 1987) as a lowercase Roman numeral.
roman_year = int_to_roman(year - 1987)
# 1-based number of the 7-day block of the year that contains the product date.
doy = product_date.timetuple().tm_yday
index = (doy - 1) // 7 + 1

print(f"Bulletin A Roman year: {roman_year}")
print(f"Initial weekly index: {index}\n")

# Without a timeout, requests waits forever on an unresponsive server and the
# notebook cell never returns.
IERS_REQUEST_TIMEOUT_S = 30

found = False

# Start from the computed index and walk backwards until a bulletin exists.
while index > 0:
    index_str = f"{index:03d}"
    url = f"https://datacenter.iers.org/data/6/bulletina-{roman_year}-{index_str}.txt"
    print("Trying bulletin:", url)

    response = requests.get(url, timeout=IERS_REQUEST_TIMEOUT_S)

    if response.status_code == 200:
        print("Bulletin found:", index_str)
        dest_file = os.path.join(DATA_DIR, f"bulletina-{roman_year}-{index_str}.txt")
        found = True
        break

    index -= 1

if not found:
    raise RuntimeError("No Bulletin A available for current or previous weeks.")

# Written in binary mode: the payload is stored exactly as received.
with open(dest_file, "wb") as f:
    f.write(response.content)

print("Downloaded:", dest_file)


# Sen2VM configuration

In [None]:
# === GENERATE CONFIG.JSON  ===

import os
import json
import re
import shutil
from numpy import double


# Folder receiving config.json (created on first use).
USERCONF_DIR = os.path.join(WORKDIR, "UserConf")
os.makedirs(USERCONF_DIR, exist_ok=True)

print("UserConf directory:", USERCONF_DIR)

# Folder holding the geoid file(s) (created on first use).
GEOID_DIR = os.path.join(WORKDIR, "DATA", "GEOID")
os.makedirs(GEOID_DIR, exist_ok=True)

print("Geoid directory:", GEOID_DIR)

# =====================================================
# 1. Extract mission from DATASTRIP
# =====================================================

datastrip_dir = os.path.join(PATH_L1B_DATA, "DATASTRIP")

if not os.path.isdir(datastrip_dir):
    raise RuntimeError(f"DATASTRIP directory not found: {datastrip_dir}")

# os.listdir() returns entries in arbitrary order: sort them so that the
# selection below is deterministic.
datastrip_entries = sorted(os.listdir(datastrip_dir))
if not datastrip_entries:
    raise RuntimeError(f"No DATASTRIP found in: {datastrip_dir}")

# Use the first entry whose name starts with "S2A_OPER_", "S2B_OPER_" or
# "S2C_OPER_"; entries that do not match (e.g. hidden or stray files) are skipped.
mission_pattern = re.compile(r"(S2[A-C])_OPER_")
for datastrip_name in datastrip_entries:
    match = mission_pattern.match(datastrip_name)
    if match:
        break
else:
    raise RuntimeError("Cannot extract mission (S2A/S2B/S2C) from DATASTRIP name.")

mission = match.group(1)


# =====================================================
# 2. Docker paths inside /workspace
# =====================================================

# normpath() strips any trailing separator first: without it, a PATH_L1B_DATA
# ending with "/" would give an empty basename and a wrong product path.
safe_name = os.path.basename(os.path.normpath(PATH_L1B_DATA))

docker_l1b  = f"/workspace/DATA/{safe_name}"
docker_dem  = "/workspace/DATA/DEM"
docker_gipp = f"/workspace/DATA/sen2vm-gipp-database/{mission}"

# =====================================================
# 3. Geoid management
# =====================================================

# The current working directory is used as the notebook location.
notebook_dir = os.getcwd()

# Default geoid files shipped with the sources, located relative to notebook_dir
internal_geoid_dir = os.path.abspath(os.path.join(
    notebook_dir,
    "..", "..", "..", "src", "test", "resources", "DEM_GEOID"
))

# If WORKDIR/DATA/GEOID is empty -> copy internal files
if not os.listdir(GEOID_DIR):
    print("GEOID directory is empty -> copying default geoid files...")
    if not os.path.isdir(internal_geoid_dir):
        raise RuntimeError(f"Default geoid directory not found: {internal_geoid_dir}")
    for f in os.listdir(internal_geoid_dir):
        src = os.path.join(internal_geoid_dir, f)
        dst = os.path.join(GEOID_DIR, f)
        shutil.copy(src, dst)

# Detect .gtx inside GEOID_DIR (sorted so the chosen file is deterministic)
geoid_files = sorted(f for f in os.listdir(GEOID_DIR) if f.lower().endswith(".gtx"))
if not geoid_files:
    raise RuntimeError("No .gtx geoid file found in WORKDIR/DATA/GEOID.")

docker_geoid = f"/workspace/DATA/GEOID/{geoid_files[0]}"

# =====================================================
# 4. Locate IERS bulletin on host
# =====================================================

DATA_DIR = os.path.join(WORKDIR, "DATA")
iers_host = None

# First file (alphabetical order) whose name starts with "bulletin"
for f in sorted(os.listdir(DATA_DIR)):
    if f.startswith("bulletin"):
        iers_host = os.path.join(DATA_DIR, f)
        break

if iers_host is None:
    raise RuntimeError("IERS bulletin not found inside WORKDIR/DATA directory.")

docker_iers = "/workspace/DATA/" + os.path.basename(iers_host)

# =====================================================
# 5. Build config dictionary
# =====================================================

# All paths below are container-side paths (rooted at /workspace).
config = {
    "l1b_product": docker_l1b,
    "gipp_folder": docker_gipp,
    "auto_gipp_selection": True,
    "grids_overwriting": True,
    "dem": docker_dem,
    "geoid": docker_geoid,
    "iers": docker_iers,
    "operation": GRID_MODE,
    "deactivate_available_refining": False,
    "steps": {
        "10m_bands": STEPS["10m_bands"],
        "20m_bands": STEPS["20m_bands"],
        "60m_bands": STEPS["60m_bands"]
    },
    "export_alt": True
}

# Add inverse block only if needed
# NOTE(review): "output_folder" is hard-coded and INVERSE_OUTPUT_FOLDER from the
# user configuration is not used here -- confirm this is intended.
if GRID_MODE == "inverse":
    config["inverse_location_additional_info"] = {
        "ul_x": double(LOCATION["ul_x"]),
        "ul_y": double(LOCATION["ul_y"]),
        "lr_x": double(LOCATION["lr_x"]),
        "lr_y": double(LOCATION["lr_y"]),
        "referential": f"EPSG:{UTM_EPSG}",
        "output_folder": "/workspace/DATA/Output"
    }

# =====================================================
# Save config.json
# =====================================================

config_path = os.path.join(USERCONF_DIR, "config.json")

with open(config_path, "w") as f:
    json.dump(config, f, indent=4)

print("Configuration file generated:")
print(config_path)


In [None]:
# === GENERATE PARAMS.JSON ===

import os
import json
import re

# Folder receiving params.json (created on first use).
USERCONF_DIR = os.path.join(WORKDIR, "UserConf")
os.makedirs(USERCONF_DIR, exist_ok=True)

print("UserConf directory:", USERCONF_DIR)

# -----------------------------------------------------
# GRANULE folders: every sub-directory of <product>/GRANULE
# -----------------------------------------------------
GR_TARGET_DIR = os.path.join(PATH_L1B_DATA, "GRANULE")

if not os.path.exists(GR_TARGET_DIR):
    raise RuntimeError("GRANULE directory not found inside L1B SAFE.")

granule_folders = []
for entry in os.listdir(GR_TARGET_DIR):
    candidate = os.path.join(GR_TARGET_DIR, entry)
    if os.path.isdir(candidate):
        granule_folders.append(candidate)

print("Found", len(granule_folders), "granule folders.")

# -----------------------------------------------------
# Detectors and bands, read from the JP2 file names
# -----------------------------------------------------
# File names ending in "_D<digits>_B<1-2 digits, optional A>.jp2" provide the
# detector (group 1) and the band (group 2).
pattern = r"_D(\d+)_B(\d{1,2}[A]?)\.jp2$"
jp2_regex = re.compile(pattern)

detectors = set()
bands = set()

for img_data_dir in (os.path.join(g, "IMG_DATA") for g in granule_folders):
    # Granules without an IMG_DATA folder are ignored.
    if os.path.isdir(img_data_dir):
        for jp2_name in os.listdir(img_data_dir):
            hit = jp2_regex.search(jp2_name)
            if hit is None:
                continue
            detector_id, band_id = hit.groups()
            detectors.add(detector_id)
            bands.add(f"B{band_id}")

# Sets become sorted lists so that the JSON output is stable.
detectors = sorted(detectors)
bands = sorted(bands)

print("Detected detectors:", detectors)
print("Detected bands:", bands)

# -----------------------------------------------------
# params.json
# -----------------------------------------------------
params = {"detectors": detectors, "bands": bands}

params_path = os.path.join(USERCONF_DIR, "params.json")

with open(params_path, "w") as f:
    json.dump(params, f, indent=4)

print("params.json written to:", params_path)


# Sen2VM run

In [None]:
# === RUN SEN2VM (Docker: BUILD + RUN + CLEAN) ===

import os
import subprocess


# Docker build context: the directory three levels above the current working
# directory. It is computed here from os.getcwd() rather than from a
# `notebook_dir` variable set by another cell, because the orthorectification
# cell rebinds `notebook_dir` to a different directory; re-running this cell
# afterwards would otherwise build from the wrong location.
dockerfile_dir = os.path.abspath(os.path.join(
    os.getcwd(),
    "..", "..", ".."
))

# Container-side paths of the files generated in WORKDIR/UserConf
# (WORKDIR is mounted at /workspace below).
config_inside = "/workspace/UserConf/config.json"
params_inside = "/workspace/UserConf/params.json"

# =====================================================
# 1. BUILD DOCKER IMAGE
# =====================================================

print(f"Building Docker image 'sen2vm' from: {dockerfile_dir}")

cmd_build = [
    "docker", "build",
    "-t", "sen2vm",
    dockerfile_dir
]

print("Command:", " ".join(cmd_build), "\n")
subprocess.run(cmd_build, check=True)
print("Docker image built successfully.\n")

# =====================================================
# 2. RUN SEN2VM CONTAINER
# =====================================================

# `docker run -v` only bind-mounts a host directory when given an absolute
# path; a relative value is interpreted as a named volume. Make WORKDIR
# absolute so a relative setting still mounts the intended folder.
workdir_host = os.path.abspath(WORKDIR)

cmd_run = [
    "docker", "run",
    "--rm",
    "-v", f"{workdir_host}:/workspace",
    "sen2vm",
    "-c", config_inside,
    "-p", params_inside
]

print("Running Docker container...\n")
print("Command:", " ".join(cmd_run), "\n")

subprocess.run(cmd_run, check=True)

print("\nDocker execution complete.\n")

# =====================================================
# 3. REMOVE DOCKER IMAGE
# =====================================================

print("Removing Docker image 'sen2vm'...")

subprocess.run(["docker", "rmi", "-f", "sen2vm"], check=True)

print("Docker image removed.\n")

# Generate Orthorectification images

In [None]:
import os
import subprocess
import glob

# -----------------------------------------------------
# Product name: last component of PATH_L1B_DATA
# -----------------------------------------------------
product = os.path.basename(os.path.normpath(PATH_L1B_DATA))

# -----------------------------------------------------
# Datastrip metadata XML
# -----------------------------------------------------
datastrip_glob = os.path.join(PATH_L1B_DATA, "DATASTRIP", "S2*")
xml_list = glob.glob(os.path.join(datastrip_glob, "S2*_MTD_L1B_DS_*.xml"))

if not xml_list:
    raise RuntimeError("No DATASTRIP MTD XML found")

# Only the first match is used.
xml_path = xml_list[0]
xml_name = os.path.basename(xml_path)

# XML path inside docker (relative, after cd)
xml_docker = "./" + xml_name

print("Using XML:", xml_path)

# -----------------------------------------------------
# VRT files found in GEO_DATA
# -----------------------------------------------------
vrt_list = glob.glob(os.path.join(datastrip_glob, "GEO_DATA", "*.vrt"))

if len(vrt_list) == 0:
    raise RuntimeError("No VRT files found in GEO_DATA")

# Keep only the file name without directory and extension.
vrt_names = []
for vrt_file in vrt_list:
    stem, _ext = os.path.splitext(os.path.basename(vrt_file))
    vrt_names.append(stem)

print("Found VRTs:", vrt_names)

# -----------------------------------------------------
# Output directory (host side and container side)
# -----------------------------------------------------
OUTDIR = os.path.join(WORKDIR, "DATA", "GDAL_OUTPUT_ORTHO")
os.makedirs(OUTDIR, exist_ok=True)

OUTDIR_DOCKER = "/workspace/DATA/GDAL_OUTPUT_ORTHO"

# -----------------------------------------------------
# Build the GDAL docker image
# -----------------------------------------------------
# The build context is <parent of the current working directory>/src/gdal-latest.
notebook_dir = os.path.dirname(os.getcwd())
dockerfile_dir = os.path.abspath(os.path.join(notebook_dir, "src", "gdal-latest"))

print("\n=== BUILDING GDAL LATEST CONTAINER ===\n")
cmd_build = ["docker", "build", "--platform=linux/amd64", "-t", "gdal-latest", dockerfile_dir]

print("Command:", " ".join(cmd_build), "\n")
subprocess.run(cmd_build, check=True)
print("GDAL image built successfully.\n")

# =====================================================
# Generate gdal_ortho.sh
# =====================================================
# The script is written to WORKDIR/src so that, once WORKDIR is mounted at
# /workspace in the container, it is reachable as /workspace/src/gdal_ortho.sh.
gdal_script_path = os.path.join(WORKDIR, "src", "gdal_ortho.sh")
os.makedirs(os.path.dirname(gdal_script_path), exist_ok=True)

# Space-separated, double-quoted VRT base names: used as the word list of the
# first bash `for` loop in the template below.
vrt_array = " ".join([f'"{v}"' for v in vrt_names])

# Template notes:
#   * This is an f-string: {product}, {xml_docker}, {vrt_array}, {UTM_EPSG},
#     the LOCATION values and the ORTHO_SETTINGS["keep_bands"] list are
#     substituted when the file is written.
#   * Doubled braces ({{ }}) produce literal braces for bash (${BASENAME},
#     ${BAND}, ${#INPUT_FILES[@]}); "\\" produces a single backslash, i.e. a
#     bash line continuation.
#   * `-te` receives ul_x, lr_y, lr_x, ul_y in that order (xmin ymin xmax ymax).
#   * The first loop warps every VRT found; only the second (mosaic) loop is
#     restricted to the bands listed in ORTHO_SETTINGS["keep_bands"].
# NOTE(review): the script starts with `set +e`, so a failing gdalwarp does not
#   stop it, and its last command is an `echo`; the exit status seen by the
#   caller therefore presumably does not reflect gdalwarp failures -- confirm.
# NOTE(review): xml_docker already starts with "./" and the template prepends
#   another "./" after `cd /workspace/DATA/{product}`; confirm that the XML is
#   resolved as intended from that directory.
with open(gdal_script_path, "w") as f:
    f.write(f"""#!/bin/bash
set +e

cd /workspace/DATA/{product}

OUT_ORTHO="/workspace/DATA/GDAL_OUTPUT_ORTHO"
OUT_MOSAIC="/workspace/DATA/GDAL_OUTPUT_MOSAIC"

mkdir -p "$OUT_ORTHO"
mkdir -p "$OUT_MOSAIC"

XML="{xml_docker}"

echo "=== ORTHORECTIFICATION ==="

for VRT in {vrt_array}; do
    BASENAME="$VRT"
    OUT="$OUT_ORTHO/${{BASENAME}}_ortho.tif"

    echo "----------------------------------------"
    echo "Processing VRT: $VRT"
    echo "----------------------------------------"

    rm -f "$OUT"

    gdalwarp \\
        SENTINEL2_L1B_WITH_GEOLOC:./$XML:$VRT \\
        "$OUT" \\
        -t_srs EPSG:{UTM_EPSG} \\
        -te {LOCATION["ul_x"]} {LOCATION["lr_y"]} {LOCATION["lr_x"]} {LOCATION["ul_y"]} \\
        -r bilinear \\
        -co COMPRESS=LZW \\
        -co TILED=YES \\
        -overwrite

    echo ""
done

echo ""
echo "=== MOSAIC GENERATION ==="
echo ""

for BAND in {" ".join(ORTHO_SETTINGS["keep_bands"])}; do
    echo "----------------------------------------"
    echo "Creating mosaic for band: $BAND"
    echo "----------------------------------------"

    INPUT_FILES=($(ls $OUT_ORTHO/*_${{BAND}}_ortho.tif 2>/dev/null))

    if [ ${{#INPUT_FILES[@]}} -eq 0 ]; then
        echo "No ortho images found for band $BAND"
        continue
    fi

    OUTPUT="$OUT_MOSAIC/ORTHO_mosaic_${{BAND}}.tif"

    gdalwarp \\
        "${{INPUT_FILES[@]}}" \\
        "$OUTPUT" \\
        -r bilinear \\
        -dstnodata 0 \\
        -srcnodata 0 \\
        -multi \\
        -wm 2048 \\
        -overwrite \\
        -co COMPRESS=LZW \\
        -co TILED=YES \\
        -ot UInt16

    echo " Mosaic written -> $OUTPUT"
    echo ""
done

echo "=== GDAL processing complete ==="
""")

# Make the script executable (rwxr-xr-x) so it can be used directly as the
# container command.
os.chmod(gdal_script_path, 0o755)
print("Generated:", gdal_script_path)

# =====================================================
# Run GDAL docker
# =====================================================
print("\n=== RUNNING GDAL PROCESSING ===\n")

# `docker run -v` only bind-mounts a host directory when given an absolute
# path; a relative value is interpreted as a named volume. Make WORKDIR
# absolute so a relative setting still mounts the intended folder.
workdir_host = os.path.abspath(WORKDIR)

# The generated script is executed from its container-side location.
cmd_run = [
    "docker", "run",
    "--rm",
    "-v", f"{workdir_host}:/workspace",
    "gdal-latest",
    "/workspace/src/gdal_ortho.sh"
]

print("Command:", " ".join(cmd_run), "\n")
subprocess.run(cmd_run, check=True)
print("\nGDAL ortho + mosaic complete.\n")

# =====================================================
# Cleanup docker image
# =====================================================
print("Removing gdal-latest image...\n")
subprocess.run(["docker", "rmi", "-f", "gdal-latest"], check=True)
print("GDAL image removed.\n")
