In [1]:
import pandas as pd

In [2]:
emdat_path = "/home/nissim/Documents/dev/arg-inundaciones/data/public_emdat_custom_request_2025-06-26_fe5041a7-dc26-4391-aef0-f49e5e5d8657.xlsx"
emdat = pd.read_excel(emdat_path)

In [3]:
# Check the data type of the column
print("Column dtype:", emdat["Admin Units"].dtype)
print("Column type:", type(emdat["Admin Units"]))

# Check the type of individual elements
print("\nFirst element type:", type(emdat["Admin Units"].iloc[0]))
print("Second element type:", type(emdat["Admin Units"].iloc[1]))

# Look at the actual content of a few rows
print("\nFirst row content:")
print(emdat["Admin Units"].iloc[0])
print("\nSecond row content:")
print(emdat["Admin Units"].iloc[1])

# Check if it's a string that needs parsing
if isinstance(emdat["Admin Units"].iloc[0], str):
    print("\nIt's a string - might need JSON parsing")
    import json

    try:
        parsed = json.loads(emdat["Admin Units"].iloc[0])
        print("Successfully parsed as JSON:", type(parsed))
    except:
        print("Not valid JSON")
elif isinstance(emdat["Admin Units"].iloc[0], list):
    print("\nIt's already a list")
    print("List element types:", [type(item) for item in emdat["Admin Units"].iloc[0]])

Column dtype: object
Column type: <class 'pandas.core.series.Series'>

First element type: <class 'str'>
Second element type: <class 'str'>

First row content:
[{"adm1_code":431,"adm1_name":"Catamarca"},{"adm1_code":434,"adm1_name":"Cordoba"},{"adm1_code":438,"adm1_name":"Jujuy"},{"adm1_code":440,"adm1_name":"La Rioja"},{"adm1_code":445,"adm1_name":"Salta"},{"adm1_code":450,"adm1_name":"Santiago Del Estero"},{"adm1_code":452,"adm1_name":"Tucuman"}]

Second row content:
[{"adm1_code":430,"adm1_name":"Buenos Aires D.f."},{"adm1_code":434,"adm1_name":"Cordoba"},{"adm1_code":439,"adm1_name":"La Pampa"},{"adm2_code":4386,"adm2_name":"Avellaneda"},{"adm2_code":4395,"adm2_name":"Berisso"},{"adm2_code":4445,"adm2_name":"Lanus"},{"adm2_code":4477,"adm2_name":"Quilmes"},{"adm2_code":82738,"adm2_name":"San Miguel"},{"adm2_code":190525,"adm2_name":"San  Fernando"},{"adm2_code":4631,"adm2_name":"Parana"},{"adm2_code":4836,"adm2_name":"Rosario"}]

It's a string - might need JSON parsing
Successfully

In [4]:
import json
import pandas as pd


def extract_admin_names(admin_units_str):
    """
    Extract adm1 and adm2 names from a JSON string of admin units.
    Returns (adm1_names, adm2_names) as lists.
    """
    try:
        parsed = json.loads(admin_units_str)
        adm1_names = []
        adm2_names = []

        for unit in parsed:
            if "adm1_name" in unit:
                adm1_names.append(unit["adm1_name"])
            if "adm2_name" in unit:
                adm2_names.append(unit["adm2_name"])

        return adm1_names, adm2_names
    except:
        # Return empty lists if parsing fails
        return [], []


# Apply the extraction function to create new columns
emdat[["adm1_names", "adm2_names"]] = emdat["Admin Units"].apply(
    lambda x: pd.Series(extract_admin_names(x))
)

# Display the results
print("=== EXTRACTION RESULTS ===")
print(f"Total rows processed: {len(emdat)}")
print(f"Rows with adm1 names: {sum(emdat['adm1_names'].apply(len) > 0)}")
print(f"Rows with adm2 names: {sum(emdat['adm2_names'].apply(len) > 0)}")

# Show a few examples
print("\n=== SAMPLE RESULTS ===")
for i in range(min(5, len(emdat))):
    print(f"Row {i}:")
    print(f"  adm1_names: {emdat['adm1_names'].iloc[i]}")
    print(f"  adm2_names: {emdat['adm2_names'].iloc[i]}")
    print()

=== EXTRACTION RESULTS ===
Total rows processed: 43
Rows with adm1 names: 27
Rows with adm2 names: 17

=== SAMPLE RESULTS ===
Row 0:
  adm1_names: ['Catamarca', 'Cordoba', 'Jujuy', 'La Rioja', 'Salta', 'Santiago Del Estero', 'Tucuman']
  adm2_names: []

Row 1:
  adm1_names: ['Buenos Aires D.f.', 'Cordoba', 'La Pampa']
  adm2_names: ['Avellaneda', 'Berisso', 'Lanus', 'Quilmes', 'San Miguel', 'San  Fernando', 'Parana', 'Rosario']

Row 2:
  adm1_names: ['Buenos Aires', 'Cordoba', 'La Pampa', 'Santa Fe']
  adm2_names: []

Row 3:
  adm1_names: []
  adm2_names: ['Iriondo']

Row 4:
  adm1_names: ['Buenos Aires']
  adm2_names: []



In [5]:
from shapely.geometry import box
from utils.pygeoboundaries.main import get_area_of_interest_by_names


def get_flood_bounding_box(emdat_row, country_iso3):
    """
    Get bounding box for a flood event based on admin unit names.

    Args:
        emdat_row: Single row from emdat dataframe with adm1_names and adm2_names columns
        country_iso3: ISO3 country code for filtering

    Returns:
        bounding_box: shapely geometry representing the bounding box
        bbox_dict: dictionary with bounding box coordinates
    """

    # Check if we have adm2 names (prefer adm2 over adm1 for more precision)
    if len(emdat_row["adm2_names"]) > 0:
        adm2_names = emdat_row["adm2_names"]
        print(f"Filtered by {len(adm2_names)} adm2 names: {adm2_names}")

        # Get bounding box using geoBoundaries
        bbox_dict = get_area_of_interest_by_names(
            unit_names=adm2_names, adm_level="ADM2", country_iso3=country_iso3
        )

    elif len(emdat_row["adm1_names"]) > 0:
        adm1_names = emdat_row["adm1_names"]
        print(f"Filtered by {len(adm1_names)} adm1 names: {adm1_names}")

        # Get bounding box using geoBoundaries
        bbox_dict = get_area_of_interest_by_names(
            unit_names=adm1_names, adm_level="ADM1", country_iso3=country_iso3
        )

    else:
        print("No admin names found!")
        return None, None

    if bbox_dict is None:
        print("No matching administrative units found!")
        return None, None

    # Convert dictionary to shapely geometry
    bbox_geometry = box(
        bbox_dict["min_lon"],
        bbox_dict["min_lat"],
        bbox_dict["max_lon"],
        bbox_dict["max_lat"],
    )

    # print(f"Bounding box coordinates: {bbox_dict}")
    # print(f"Bounding box area: {bbox_geometry.area:.6f}")

    return bbox_geometry, bbox_dict


# Example usage for a single row
# Assuming you have the country ISO3 code (e.g., 'ARG' for Argentina)
country_iso3 = "ARG"  # You'll need to get this from your emdat data

# Test with row 1 (which has both adm1 and adm2 names)
test_row = emdat.iloc[1]
print("=== TESTING ROW 1 ===")
print(f"adm1_names: {test_row['adm1_names']}")
print(f"adm2_names: {test_row['adm2_names']}")

bbox, bbox_dict = get_flood_bounding_box(test_row, country_iso3)

if bbox is not None:
    print(f"\nBounding box geometry: {bbox}")

    # You can also get the coordinates for satellite imagery search
    bounds = bbox.bounds
    print(f"Min/Max coordinates: {bounds}")

=== TESTING ROW 1 ===
adm1_names: ['Buenos Aires D.f.', 'Cordoba', 'La Pampa']
adm2_names: ['Avellaneda', 'Berisso', 'Lanus', 'Quilmes', 'San Miguel', 'San  Fernando', 'Parana', 'Rosario']
Filtered by 8 adm2 names: ['Avellaneda', 'Berisso', 'Lanus', 'Quilmes', 'San Miguel', 'San  Fernando', 'Parana', 'Rosario']

Bounding box geometry: POLYGON ((-56.9436 -39.92531, -56.9436 -27.468575, -67.8722 -27.468575, -67.8722 -39.92531, -56.9436 -39.92531))
Min/Max coordinates: (-67.8722, -39.92531, -56.9436, -27.468575)


In [6]:
import pandas as pd
from datetime import datetime, timedelta


# Create date column from Year, Month, Day
def create_date(row, year_col, month_col, day_col):
    """
    Create date in YYYY-MM-DD format from Year, Month, Day columns.
    Handles missing values by returning None.
    """
    try:
        year = row[year_col]
        month = row[month_col]
        day = row[day_col]

        # Check if all values are present and valid
        if pd.notna(year) and pd.notna(month) and pd.notna(day):
            # Convert to integers and create date
            year = int(year)
            month = int(month)
            day = int(day)

            # Validate date
            date_obj = datetime(year, month, day)
            return date_obj.strftime("%Y-%m-%d")
        else:
            return None
    except (ValueError, TypeError):
        return None


# Apply the function to create start_date and end_date columns
emdat["start_date"] = emdat.apply(
    lambda row: create_date(row, "Start Year", "Start Month", "Start Day"), axis=1
)
emdat["end_date"] = emdat.apply(
    lambda row: create_date(row, "End Year", "End Month", "End Day"), axis=1
)


# Create datetime range for satellite imagery search
def create_datetime_range(end_date_str):
    """
    Create datetime range starting from end_date and ending 2 days after.
    Format: "YYYY-MM-DD/YYYY-MM-DD"
    """
    if end_date_str is None:
        return None

    try:
        end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
        start_date = end_date
        end_date_plus_2 = end_date + timedelta(days=2)

        return (
            f"{start_date.strftime('%Y-%m-%d')}/{end_date_plus_2.strftime('%Y-%m-%d')}"
        )
    except ValueError:
        return None


# Create datetime_range column
emdat["datetime_range"] = emdat["end_date"].apply(create_datetime_range)

# Display results
print("=== DATE PROCESSING RESULTS ===")
print(f"Total rows: {len(emdat)}")
print(f"Rows with valid start_date: {emdat['start_date'].notna().sum()}")
print(f"Rows with valid end_date: {emdat['end_date'].notna().sum()}")
print(f"Rows with valid datetime_range: {emdat['datetime_range'].notna().sum()}")

# Show sample results
print("\n=== SAMPLE RESULTS ===")
sample_cols = [
    "Start Year",
    "Start Month",
    "Start Day",
    "End Year",
    "End Month",
    "End Day",
    "start_date",
    "end_date",
    "datetime_range",
]
print(emdat[sample_cols].head(10))

# Example usage for a specific row
test_row = emdat.iloc[1]
if test_row["datetime_range"] is not None:
    print(f"\nExample datetime range for row 1: {test_row['datetime_range']}")
    print(
        f"This represents: {test_row['datetime_range'].split('/')[0]} to {test_row['datetime_range'].split('/')[1]}"
    )

=== DATE PROCESSING RESULTS ===
Total rows: 43
Rows with valid start_date: 40
Rows with valid end_date: 41
Rows with valid datetime_range: 41

=== SAMPLE RESULTS ===
   Start Year  Start Month  Start Day  End Year  End Month  End Day  \
0        2000            3        9.0      2000          3     17.0   
1        2000            5       15.0      2000          5     15.0   
2        2000           11        9.0      2000         11     16.0   
3        2000           11       22.0      2000         11     22.0   
4        2001            3       21.0      2001          3     21.0   
5        2001            7        NaN      2001          7      NaN   
6        2001           10        1.0      2001         12      1.0   
7        2002           10       12.0      2002         11      2.0   
8        2003            2       10.0      2003          2     10.0   
9        2003            4       28.0      2003          5     10.0   

   start_date    end_date         datetime_range  
0

## STAC query
Filter for Sentinel 1 and/or 2 images that are 1) within two days of the flood event, 2) below 20% cloud cover, 3) within the bounding box (duh), 4) ???.

In [7]:
# Import required libraries
import pandas as pd
import planetary_computer
import pystac_client
import panel as pn

# Enable Panel for interactive visualizations
pn.extension()

In [8]:
# Step 1: Connect to Planetary Computer Catalog
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,  # Automatically signs requests
)

In [9]:
import pandas as pd
from tqdm import tqdm

# Filter out observations before 2014
emdat_filtered = emdat[emdat["end_date"].notna()].copy()
emdat_filtered["end_date_obj"] = pd.to_datetime(emdat_filtered["end_date"])
emdat_filtered = emdat_filtered[emdat_filtered["end_date_obj"] >= "2014-01-01"]

print("=== FILTERING RESULTS ===")
print(f"Original rows: {len(emdat)}")
print(f"Rows with valid end_date: {len(emdat[emdat['end_date'].notna()])}")
print(f"Rows after 2014 filter: {len(emdat_filtered)}")

# Initialize results storage
results = []

print("\n=== SEARCHING SENTINEL-1 RTC FOR ALL EMDAT ROWS ===")

# Process each row
for idx, row in tqdm(
    emdat_filtered.iterrows(), total=len(emdat_filtered), desc="Processing rows"
):
    try:
        # Get bounding box for this row
        bbox_geometry, bbox_dict = get_flood_bounding_box(row, "ARG")

        if bbox_geometry is None:
            results.append(
                {
                    "row_index": idx,
                    "event_name": row.get("Event Name", "Unknown"),
                    "end_date": row.get("end_date", None),
                    "datetime_range": row.get("datetime_range", None),
                    "bbox_available": False,
                    "items_found": 0,
                    "error": "No bounding box available",
                }
            )
            continue

        # Convert shapely geometry bounds to bbox format
        bounds = bbox_geometry.bounds
        bbox = [bounds[0], bounds[1], bounds[2], bounds[3]]

        # Get datetime range
        datetime_range = row.get("datetime_range")

        if datetime_range is None:
            results.append(
                {
                    "row_index": idx,
                    "event_name": row.get("Event Name", "Unknown"),
                    "end_date": row.get("end_date", None),
                    "datetime_range": None,
                    "bbox_available": True,
                    "items_found": 0,
                    "error": "No valid date range",
                }
            )
            continue

        # Search for Sentinel-1 RTC data
        search = catalog.search(
            collections=["sentinel-1-rtc"],
            bbox=bbox,
            datetime=datetime_range,
            query={
                "sar:instrument_mode": {"eq": "IW"},
                "sar:frequency_band": {"eq": "C"},
                "sar:polarizations": {"in": [["VV"], ["VH"], ["VV", "VH"]]},
            },
        )

        items = search.item_collection()

        # Store results
        results.append(
            {
                "row_index": idx,
                "event_name": row.get("Event Name", "Unknown"),
                "end_date": row.get("end_date", None),
                "datetime_range": datetime_range,
                "bbox_available": True,
                "items_found": len(items),
                "bbox_coords": bbox,
                "error": None,
            }
        )

    except Exception as e:
        results.append(
            {
                "row_index": idx,
                "event_name": row.get("Event Name", "Unknown"),
                "end_date": row.get("end_date", None),
                "datetime_range": row.get("datetime_range", None),
                "bbox_available": False,
                "items_found": 0,
                "error": str(e),
            }
        )

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Summary statistics
print(f"\nTotal rows: {len(results_df)}")
print(f"Rows with imagery: {(results_df['items_found'] > 0).sum()}")
print(f"Total items found: {results_df['items_found'].sum()}")

# Show rows with imagery
rows_with_imagery = results_df[results_df["items_found"] > 0]
if len(rows_with_imagery) > 0:
    print("\nRows with imagery:")
    for _, row in rows_with_imagery.iterrows():
        print(
            f"  Row {row['row_index']}: {row['items_found']} items ({row['datetime_range']})"
        )

=== FILTERING RESULTS ===
Original rows: 43
Rows with valid end_date: 41
Rows after 2014 filter: 23

=== SEARCHING SENTINEL-1 RTC FOR ALL EMDAT ROWS ===


Processing rows:   0%|          | 0/23 [00:00<?, ?it/s]

Filtered by 7 adm1 names: ['Catamarca', 'Cordoba', 'Entre Rios', 'Neuquen', 'Rio Negro', 'Santa Fe', 'Santiago Del Estero']


Processing rows:   4%|▍         | 1/23 [00:00<00:17,  1.23it/s]

Filtered by 4 adm1 names: ['Chaco', 'Corrientes', 'Formosa', 'Misiones']


Processing rows:   9%|▊         | 2/23 [00:01<00:11,  1.90it/s]

Filtered by 2 adm1 names: ['Buenos Aires', 'Buenos Aires D.f.']


Processing rows:  13%|█▎        | 3/23 [00:02<00:19,  1.04it/s]

Filtered by 9 adm1 names: ['Catamarca', 'Chaco', 'Cordoba', 'Corrientes', 'Salta', 'San Luis', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']


Processing rows:  17%|█▋        | 4/23 [00:03<00:14,  1.29it/s]

Filtered by 20 adm2 names: ['Arrecifes', 'Berisso', 'Campana', 'Chacabuco', 'Chivilcoy', 'General Pueyrredon', 'Junin', 'Lujan', 'Mercedes', 'Pergamino', 'Pila', 'Quilmes', 'Salto', 'San Andres de Giles', 'San Antonio de Areco', 'La Plata', 'Tres de Febrero', 'Zarate', 'Pilar', 'General  Lopez']


Processing rows:  22%|██▏       | 5/23 [00:04<00:14,  1.21it/s]

Filtered by 10 adm2 names: ['Curuzu Cuatia', 'Esquina', 'Goya', 'Lavalle', 'Paso de los Libres', 'San Cosme', 'Gualeguaychu', 'Parana', 'Colon', 'Concordia']


Processing rows:  26%|██▌       | 6/23 [00:04<00:14,  1.16it/s]

Filtered by 8 adm1 names: ['Buenos Aires', 'Chaco', 'Cordoba', 'Corrientes', 'Entre Rios', 'Formosa', 'Santa Fe', 'Santiago Del Estero']


Processing rows:  30%|███       | 7/23 [00:05<00:12,  1.28it/s]

Filtered by 19 adm2 names: ['Florentino Ameghino', 'Baradero', 'Arrecifes', 'Colon', 'General Villegas', 'Pergamino', 'Ramallo', 'Rojas', 'Salto', 'San Antonio de Areco', 'San Nicolas', 'Colon', 'General Roca', 'Punilla', 'San Javier', 'La Paz', 'Parana', 'General  Lopez', 'Rosario']


Processing rows:  35%|███▍      | 8/23 [00:06<00:12,  1.24it/s]

Filtered by 15 adm1 names: ['Buenos Aires', 'Catamarca', 'Chubut', 'Cordoba', 'Formosa', 'Jujuy', 'La Pampa', 'Mendoza', 'Misiones', 'Salta', 'San Juan', 'Santa Cruz', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']


Processing rows:  39%|███▉      | 9/23 [00:06<00:10,  1.38it/s]

Filtered by 2 adm1 names: ['Corrientes', 'Entre Rios']


Processing rows:  43%|████▎     | 10/23 [00:07<00:07,  1.65it/s]

Filtered by 1 adm2 names: ['Escalante']


Processing rows:  48%|████▊     | 11/23 [00:08<00:07,  1.54it/s]

Filtered by 2 adm1 names: ['Chaco', 'Salta']


Processing rows:  52%|█████▏    | 12/23 [00:08<00:06,  1.65it/s]

Filtered by 1 adm2 names: ['Presidente Roque Saenz Pena']


Processing rows:  57%|█████▋    | 13/23 [00:09<00:05,  1.67it/s]

Filtered by 7 adm2 names: ['Arrecifes', 'La Matanza', 'Lanus', 'Lobos', 'Lomas de Zamora', 'Marcos Paz', 'La Plata']


Processing rows:  61%|██████    | 14/23 [00:10<00:06,  1.32it/s]

Filtered by 6 adm1 names: ['Chaco', 'Corrientes', 'Entre Rios', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']


Processing rows:  65%|██████▌   | 15/23 [00:10<00:05,  1.47it/s]

Filtered by 3 adm1 names: ['Chaco', 'Corrientes', 'Formosa']


Processing rows:  70%|██████▉   | 16/23 [00:11<00:04,  1.57it/s]

Filtered by 3 adm2 names: ['Belgrano', 'General Taboada', 'Juan F. Ibarra']


Processing rows:  74%|███████▍  | 17/23 [00:12<00:04,  1.41it/s]

Filtered by 4 adm1 names: ['Chaco', 'La Rioja', 'Salta', 'Tucuman']


Processing rows:  78%|███████▊  | 18/23 [00:12<00:03,  1.56it/s]

Filtered by 1 adm1 names: ['Catamarca']


Processing rows:  83%|████████▎ | 19/23 [00:13<00:02,  1.68it/s]

Filtered by 1 adm2 names: ['Quilmes']


Processing rows: 100%|██████████| 23/23 [00:14<00:00,  1.62it/s]

No admin names found!
No admin names found!
No admin names found!

Total rows: 23
Rows with imagery: 16
Total items found: 211

Rows with imagery:
  Row 22: 2 items (2014-11-04/2014-11-06)
  Row 23: 21 items (2015-03-04/2015-03-06)
  Row 24: 5 items (2015-08-12/2015-08-14)
  Row 25: 6 items (2016-01-11/2016-01-13)
  Row 26: 19 items (2016-04-15/2016-04-17)
  Row 27: 26 items (2016-12-26/2016-12-28)
  Row 28: 48 items (2017-04-07/2017-04-09)
  Row 29: 5 items (2017-06-14/2017-06-16)
  Row 31: 16 items (2018-02-21/2018-02-23)
  Row 33: 3 items (2018-11-12/2018-11-14)
  Row 34: 20 items (2019-01-17/2019-01-19)
  Row 35: 12 items (2019-04-23/2019-04-25)
  Row 36: 3 items (2019-05-20/2019-05-22)
  Row 37: 13 items (2020-02-19/2020-02-21)
  Row 38: 11 items (2021-03-01/2021-03-03)
  Row 39: 1 items (2023-07-05/2023-07-07)





In [10]:
import pandas as pd
from tqdm import tqdm

# Filter out observations before 2014
emdat_filtered = emdat[emdat["end_date"].notna()].copy()
emdat_filtered["end_date_obj"] = pd.to_datetime(emdat_filtered["end_date"])
emdat_filtered = emdat_filtered[emdat_filtered["end_date_obj"] >= "2014-01-01"]

print("=== FILTERING RESULTS ===")
print(f"Original rows: {len(emdat)}")
print(f"Rows with valid end_date: {len(emdat[emdat['end_date'].notna()])}")
print(f"Rows after 2014 filter: {len(emdat_filtered)}")

# Initialize results storage
results = []

print("=== SEARCHING SENTINEL-2 L2A FOR ALL EMDAT ROWS ===")

# Process each row
for idx, row in tqdm(
    emdat_filtered.iterrows(), total=len(emdat_filtered), desc="Processing rows"
):
    try:
        # Get event name, fallback to row index if missing or nan
        event_name = row.get("Event Name", None)
        if pd.isna(event_name) or not event_name:
            event_name = f"Event_{idx}"

        # Get bounding box for this row
        bbox_geometry, bbox_dict = get_flood_bounding_box(row, "ARG")

        if bbox_geometry is None:
            print(f"[Row {idx}] No bounding box available for event '{event_name}'")
            results.append(
                {
                    "row_index": idx,
                    "event_name": event_name,
                    "end_date": row.get("end_date", None),
                    "datetime_range": row.get("datetime_range", None),
                    "bbox_available": False,
                    "items_found": 0,
                    "error": "No bounding box available",
                }
            )
            continue

        # Convert shapely geometry bounds to bbox format
        bounds = bbox_geometry.bounds
        bbox = [bounds[0], bounds[1], bounds[2], bounds[3]]

        # Get datetime range
        datetime_range = row.get("datetime_range")

        if datetime_range is None:
            print(f"[Row {idx}] No valid date range for event '{event_name}'")
            results.append(
                {
                    "row_index": idx,
                    "event_name": event_name,
                    "end_date": row.get("end_date", None),
                    "datetime_range": None,
                    "bbox_available": True,
                    "items_found": 0,
                    "error": "No valid date range",
                }
            )
            continue

        # Print search parameters for debugging
        print(f"\n[Row {idx}] Searching Sentinel-2 L2A for event '{event_name}'")
        print(f"  BBOX: {bbox}")
        print(f"  Date range: {datetime_range}")
        print(
            "  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}"
        )

        # Search for Sentinel-2 L2A data with cloud cover filtering
        search = catalog.search(
            collections=["sentinel-2-l2a"],
            bbox=bbox,
            datetime=datetime_range,
            query={
                "eo:cloud_cover": {"lt": 20},  # Less than 50% cloud cover
            },
        )

        items = search.item_collection()
        print(f"  Items found: {len(items)}")

        # Store results
        results.append(
            {
                "row_index": idx,
                "event_name": event_name,
                "end_date": row.get("end_date", None),
                "datetime_range": datetime_range,
                "bbox_available": True,
                "items_found": len(items),
                "bbox_coords": bbox,
                "error": None,
            }
        )

    except Exception as e:
        print(f"[Row {idx}] ERROR: {e}")
        results.append(
            {
                "row_index": idx,
                "event_name": event_name,
                "end_date": row.get("end_date", None),
                "datetime_range": row.get("datetime_range", None),
                "bbox_available": False,
                "items_found": 0,
                "error": str(e),
            }
        )

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Summary statistics
print(f"\nTotal rows: {len(results_df)}")
print(f"Rows with imagery: {(results_df['items_found'] > 0).sum()}")
print(f"Total items found: {results_df['items_found'].sum()}")

=== FILTERING RESULTS ===
Original rows: 43
Rows with valid end_date: 41
Rows after 2014 filter: 23
=== SEARCHING SENTINEL-2 L2A FOR ALL EMDAT ROWS ===


Processing rows:   0%|          | 0/23 [00:00<?, ?it/s]

Filtered by 7 adm1 names: ['Catamarca', 'Cordoba', 'Entre Rios', 'Neuquen', 'Rio Negro', 'Santa Fe', 'Santiago Del Estero']

[Row 20] Searching Sentinel-2 L2A for event 'Event_20'
  BBOX: [-69.114097, -34.384612, -58.807222, -25.129197]
  Date range: 2014-04-08/2014-04-10
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:   4%|▍         | 1/23 [00:00<00:06,  3.23it/s]

  Items found: 0
Filtered by 4 adm1 names: ['Chaco', 'Corrientes', 'Formosa', 'Misiones']

[Row 21] Searching Sentinel-2 L2A for event 'Event_21'
  BBOX: [-63.271647, -30.591262, -53.601347, -22.512505]
  Date range: 2014-06-30/2014-07-02
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:   9%|▊         | 2/23 [00:00<00:06,  3.04it/s]

  Items found: 0
Filtered by 2 adm1 names: ['Buenos Aires', 'Buenos Aires D.f.']

[Row 22] Searching Sentinel-2 L2A for event 'Event_22'
  BBOX: [-63.418455, -41.033855, -56.641502, -30.151774]
  Date range: 2014-11-04/2014-11-06
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  13%|█▎        | 3/23 [00:00<00:06,  3.12it/s]

  Items found: 0
Filtered by 9 adm1 names: ['Catamarca', 'Chaco', 'Cordoba', 'Corrientes', 'Salta', 'San Luis', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']

[Row 23] Searching Sentinel-2 L2A for event 'Event_23'
  BBOX: [-69.114097, -36.000164, -55.609897, -22.000757]
  Date range: 2015-03-04/2015-03-06
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  17%|█▋        | 4/23 [00:01<00:06,  3.13it/s]

  Items found: 0
Filtered by 20 adm2 names: ['Arrecifes', 'Berisso', 'Campana', 'Chacabuco', 'Chivilcoy', 'General Pueyrredon', 'Junin', 'Lujan', 'Mercedes', 'Pergamino', 'Pila', 'Quilmes', 'Salto', 'San Andres de Giles', 'San Antonio de Areco', 'La Plata', 'Tres de Febrero', 'Zarate', 'Pilar', 'General  Lopez']

[Row 24] Searching Sentinel-2 L2A for event 'Event_24'
  BBOX: [-65.578013, -36.615345, -57.164433, -26.758911]
  Date range: 2015-08-12/2015-08-14
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  22%|██▏       | 5/23 [00:02<00:09,  1.92it/s]

  Items found: 0
Filtered by 10 adm2 names: ['Curuzu Cuatia', 'Esquina', 'Goya', 'Lavalle', 'Paso de los Libres', 'San Cosme', 'Gualeguaychu', 'Parana', 'Colon', 'Concordia']

[Row 25] Searching Sentinel-2 L2A for event 'Event_25'
  BBOX: [-68.695168, -33.119871, -56.838651, -27.265963]
  Date range: 2016-01-11/2016-01-13
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  26%|██▌       | 6/23 [00:04<00:18,  1.11s/it]

  Items found: 1
Filtered by 8 adm1 names: ['Buenos Aires', 'Chaco', 'Cordoba', 'Corrientes', 'Entre Rios', 'Formosa', 'Santa Fe', 'Santiago Del Estero']

[Row 26] Searching Sentinel-2 L2A for event 'Event_26'
  BBOX: [-65.057829, -41.033855, -55.609897, -22.512505]
  Date range: 2016-04-15/2016-04-17
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  30%|███       | 7/23 [00:05<00:19,  1.20s/it]

  Items found: 40
Filtered by 19 adm2 names: ['Florentino Ameghino', 'Baradero', 'Arrecifes', 'Colon', 'General Villegas', 'Pergamino', 'Ramallo', 'Rojas', 'Salto', 'San Antonio de Areco', 'San Nicolas', 'Colon', 'General Roca', 'Punilla', 'San Javier', 'La Paz', 'Parana', 'General  Lopez', 'Rosario']

[Row 27] Searching Sentinel-2 L2A for event 'Event_27'
  BBOX: [-68.251317, -45.131851, -54.976067, -27.615255]
  Date range: 2016-12-26/2016-12-28
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  35%|███▍      | 8/23 [00:07<00:22,  1.50s/it]

  Items found: 75
Filtered by 15 adm1 names: ['Buenos Aires', 'Catamarca', 'Chubut', 'Cordoba', 'Formosa', 'Jujuy', 'La Pampa', 'Mendoza', 'Misiones', 'Salta', 'San Juan', 'Santa Cruz', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']

[Row 28] Searching Sentinel-2 L2A for event 'Event_28'
  BBOX: [-73.531182, -52.366255, -53.601347, -21.805624]
  Date range: 2017-04-07/2017-04-09
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  39%|███▉      | 9/23 [00:09<00:19,  1.40s/it]

  Items found: 71
Filtered by 2 adm1 names: ['Corrientes', 'Entre Rios']

[Row 29] Searching Sentinel-2 L2A for event 'Event_29'
  BBOX: [-59.620913, -30.591262, -55.609897, -27.301516]
  Date range: 2017-06-14/2017-06-16
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  43%|████▎     | 10/23 [00:09<00:13,  1.07s/it]

  Items found: 0
Filtered by 1 adm2 names: ['Escalante']

[Row 30] Searching Sentinel-2 L2A for event 'Event_30'
  BBOX: [-68.371193, -45.999795, -66.360497, -44.678928]
  Date range: 2017-04-21/2017-04-23
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  48%|████▊     | 11/23 [00:10<00:11,  1.02it/s]

  Items found: 0
Filtered by 2 adm1 names: ['Chaco', 'Salta']

[Row 31] Searching Sentinel-2 L2A for event 'Event_31'
  BBOX: [-68.575573, -28.020576, -58.316378, -22.000757]
  Date range: 2018-02-21/2018-02-23
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  52%|█████▏    | 12/23 [00:11<00:11,  1.00s/it]

  Items found: 45
Filtered by 1 adm2 names: ['Presidente Roque Saenz Pena']


Processing rows:  57%|█████▋    | 13/23 [00:11<00:08,  1.19it/s]

[Row 32] ERROR: No matching administrative units found for ARG at level ADM2
Filtered by 7 adm2 names: ['Arrecifes', 'La Matanza', 'Lanus', 'Lobos', 'Lomas de Zamora', 'Marcos Paz', 'La Plata']

[Row 33] Searching Sentinel-2 L2A for event 'Event_33'
  BBOX: [-60.236801, -35.435944, -57.752731, -33.745392]
  Date range: 2018-11-12/2018-11-14
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  61%|██████    | 14/23 [00:12<00:07,  1.17it/s]

  Items found: 0
Filtered by 6 adm1 names: ['Chaco', 'Corrientes', 'Entre Rios', 'Santa Fe', 'Santiago Del Estero', 'Tucuman']

[Row 34] Searching Sentinel-2 L2A for event 'Event_34'
  BBOX: [-65.057829, -34.384612, -55.609897, -24.164428]
  Date range: 2019-01-17/2019-01-19
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  65%|██████▌   | 15/23 [00:13<00:06,  1.19it/s]

  Items found: 43
Filtered by 3 adm1 names: ['Chaco', 'Corrientes', 'Formosa']

[Row 35] Searching Sentinel-2 L2A for event 'Event_35'
  BBOX: [-63.271647, -30.591262, -55.609897, -22.512505]
  Date range: 2019-04-23/2019-04-25
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  70%|██████▉   | 16/23 [00:13<00:05,  1.38it/s]

  Items found: 14
Filtered by 3 adm2 names: ['Belgrano', 'General Taboada', 'Juan F. Ibarra']

[Row 36] Searching Sentinel-2 L2A for event 'Event_36'
  BBOX: [-67.320481, -33.206578, -61.437693, -27.721172]
  Date range: 2019-05-20/2019-05-22
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  74%|███████▍  | 17/23 [00:14<00:04,  1.28it/s]

  Items found: 0
Filtered by 4 adm1 names: ['Chaco', 'La Rioja', 'Salta', 'Tucuman']

[Row 37] Searching Sentinel-2 L2A for event 'Event_37'
  BBOX: [-68.575573, -28.020576, -58.316378, -22.000757]
  Date range: 2020-02-19/2020-02-21
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  78%|███████▊  | 18/23 [00:15<00:03,  1.47it/s]

  Items found: 10
Filtered by 1 adm1 names: ['Catamarca']

[Row 38] Searching Sentinel-2 L2A for event 'Event_38'
  BBOX: [-69.114097, -30.116125, -64.839508, -25.129197]
  Date range: 2021-03-01/2021-03-03
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows:  83%|████████▎ | 19/23 [00:15<00:02,  1.66it/s]

  Items found: 8
Filtered by 1 adm2 names: ['Quilmes']

[Row 39] Searching Sentinel-2 L2A for event 'Event_39'
  BBOX: [-58.346916, -34.801014, -58.194752, -34.678736]
  Date range: 2023-07-05/2023-07-07
  Query: {'eo:cloud_cover': {'lt': 20}, 'platform': {'in': ['sentinel-2a', 'sentinel-2b']}}


Processing rows: 100%|██████████| 23/23 [00:16<00:00,  1.39it/s]

  Items found: 0
No admin names found!
[Row 40] No bounding box available for event 'Event_40'
No admin names found!
[Row 41] No bounding box available for event 'Event_41'
No admin names found!
[Row 42] No bounding box available for event 'Event_42'

Total rows: 23
Rows with imagery: 9
Total items found: 307





In [21]:
results_df

Unnamed: 0,row_index,event_name,end_date,datetime_range,bbox_available,items_found,bbox_coords,error
0,20,Event_20,2014-04-08,2014-04-08/2014-04-10,True,0,"[-69.114097, -34.384612, -58.807222, -25.129197]",
1,21,Event_21,2014-06-30,2014-06-30/2014-07-02,True,0,"[-63.271647, -30.591262, -53.601347, -22.512505]",
2,22,Event_22,2014-11-04,2014-11-04/2014-11-06,True,0,"[-63.418455, -41.033855, -56.641502, -30.151774]",
3,23,Event_23,2015-03-04,2015-03-04/2015-03-06,True,0,"[-69.114097, -36.000164, -55.609897, -22.000757]",
4,24,Event_24,2015-08-12,2015-08-12/2015-08-14,True,0,"[-65.578013, -36.615345, -57.164433, -26.758911]",
5,25,Event_25,2016-01-11,2016-01-11/2016-01-13,True,1,"[-68.695168, -33.119871, -56.838651, -27.265963]",
6,26,Event_26,2016-04-15,2016-04-15/2016-04-17,True,40,"[-65.057829, -41.033855, -55.609897, -22.512505]",
7,27,Event_27,2016-12-26,2016-12-26/2016-12-28,True,75,"[-68.251317, -45.131851, -54.976067, -27.615255]",
8,28,Event_28,2017-04-07,2017-04-07/2017-04-09,True,71,"[-73.531182, -52.366255, -53.601347, -21.805624]",
9,29,Event_29,2017-06-14,2017-06-14/2017-06-16,True,0,"[-59.620913, -30.591262, -55.609897, -27.301516]",


In [None]:
import rioxarray as rio
import numpy as np
import os

# Get the first row with imagery from your existing results
rows_with_imagery = results_df[results_df["items_found"] > 0]
if len(rows_with_imagery) > 0:
    # Get the first row with imagery
    first_row = rows_with_imagery.iloc[0]
    row_index = first_row["row_index"]

    print(f"Loading imagery for Row {row_index}: {first_row['items_found']} items")

    # Re-run just the search part to get the items
    original_row = emdat.iloc[row_index]
    bbox_geometry, bbox_dict = get_flood_bounding_box(original_row, "ARG")
    bounds = bbox_geometry.bounds
    bbox = [bounds[0], bounds[1], bounds[2], bounds[3]]
    datetime_range = original_row.get("datetime_range")

    # Search for Sentinel-1 RTC data
    search = catalog.search(
        collections=["sentinel-1-rtc"],
        bbox=bbox,
        datetime=datetime_range,
        query={
            "sar:instrument_mode": {"eq": "IW"},
            "sar:frequency_band": {"eq": "C"},
            "sar:polarizations": {"in": [["VV"], ["VH"], ["VV", "VH"]]},
        },
    )

    items = search.item_collection()

    # Load and save the first item
    item = items[0]
    print(f"Loading item: {item.id}")
    print(f"Date: {item.datetime}")

    # Load the VV band (or VH if VV not available)
    if "vv" in item.assets:
        asset_key = "vv"
    else:
        asset_key = "vh"

    print(f"Loading {asset_key} band...")
    ds = rio.open_rasterio(item.assets[asset_key].href)

    # Create output directory if it doesn't exist
    output_dir = "/home/nissim/Documents/dev/arg-inundaciones/data/"
    os.makedirs(output_dir, exist_ok=True)

    # Create filename
    filename = f"row_{row_index}_{asset_key}_{item.datetime.strftime('%Y%m%d')}.tif"
    output_path = os.path.join(output_dir, filename)

    print(f"Saving to: {output_path}")

    # Save as COG
    ds.rio.to_raster(
        output_path,
        driver="COG",
        compress="LZW",
        tiled=True,
        blockxsize=512,
        blockysize=512,
        overview_levels=[2, 4, 8, 16],
        overview_resampling="nearest",
    )

    print("Saved COG successfully!")
    print(f"File size: {os.path.getsize(output_path) / (1024 * 1024):.2f} MB")

else:
    print("No imagery found!")

Loading imagery for Row 22: 2 items
Filtered by 2 adm1 names: ['Buenos Aires', 'Buenos Aires D.f.']
Loading item: S1A_IW_GRDH_1SSV_20141106T093826_20141106T093851_003159_003A22_rtc
Date: 2014-11-06 09:38:38.636304+00:00
Loading vv band...
Saving to: /home/nissim/Documents/dev/arg-inundaciones/data/row_22_vv_20141106.tif
Saved COG successfully!
File size: 2595.91 MB
Data shape: (23544, 30924)
Data range: -32768.000000 to 1203.246826


In [13]:
import rioxarray as rio
import os
import xarray as xr
import dask
from dask.distributed import Client

# Configure Dask for memory-efficient processing
# Limit concurrent tasks to control peak memory usage
dask.config.set(
    {
        "array.slicing.split_large_chunks": False,
        "distributed.worker.memory.target": 0.8,  # Use 80% of worker memory before spilling
        "distributed.worker.memory.spill": 0.9,  # Spill to disk at 90%
    }
)

# Optional: Start a local Dask client with limited workers
# Adjust n_workers based on your system (start with 2-4 for 64GB RAM)
client = Client(n_workers=4, threads_per_worker=2, memory_limit="8GB")
print(f"Dask dashboard: {client.dashboard_link}")

# Get the first row with imagery from your S2 results
rows_with_imagery = results_df[results_df["items_found"] > 0]
if len(rows_with_imagery) > 0:
    # Get the first row with imagery
    first_row = rows_with_imagery.iloc[0]
    row_index = first_row["row_index"]
    print(
        f"Loading Sentinel-2 imagery for Row {row_index}: {first_row['items_found']} items"
    )

    # Use .loc to get the correct row
    original_row = emdat_filtered.loc[row_index]
    bbox_geometry, bbox_dict = get_flood_bounding_box(original_row, "ARG")
    bounds = bbox_geometry.bounds
    bbox = [bounds[0], bounds[1], bounds[2], bounds[3]]
    datetime_range = original_row.get("datetime_range")

    # Search for Sentinel-2 L2A data
    search = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=bbox,
        datetime=datetime_range,
        query={
            "eo:cloud_cover": {"lt": 20},  # Less than 20% cloud cover
        },
    )
    items = search.item_collection()

    # Load all bands for the first item with Dask chunking
    item = items[0]
    print(f"Loading item: {item.id}")
    print(f"Date: {item.datetime}")

    # List all band asset keys (usually B01, B02, ..., B12, B8A, etc.)
    band_keys = [k for k in item.assets.keys() if k.startswith("B")]
    band_keys.sort()  # Sort for consistency
    print(f"Bands to load: {band_keys}")

    # Define chunk size for memory-efficient processing
    # 2048x2048 is usually good for regional data - adjust if needed
    chunk_size = {"x": 2048, "y": 2048}

    # Group bands by resolution to avoid unwanted resampling during concatenation
    bands_10m = []  # B02, B03, B04, B08 (10980x10980)
    bands_20m = []  # B05, B06, B07, B11, B12, B8A (5490x5490)
    bands_60m = []  # B01, B09 (1830x1830)

    print("Loading bands with Dask chunking...")
    for band in band_keys:
        print(f"  Loading band: {band}")
        # Load with chunking - this creates a Dask array (lazy evaluation)
        da = rio.open_rasterio(
            item.assets[band].href,
            chunks=chunk_size,
            lock=False,  # Allow concurrent access to the same file
        )
        # Set the band name for clarity
        da = da.assign_coords(band=[band])

        # Group by resolution based on spatial dimensions
        if da.shape[1] > 8000:  # 10m bands (~10980x10980)
            bands_10m.append(da)
        elif da.shape[1] > 4000:  # 20m bands (~5490x5490)
            bands_20m.append(da)
        else:  # 60m bands (~1830x1830)
            bands_60m.append(da)

        print(f"    Band {band} shape: {da.shape}, chunks: {da.chunks}")

    print("Concatenating bands by resolution groups...")

    # Process each resolution group separately to maintain chunking
    resolution_groups = []

    if bands_10m:
        print(f"  Concatenating {len(bands_10m)} bands at 10m resolution")
        multi_10m = xr.concat(bands_10m, dim="band")
        resolution_groups.append(("10m", multi_10m))

    if bands_20m:
        print(f"  Concatenating {len(bands_20m)} bands at 20m resolution")
        multi_20m = xr.concat(bands_20m, dim="band")
        resolution_groups.append(("20m", multi_20m))

    if bands_60m:
        print(f"  Concatenating {len(bands_60m)} bands at 60m resolution")
        multi_60m = xr.concat(bands_60m, dim="band")
        resolution_groups.append(("60m", multi_60m))

    # For now, let's work with the highest resolution group (10m)
    # You can modify this logic based on your needs
    if bands_10m:
        multi_band = multi_10m
        print("Using 10m resolution bands for output")
    elif bands_20m:
        multi_band = multi_20m
        print("Using 20m resolution bands for output")
    else:
        multi_band = multi_60m
        print("Using 60m resolution bands for output")

    print(f"Multi-band array shape: {multi_band.shape}")
    print(f"Multi-band chunks: {multi_band.chunks}")
    # Calculate estimated memory per chunk (chunks is a tuple of tuples)
    chunk_sizes = [len(c) for c in multi_band.chunks]
    total_chunks = np.prod(chunk_sizes)
    print(f"Estimated memory per chunk: {multi_band.nbytes / total_chunks:.2f} bytes")

    # Create output directory if it doesn't exist
    output_dir = "/home/nissim/Documents/dev/arg-inundaciones/data/"
    os.makedirs(output_dir, exist_ok=True)

    # Create filename
    filename = f"s2_row_{row_index}_allbands_{item.datetime.strftime('%Y%m%d')}.tif"
    output_path = os.path.join(output_dir, filename)
    print(f"Saving to: {output_path}")

    # Save as COG with Dask-enabled chunked writing
    # This will process chunks sequentially, keeping memory usage bounded
    print("Starting chunked write to COG (this may take a while)...")
    with dask.config.set(scheduler="threads"):  # Use threads for I/O
        multi_band.rio.to_raster(
            output_path,
            driver="COG",
            compress="LZW",
            tiled=True,
            blockxsize=512,
            blockysize=512,
            overview_levels=[2, 4, 8, 16],
            overview_resampling="nearest",
            # COG-specific optimizations
            bigtiff="auto",  # Use BigTIFF for large files
        )

    print("Saved multi-band COG successfully!")
    print(f"File size: {os.path.getsize(output_path) / (1024 * 1024):.2f} MB")

    # Clean up Dask client
    client.close()

else:
    print("No Sentinel-2 imagery found!")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42117 instead


Dask dashboard: http://127.0.0.1:42117/status
Loading Sentinel-2 imagery for Row 25: 1 items
Filtered by 10 adm2 names: ['Curuzu Cuatia', 'Esquina', 'Goya', 'Lavalle', 'Paso de los Libres', 'San Cosme', 'Gualeguaychu', 'Parana', 'Colon', 'Concordia']
Loading item: S2A_MSIL2A_20160113T135952_R067_T20HPJ_20210527T163414
Date: 2016-01-13 13:59:52.029000+00:00
Bands to load: ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A']
Loading bands with Dask chunking...
  Loading band: B01
    Band B01 shape: (1, 1830, 1830), chunks: ((1,), (1830,), (1830,))
  Loading band: B02
    Band B02 shape: (1, 10980, 10980), chunks: ((1,), (2048, 2048, 2048, 2048, 2048, 740), (2048, 2048, 2048, 2048, 2048, 740))
  Loading band: B03
    Band B03 shape: (1, 10980, 10980), chunks: ((1,), (2048, 2048, 2048, 2048, 2048, 740), (2048, 2048, 2048, 2048, 2048, 740))
  Loading band: B04
    Band B04 shape: (1, 10980, 10980), chunks: ((1,), (2048, 2048, 2048, 2048, 2048, 740), (2048, 2

In [15]:
import rioxarray as rio

da = rio.open_rasterio(
    "/home/nissim/Documents/dev/arg-inundaciones/data/s2_row_25_allbands_20160113.tif"
)
print(f"Band coordinates: {da.coords['band'].values}")

Band coordinates: [1 2 3 4]


# so, we will have:

In [13]:
# so, we'll have:
# 1: download raw imagery
# 2: process raw imagery compared to pre-flood imagery
# 3: apply algorithmic corrections
# 4: manually correct remaining imagery