In [None]:
# from datetime import datetime, timedelta

# def get_time_window(dt_str, seconds=5):
#     """
#     Convert a datetime string into ISO-like format with offsets before and after.

#     This function takes an input datetime string in the format
#     'YYYY-MM-DD HH:MM:SS' (with a space separator), converts it to
#     'YYYY-MM-DDTHH:MM:SS' (with a 'T' separator), and returns two strings:
#     one representing the time `seconds` before, and another representing
#     the time `seconds` after the given datetime.

#     Parameters
#     ----------
#     dt_str : str
#         A datetime string in the format 'YYYY-MM-DD HH:MM:SS'.
#     seconds : int, optional
#         The number of seconds to offset before and after the given time.
#         Default is 5.

#     Returns
#     -------
#     tuple of str
#         A tuple containing two datetime strings in the format
#         'YYYY-MM-DDTHH:MM:SS':
#         - The first string is the input time minus `seconds`.
#         - The second string is the input time plus `seconds`.

#     Examples
#     --------
#     >>> get_time_window('2020-10-06 11:35:23', seconds=5)
#     ('2020-10-06T11:35:18', '2020-10-06T11:35:28')
#     """
#     # Parse input datetime
#     dt = datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
    
#     # Compute before and after times
#     before = dt - timedelta(seconds=seconds)
#     after = dt + timedelta(seconds=seconds)
    
#     # Return formatted strings
#     return before.strftime("%Y-%m-%dT%H:%M:%S"), after.strftime("%Y-%m-%dT%H:%M:%S")


# # Example usage
# print(get_time_window('2020-10-06 11:35:23', seconds=5))
# # -> ('2020-10-06T11:35:18', '2020-10-06T11:35:28')
# geojson_to_wkt(GeoFootprint) #
# {'type': 'MultiPolygon',
#  'coordinates': [[[[-6.108396, 46.729179],
#     [-5.665298, 48.22562],
#     [-9.178612, 48.633568],
#     [-9.522814, 47.136372],
#     [-6.108396, 46.729179]]]]}

# OriginDate = df_exact.iloc[0]['OriginDate']
# start, stop = get_time_window(OriginDate, seconds=15)
# print(f'Start: {start}, Stop: {stop}')

In [1]:
import configparser
from phidown.search import CopernicusDataSearcher
import time

import pandas as pd
import random
from pathlib import Path

# Root directory that is scanned for GRD products (each product is expected to be a
# sub-directory, e.g. a `.SAFE` folder). Kept as a named constant so it is easy to retarget.
GRD_DIR = Path('/Data_large/marine/PythonProjects/SAR/sarpyx/data/xview3/GRD')

# Path.iterdir() yields entries in arbitrary order; sort them so the processing order
# (and therefore the order of the saved records) is identical on every run.
GRD_products = sorted(x for x in GRD_DIR.iterdir() if x.is_dir())
print(f'Found {len(GRD_products)} L1 products')

Found 754 L1 products


In [None]:
def geojson_to_polygon_wkt(geometry, *, on_multipolygon="first"):
    """
    Convert a GeoJSON geometry to a POLYGON WKT string.

    This function guarantees a POLYGON WKT output. Behavior by geometry type:
    - Polygon: serialized directly to POLYGON (2D or Z).
    - MultiPolygon: by default takes the first polygon and returns it as POLYGON.
      You can control this with `on_multipolygon`.
    - Any other geometry type: raises ValueError.

    Parameters
    ----------
    geometry : dict
        A GeoJSON-like geometry dictionary with keys:
        - 'type' (str): One of {'Polygon','MultiPolygon'} is supported; others raise.
        - 'coordinates' (list): For 'Polygon', a list of linear rings; for 'MultiPolygon',
          a list of polygons (each a list of rings). Rings are expected in GeoJSON order
          [x, y] or [x, y, z]. Rings should be closed (first == last); this function
          does not enforce closure.
    on_multipolygon : {'first', 'error'}, optional
        Behavior when input is a MultiPolygon:
        - 'first' (default): Use the first polygon within the MultiPolygon.
        - 'error': Raise ValueError if there is more than one polygon.
        Any other value raises ValueError (regardless of the geometry type), so a
        misspelt option cannot silently fall back to 'first'.
        Note: True topological merging (dissolve/union) is not performed.

    Returns
    -------
    str
        A POLYGON (or 'POLYGON Z') WKT string, or 'POLYGON EMPTY' when the selected
        polygon has no rings.

    Raises
    ------
    ValueError
        If `on_multipolygon` is not a supported option, the geometry type is
        unsupported, the structure is invalid, or MultiPolygon handling is set to
        'error' with multiple polygons.

    Notes
    -----
    - This function does not perform geometric operations (e.g., union/merge).
    - Presence of any Z value in the chosen polygon promotes output to 'POLYGON Z';
      vertices that only carry x and y are then padded with z = 0 so that every
      vertex has the same dimension.
    - Only the first three values of a coordinate are serialized.
    """
    if on_multipolygon not in ("first", "error"):
        raise ValueError(
            f"on_multipolygon must be 'first' or 'error', got {on_multipolygon!r}"
        )

    def _has_z(obj):
        """Return True if any all-numeric coordinate nested in `obj` has 3+ elements."""
        if not isinstance(obj, (list, tuple)):
            return False
        if obj and all(isinstance(v, (int, float)) for v in obj):
            # `obj` is a single coordinate.
            return len(obj) >= 3
        return any(_has_z(item) for item in obj)

    def _fmt_num(n):
        """Format numbers compactly, removing trailing zeros and unnecessary decimals."""
        if isinstance(n, int):
            return str(n)
        return f"{float(n):.15g}"

    def _fmt_coord(coord, with_z):
        """Serialize one vertex as 'x y' or 'x y z'."""
        values = list(coord[:3])
        if with_z and len(values) == 2:
            # Mixed 2D/3D input: pad so every vertex of a POLYGON Z has three values.
            values.append(0)
        return " ".join(_fmt_num(v) for v in values)

    def _polygon_wkt(rings):
        """Serialize a list of rings as a single POLYGON WKT string."""
        if not rings:
            # 'POLYGON ()' is not valid WKT; the empty geometry has its own keyword.
            return "POLYGON EMPTY"
        with_z = _has_z(rings)
        dim = " Z" if with_z else ""
        body = ", ".join(
            "(" + ", ".join(_fmt_coord(c, with_z) for c in ring) + ")"
            for ring in rings
        )
        return f"POLYGON{dim} ({body})"

    gtype = geometry.get("type")
    if not gtype:
        raise ValueError("Geometry missing 'type'")

    if gtype == "Polygon":
        rings = geometry.get("coordinates")
        if not isinstance(rings, list):
            raise ValueError("Polygon 'coordinates' must be a list of rings")
        return _polygon_wkt(rings)

    if gtype == "MultiPolygon":
        polys = geometry.get("coordinates")
        if not isinstance(polys, list) or not polys:
            raise ValueError("MultiPolygon 'coordinates' must be a non-empty list of polygons")
        if on_multipolygon == "error" and len(polys) != 1:
            raise ValueError("MultiPolygon has multiple polygons; set on_multipolygon='first' to pick the first")
        chosen = polys[0]
        if not isinstance(chosen, list):
            raise ValueError("Invalid MultiPolygon structure: expected list of polygons (list of rings)")
        return _polygon_wkt(chosen)

    raise ValueError(f"Unsupported geometry type for polygon output: {gtype}")




def _find_corresponding_product(geo_footprint, start, end, *, product_type,
                                processing_level, random_state=None):
    """
    Shared implementation for the SLC / L0 lookups.

    Runs a `CopernicusDataSearcher` filter query on the 'SENTINEL-1' collection,
    restricted to the footprint (converted to POLYGON WKT), the time window and the
    given `processingLevel` attribute, then picks one row of the result at random
    and returns its 'Name'.

    Args:
        geo_footprint (dict): GeoJSON-like geometry dictionary.
        start (str): Start of the search window, passed through as `start_date`.
        end (str): End of the search window, passed through as `end_date`.
        product_type (str | None): Value forwarded as `product_type` (None = no filter value).
        processing_level (str): Value of the 'processingLevel' attribute filter.
        random_state: Forwarded to `DataFrame.sample`; pass a seed for a reproducible pick.

    Returns:
        The 'Name' value of the sampled result row.

    Raises:
        ValueError: If the query yields no products.
    """
    searcher = CopernicusDataSearcher()
    searcher.query_by_filter(
        collection_name='SENTINEL-1',
        product_type=product_type,
        orbit_direction=None,
        cloud_cover_threshold=None,
        aoi_wkt=geojson_to_polygon_wkt(geo_footprint),
        start_date=start,
        end_date=end,
        top=1000,
        attributes={'processingLevel': processing_level},
    )
    matches = searcher.execute_query()
    # Sampling from an empty frame fails with an unrelated-looking pandas error;
    # fail early with a message that names the actual problem instead.
    if matches is None or matches.empty:
        raise ValueError(
            f"No {processing_level} product found between {start} and {end} "
            "for the given footprint"
        )
    return matches.sample(n=1, random_state=random_state)['Name'].values[0]


def get_corresponding_slc(geo_footprint: dict, start: str, end: str, random_state=None) -> str:
    """
    Retrieve the corresponding SLC product name for a given GeoFootprint and time window.

    Args:
        geo_footprint (dict): GeoJSON-like geometry dictionary.
        start (str): Start time in ISO 8601 format.
        end (str): End time in ISO 8601 format.
        random_state: Optional seed forwarded to `DataFrame.sample`. When several
            products match, one is chosen at random; pass a seed to make the choice
            reproducible. Defaults to None (unseeded, as before).

    Returns:
        str: Name of the corresponding SLC product.

    Raises:
        ValueError: If the search returns no products.
    """
    return _find_corresponding_product(
        geo_footprint, start, end,
        product_type='SLC',
        processing_level='LEVEL1',
        random_state=random_state,
    )


def get_corresponding_raw(geo_footprint: dict, start: str, end: str, random_state=None) -> str:
    """
    Retrieve the corresponding L0 product name for a given GeoFootprint and time window.

    Args:
        geo_footprint (dict): GeoJSON-like geometry dictionary.
        start (str): Start time in ISO 8601 format.
        end (str): End time in ISO 8601 format.
        random_state: Optional seed forwarded to `DataFrame.sample`. When several
            products match, one is chosen at random; pass a seed to make the choice
            reproducible. Defaults to None (unseeded, as before).

    Returns:
        str: Name of the corresponding L0 product.

    Raises:
        ValueError: If the search returns no products.
    """
    return _find_corresponding_product(
        geo_footprint, start, end,
        product_type=None,
        processing_level='LEVEL0',
        random_state=random_state,
    )

POLYGON ((-6.108396 46.729179, -5.665298 48.22562, -9.178612 48.633568, -9.522814 47.136372, -6.108396 46.729179))


In [3]:
# One record per GRD product for which both an L0 and an SLC counterpart were found.
data_collector = []


for product in GRD_products:
    print(f'Using product: {product}')

    # =========== A) GRD info extraction ====================
    try:
        product_name = product.name
        print(f'Product name: {product_name}')

        # Look the GRD product up by its exact name to get its footprint and dates.
        searcher_by_name = CopernicusDataSearcher()

        print(f"Searching for product with exact name: {product_name}\n")
        df_exact = searcher_by_name.query_by_name(product_name=product_name)

        if df_exact.empty:
            # Skip explicitly instead of falling through to `.iloc[0]` and relying on
            # the resulting IndexError to reach the `except` branch.
            print(f"Product '{product_name}' not found or an error occurred.")
            continue

        searcher_by_name.display_results(top_n=1)

        first_hit = df_exact.iloc[0]
        GeoFootprint = first_hit['GeoFootprint']
        ContentDate = first_hit['ContentDate']

        start = ContentDate['Start']
        end = ContentDate['End']
        # ========================= End Extract product info =========================

    except Exception as e:
        print(f"Error extracting product info: {e}")
        print(f"Exception occurred for product: {product}")
        continue

    try:
        # =========== B) Find corresponding products ====================
        RAW_product_name = get_corresponding_raw(GeoFootprint, start, end)
        SLC_product_name = get_corresponding_slc(GeoFootprint, start, end)

        data_collector.append({
            'GRD': product_name,
            'SLC': SLC_product_name,
            'L0': RAW_product_name
        })
        # Checkpoint after each append so an interrupted run keeps what was collected.
        pd.to_pickle(data_collector, 'xview3_multi.pkl')
        time.sleep(random.uniform(1, 3))  # Sleep between 1 to 3 seconds to avoid overwhelming the server

    except Exception as e:
        # This branch covers the L0 lookup, the SLC lookup and the checkpoint write,
        # so the message must not single out the L0 search.
        print(f"Error collecting L0/SLC products for {product_name}: {e}")
        continue
    


Using product: /Data_large/marine/PythonProjects/SAR/sarpyx/data/xview3/GRD/S1A_IW_GRDH_1SDV_20201006T064054_20201006T064119_034672_0409DA_8975.SAFE
Product name: S1A_IW_GRDH_1SDV_20201006T064054_20201006T064119_034672_0409DA_8975.SAFE
Searching for product with exact name: S1A_IW_GRDH_1SDV_20201006T064054_20201006T064119_034672_0409DA_8975.SAFE

WKT polygon normalized: Whitespace and formatting corrected
WKT polygon normalized: Whitespace and formatting corrected
Using product: /Data_large/marine/PythonProjects/SAR/sarpyx/data/xview3/GRD/S1A_IW_GRDH_1SDV_20201105T183650_20201105T183715_035117_041924_224E.SAFE
Product name: S1A_IW_GRDH_1SDV_20201105T183650_20201105T183715_035117_041924_224E.SAFE
Searching for product with exact name: S1A_IW_GRDH_1SDV_20201105T183650_20201105T183715_035117_041924_224E.SAFE

WKT polygon normalized: Whitespace and formatting corrected
WKT polygon normalized: Whitespace and formatting corrected
Using product: /Data_large/marine/PythonProjects/SAR/sarpyx/da

In [None]:
# NOTE(review): `df` is not assigned at notebook level in any visible cell (it only
# appears as a local variable inside the search functions), so this cell presumably
# relies on leftover kernel state and would raise NameError after
# Restart & Run All — confirm what was meant to be displayed, or remove the cell.
df


In [None]:
[{'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'timeliness', 'Value': 'Fast-24h', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'coordinates', 'Value': '45.460442,-12.755283 45.857189,-9.512984 44.240639,-9.156878 43.844185,-12.306799 45.460442,-12.755283', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.IntegerAttribute', 'Name': 'orbitNumber', 'Value': 35117, 'ValueType': 'Integer'}, {'@odata.type': '#OData.CSC.IntegerAttribute', 'Name': 'sliceNumber', 'Value': 11, 'ValueType': 'Integer'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'productClass', 'Value': 'S', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'orbitDirection', 'Value': 'ASCENDING', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'operationalMode', 'Value': 'IW', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'processingLevel', 'Value': 'LEVEL1', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'swathIdentifier', 'Value': 'IW1 IW2 IW3', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'platformShortName', 'Value': 'SENTINEL-1', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'instrumentShortName', 'Value': 'SAR', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.IntegerAttribute', 'Name': 'relativeOrbitNumber', 'Value': 45, 'ValueType': 'Integer'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'polarisationChannels', 'Value': 'VV&VH', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'platformSerialIdentifier', 'Value': 'A', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.StringAttribute', 'Name': 'productType', 'Value': 'IW_SLC__1S', 'ValueType': 'String'}, {'@odata.type': '#OData.CSC.DateTimeOffsetAttribute', 'Name': 'beginningDateTime', 'Value': '2020-11-05T18:37:14.092000Z', 'ValueType': 'DateTimeOffset'}, {'@odata.type': 
'#OData.CSC.DateTimeOffsetAttribute', 'Name': 'endingDateTime', 'Value': '2020-11-05T18:37:41.052000Z', 'ValueType': 'DateTimeOffset'}]

In [None]:
# Number of GRD/SLC/L0 records gathered by the matching loop. The previous name
# `L1_L0_pairs` is not defined anywhere in the notebook; the records are accumulated
# in `data_collector`.
print(len(data_collector))