In [None]:
import pandas as pd

# Load CSV
df = pd.read_csv("all_filaments_pixels_combined.csv")

# Example: convert one filament
fid = 0
sub = df[df['filament_id'] == fid].copy()

# Get centroid info
lat0 = sub['lat_centroid'].iloc[0]
lon0 = sub['lon_centroid'].iloc[0]
x0 = sub['x_centroid'].iloc[0]
y0 = sub['y_centroid'].iloc[0]

# Approximate meters per degree at that lat
meters_per_deg_lat = 111_000
meters_per_deg_lon = 111_320 * abs(np.cos(np.radians(lat0)))

# Each pixel = 10 meters, so offset in meters from centroid:
dx = (sub['pixel_x'] - x0) * 10  # east-west in meters
dy = (sub['pixel_y'] - y0) * 10  # north-south in meters

# Convert meters to degrees
sub['lat'] = lat0 - dy / meters_per_deg_lat
sub['lon'] = lon0 + dx / meters_per_deg_lon

# Save to new CSV
sub[['filament_id', 'lat', 'lon']].to_csv(f"filament_{fid}_latlon.csv", index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'all_filaments_pixels_combined.csv'

In [None]:
df['filament_id'].max()

14373

In [None]:
m_df = pd.read_csv("/content/filaments_metadata.csv")
m_df.head()

In [None]:
m_df.columns

Index(['s2_product', 'dec_time', 'x_centroid', 'y_centroid', 'lat_centroid',
       'lon_centroid', 'n_pixels_fil', 'bounding_box_x_lower',
       'bounding_box_y_lower', 'bounding_box_x_upper', 'bounding_box_y_upper'],
      dtype='object')

In [None]:
m_df.columns

Index(['s2_product', 'dec_time', 'x_centroid', 'y_centroid', 'lat_centroid',
       'lon_centroid', 'n_pixels_fil', 'bounding_box_x_lower',
       'bounding_box_y_lower', 'bounding_box_x_upper', 'bounding_box_y_upper'],
      dtype='object')

In [None]:
len(df)

246515

In [None]:
m_df['s2_product'].nunique()

4472

In [None]:
len(m_df)

14374

In [None]:
import pandas as pd

# Load data
all_df = pd.read_csv("all_filaments_pixels_combined.csv")
meta_df = pd.read_csv("filaments_metadata.csv")

# Make sure filament_id is present in metadata
# Only do this if it's missing
meta_df['filament_id'] = meta_df.index

# Merge them
enriched_df = pd.merge(
    all_df,
    meta_df,
    on="filament_id",
    how="left"
)


In [None]:
enriched_df.head()

Unnamed: 0,filament_id,pixel_index,pixel_x,pixel_y,band_B01,band_B02,band_B03,band_B04,band_B05,band_B06,...,dec_time,x_centroid,y_centroid,lat_centroid,lon_centroid,n_pixels_fil,bounding_box_x_lower,bounding_box_y_lower,bounding_box_x_upper,bounding_box_y_upper
0,2000,0,9354,7205,0.1362,0.1081,0.0805,0.0491,0.0448,0.0406,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
1,2000,1,9369,7201,0.1356,0.1065,0.078,0.0491,0.0438,0.0423,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
2,2000,2,9384,7197,0.1356,0.1069,0.08,0.0482,0.0451,0.0445,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
3,2000,3,9399,7193,0.1365,0.1072,0.0797,0.049,0.0458,0.0429,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
4,2000,4,9402,7192,0.1365,0.1077,0.08,0.05,0.0478,0.046,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496


In [None]:
enriched_df['s2_product'].nunique()

4472

In [None]:
enriched_df.columns

Index(['filament_id', 'pixel_index', 'pixel_x', 'pixel_y', 'band_B01',
       'band_B02', 'band_B03', 'band_B04', 'band_B05', 'band_B06', 'band_B07',
       'band_B08', 'band_B8A', 'band_B09', 'band_B10', 'band_B11', 'band_B12',
       's2_product', 'dec_time', 'x_centroid', 'y_centroid', 'lat_centroid',
       'lon_centroid', 'n_pixels_fil', 'bounding_box_x_lower',
       'bounding_box_y_lower', 'bounding_box_x_upper', 'bounding_box_y_upper'],
      dtype='object')

In [None]:
len(enriched_df)

944592

In [None]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

output_folder = "png_masks"
os.makedirs(output_folder, exist_ok=True)

mask_size = 256
half_size = mask_size // 2

# --- Group by filament ---
for i, (filament_id, group) in enumerate(enriched_df.groupby("filament_id")):
    # Get pixel coordinates (image space)
    px = group['pixel_x'].values.astype(int)
    py = group['pixel_y'].values.astype(int)

    # Get image-space centroid (not lat/lon)
    cx = int(group['x_centroid'].iloc[0])
    cy = int(group['y_centroid'].iloc[0])

    # Create empty binary mask
    mask = np.zeros((mask_size, mask_size), dtype=np.uint8)

    # Map each pixel relative to centroid
    for x, y in zip(px, py):
        rel_x = x - cx + half_size
        rel_y = y - cy + half_size

        if 0 <= rel_x < mask_size and 0 <= rel_y < mask_size:
            mask[rel_y, rel_x] = 1

    # Save mask as PNG (multiply by 255 to get black/white image)
    filename = os.path.join(output_folder, f"mask_{filament_id}.png")
    Image.fromarray(mask * 255).save(filename)

    # Optional: visualize
    # plt.imshow(mask, cmap='gray')
    # plt.title(f"Mask for Filament #{filament_id}")
    # plt.axis('off')
    # plt.show()


In [None]:
import pandas as pd
import os
from datetime import datetime

# --- Setup ---
output_csv = "patch_metadata.csv"

MASK_DIR = "masks"
IMG_DIR = "geotiffs"

os.makedirs("patch_metadata", exist_ok=True)

# Keep one row per filament
filament_meta = enriched_df.drop_duplicates("filament_id")[[
    "filament_id",
    "lat_centroid", "lon_centroid",
    "x_centroid", "y_centroid",
    "dec_time", "s2_product"
]].copy()

# Convert decimal year to date
def decimal_year_to_date(dec_year):
    year = int(dec_year)
    days = int((dec_year - year) * 365.25)
    return (datetime(year, 1, 1) + pd.Timedelta(days=days)).strftime('%Y-%m-%d')

filament_meta["acquisition_date"] = filament_meta["dec_time"].apply(decimal_year_to_date)

# Rename columns
filament_meta = filament_meta.rename(columns={
    "lat_centroid": "lat_center",
    "lon_centroid": "lon_center",
    "x_centroid": "x_center",
    "y_centroid": "y_center"
})

# Use fixed 256×256 patch centered at centroid (128 px on each side)
half_size = 128
filament_meta["x_min"] = filament_meta["x_center"] - half_size
filament_meta["x_max"] = filament_meta["x_center"] + half_size
filament_meta["y_min"] = filament_meta["y_center"] - half_size
filament_meta["y_max"] = filament_meta["y_center"] + half_size

# Add file paths
filament_meta["mask_path"] = filament_meta["filament_id"].apply(
    lambda fid: f"{MASK_DIR}/mask_{fid}.png"
)
filament_meta["image_path"] = filament_meta["filament_id"].apply(
    lambda fid: f"{IMG_DIR}/image_{fid}.tif"
)

# Final columns
final_df = filament_meta[[
    "filament_id",
    "lat_center", "lon_center",
    "x_center", "y_center",
    "x_min", "x_max", "y_min", "y_max",
    "acquisition_date", "s2_product",
    "image_path", "mask_path"
]]

# Save
final_df.to_csv(output_csv, index=False)
print(f"✅ Saved patch metadata with fixed-size tiles to: {output_csv}")
print(final_df.head())


✅ Saved patch metadata with fixed-size tiles to: patch_metadata.csv
     filament_id  lat_center  lon_center  x_center  y_center  x_min  x_max  \
0           2000   34.376412   10.869906      9409      7188   9281   9537   
12          2001   34.930481   11.026350      3369      8656   3241   3497   
51          2002   34.895719   11.048051      3749      8860   3621   3877   
99          2003   34.782466   11.085260      4986      9210   4858   5114   
125         2004   34.695855   10.773533      5931      6286   5803   6059   

     y_min  y_max acquisition_date  \
0     7060   7316       2017-09-18   
12    8528   8784       2017-09-28   
51    8732   8988       2017-09-28   
99    9082   9338       2017-09-28   
125   6158   6414       2017-09-28   

                                            s2_product  \
0    S2A_MSIL1C_20170918T100021_N0205_R122_T32SPD_2...   
12   S2A_MSIL1C_20170928T100021_N0205_R122_T32SPD_2...   
51   S2A_MSIL1C_20170928T100021_N0205_R122_T32SPD_2...   
99

In [4]:
!pip install sentinelsat




In [None]:
#sh-6fdecf64-d5c9-4862-ae2e-c30c7368d738
#W92EDXXTIK2HR4R0bn5FOZEHeNEdkzbY

In [5]:
!pip install sentinelhub

Collecting sentinelhub
  Downloading sentinelhub-3.11.1-py3-none-any.whl.metadata (10 kB)
Collecting aenum>=2.1.4 (from sentinelhub)
  Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)
Collecting dataclasses-json (from sentinelhub)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting tomli (from sentinelhub)
  Downloading tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting tomli-w (from sentinelhub)
  Downloading tomli_w-1.2.0-py3-none-any.whl.metadata (5.7 kB)
Collecting utm (from sentinelhub)
  Downloading utm-0.8.1-py3-none-any.whl.metadata (5.2 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->sentinelhub)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json->sentinelhub)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dat

In [6]:
from sentinelhub import (
    SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubStatistical,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)

In [10]:
config = SHConfig()
config.sh_client_id = "sh-6fdecf64-d5c9-4862-ae2e-c30c7368d738"
config.sh_client_secret = "W92EDXXTIK2HR4R0bn5FOZEHeNEdkzbY"
config.sh_token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
config.sh_base_url = "https://sh.dataspace.copernicus.eu"
config.save("cdse")

In [11]:
config = SHConfig('cdse')

In [None]:
enriched_df['s2_product'].nunique()

4472

In [None]:
!pip install "hypercoast[extra]"


Collecting hypercoast[extra]
  Downloading HyperCoast-0.10.0-py2.py3-none-any.whl.metadata (12 kB)
Collecting earthaccess (from hypercoast[extra])
  Downloading earthaccess-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Collecting hvplot (from hypercoast[extra])
  Downloading hvplot-0.11.2-py3-none-any.whl.metadata (15 kB)
Collecting leafmap>=0.38.0 (from hypercoast[extra])
  Downloading leafmap-0.43.1-py2.py3-none-any.whl.metadata (16 kB)
Collecting localtileserver (from hypercoast[extra])
  Downloading localtileserver-0.10.6-py3-none-any.whl.metadata (5.2 kB)
Collecting mapclassify (from hypercoast[extra])
  Downloading mapclassify-2.8.1-py3-none-any.whl.metadata (2.8 kB)
Collecting netcdf4 (from hypercoast[extra])
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting rioxarray (from hypercoast[extra])
  Downloading rioxarray-0.18.2-py3-none-any.whl.metadata (5.4 kB)
Collecting s3fs (from hypercoast[extra])
  Downloading s3fs-2

In [None]:
!pip install openeo


Collecting openeo
  Downloading openeo-0.39.1-py3-none-any.whl.metadata (7.6 kB)
Collecting xarray!=2025.01.2,>=0.12.3 (from openeo)
  Downloading xarray-2025.3.1-py3-none-any.whl.metadata (12 kB)
Collecting pystac<1.12,>=1.5.0 (from openeo)
  Downloading pystac-1.11.0-py3-none-any.whl.metadata (4.5 kB)
Downloading openeo-0.39.1-py3-none-any.whl (303 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.0/303.0 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pystac-1.11.0-py3-none-any.whl (183 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xarray-2025.3.1-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pystac, xarray, openeo
  Attempting uninstall: pystac
    Found existing installation: pystac 1.12.2
    Uninstalling pystac-1.12.

In [None]:
enriched_df.head()

Unnamed: 0,filament_id,pixel_index,pixel_x,pixel_y,band_B01,band_B02,band_B03,band_B04,band_B05,band_B06,...,dec_time,x_centroid,y_centroid,lat_centroid,lon_centroid,n_pixels_fil,bounding_box_x_lower,bounding_box_y_lower,bounding_box_x_upper,bounding_box_y_upper
0,2000,0,9354,7205,0.1362,0.1081,0.0805,0.0491,0.0448,0.0406,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
1,2000,1,9369,7201,0.1356,0.1065,0.078,0.0491,0.0438,0.0423,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
2,2000,2,9384,7197,0.1356,0.1069,0.08,0.0482,0.0451,0.0445,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
3,2000,3,9399,7193,0.1365,0.1072,0.0797,0.049,0.0458,0.0429,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496
4,2000,4,9402,7192,0.1365,0.1077,0.08,0.05,0.0478,0.046,...,2017.713357,9409,7188,34.376412,10.869906,12,7158,9340,7235,9496


In [None]:
import pandas as pd
from sentinelhub import SentinelHubCatalog, BBox, CRS, DataCollection
from datetime import datetime, timedelta
import numpy as np

filament_df = enriched_df.drop_duplicates(subset=["filament_id"])[["filament_id", "lat_centroid", "lon_centroid", "dec_time"]]

# === Convert decimal time to YYYY-MM-DD ===
def decimal_year_to_date(decimal_year):
    year = int(decimal_year)
    remainder = decimal_year - year
    days_in_year = 366 if (year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)) else 365
    day_of_year = int(remainder * days_in_year)
    return (datetime(year, 1, 1) + timedelta(days=day_of_year)).strftime("%Y-%m-%d")

filament_df["date"] = filament_df["dec_time"].apply(decimal_year_to_date)

# === Initialize catalog ===
catalog = SentinelHubCatalog(config=config)

# === Query STAC by lat/lon/date ===
def find_s2_product(lat, lon, date):
    dt = datetime.strptime(date, "%Y-%m-%d")
    bbox = BBox([lon-0.01, lat-0.01, lon+0.01, lat+0.01], crs=CRS.WGS84)
    time_interval = (dt.strftime("%Y-%m-%d"), (dt + timedelta(days=1)).strftime("%Y-%m-%d"))

    results = list(
        catalog.search(
            collection=DataCollection.SENTINEL2_L1C,
            bbox=bbox,
            time=time_interval,
            fields={"include": ["id", "properties.datetime", "properties.mgrsTile"]},
            limit=1,
        )
    )
    if results:
        item = results[0]
        return {
            "product_id": item["id"],
            "datetime": item["properties"]["datetime"],
            "mgrs_tile": item["properties"].get("mgrsTile", "Unknown")
        }
    return {"product_id": None, "datetime": None, "mgrs_tile": None}

# === Run product search per filament ===
product_rows = []
for _, row in filament_df.iterrows():
    result = find_s2_product(row["lat_centroid"], row["lon_centroid"], row["date"])
    product_rows.append(result)

# Merge with original dataframe
product_df = pd.DataFrame(product_rows)
matched_df = pd.concat([filament_df.reset_index(drop=True), product_df], axis=1)

# Save results
matched_df.to_csv("matched_s2_products.csv", index=False)
print("✅ Saved: matched_s2_products.csv")


✅ Saved: matched_s2_products.csv


In [None]:
len(matched_df)

14374

In [None]:
matched_df['product_id'].nunique()

3976

In [None]:
import requests

def get_access_token(client_id, client_secret):
    token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
    payload = {
        "client_id": config.sh_client_id,
        "client_secret": config.sh_client_secret,
        "grant_type": "client_credentials"
    }

    response = requests.post(token_url, data=payload)
    response.raise_for_status()
    return response.json()["access_token"]


In [None]:
!curl --location --request POST 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token' \
--header 'Content-Type: application/x-www-form-urlencoded' \
--data-urlencode 'grant_type=client_credentials' \
--data-urlencode 'client_id=sh-6fdecf64-d5c9-4862-ae2e-c30c7368d738' \
--data-urlencode 'client_secret=W92EDXXTIK2HR4R0bn5FOZEHeNEdkzbY'


{"access_token":"eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJYVUh3VWZKaHVDVWo0X3k4ZF8xM0hxWXBYMFdwdDd2anhob2FPLUxzREZFIn0.eyJleHAiOjE3NDM3MDY3MTksImlhdCI6MTc0MzcwNjExOSwianRpIjoiNzNiZTE3ODgtYjhmMC00YzI3LThmMmMtMzMwMjQ1ODQ2ZDEwIiwiaXNzIjoiaHR0cHM6Ly9pZGVudGl0eS5kYXRhc3BhY2UuY29wZXJuaWN1cy5ldS9hdXRoL3JlYWxtcy9DRFNFIiwic3ViIjoiOTkxNjg3YzktYzlkMi00MWI1LTllZTItYTVhYjE0MmY0NjMzIiwidHlwIjoiQmVhcmVyIiwiYXpwIjoic2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4Iiwic2NvcGUiOiJlbWFpbCBwcm9maWxlIHVzZXItY29udGV4dCIsImVtYWlsX3ZlcmlmaWVkIjpmYWxzZSwiY2xpZW50SG9zdCI6IjEwNC4xOTguMTUzLjE0MyIsIm9yZ2FuaXphdGlvbnMiOlsiZGVmYXVsdC1hOTViODU0NS1iMzFlLTRiNTgtYjlhYy0xNGVjZjQ3ZTZlZGMiXSwidXNlcl9jb250ZXh0X2lkIjoiMThlYjFjODItNmVjMC00ZjQzLWI5OGYtZjc0NjI0YzQ0Y2E5IiwiY29udGV4dF9yb2xlcyI6e30sImNvbnRleHRfZ3JvdXBzIjpbIi9hY2Nlc3NfZ3JvdXBzL3VzZXJfdHlwb2xvZ3kvY29wZXJuaWN1c19nZW5lcmFsLyIsIi9vcmdhbml6YXRpb25zL2RlZmF1bHQtYTk1Yjg1NDUtYjMxZS00YjU4LWI5YWMtMTRlY2Y0N2U2ZWRjLyJdLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJzZXJ2aWNlLWFjY29

In [None]:
!wget  --header "Authorization: Bearer $ACCESS_TOKEN" 'https://catalogue.dataspace.copernicus.eu/odata/v1/Products(002f0c9e-8a4c-465b-9e03-479475947630)/$zip' -O example_odata.zip


--2025-04-03 18:43:34--  https://catalogue.dataspace.copernicus.eu/odata/v1/Products(002f0c9e-8a4c-465b-9e03-479475947630)/$zip
Resolving catalogue.dataspace.copernicus.eu (catalogue.dataspace.copernicus.eu)... 185.48.233.66
Connecting to catalogue.dataspace.copernicus.eu (catalogue.dataspace.copernicus.eu)|185.48.233.66|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://download.dataspace.copernicus.eu/odata/v1/Products(002f0c9e-8a4c-465b-9e03-479475947630)/$zip [following]
--2025-04-03 18:43:35--  https://download.dataspace.copernicus.eu/odata/v1/Products(002f0c9e-8a4c-465b-9e03-479475947630)/$zip
Resolving download.dataspace.copernicus.eu (download.dataspace.copernicus.eu)... 185.48.234.168
Connecting to download.dataspace.copernicus.eu (download.dataspace.copernicus.eu)|185.48.234.168|:443... connected.
HTTP request sent, awaiting response... 403 Forbidden
2025-04-03 18:43:36 ERROR 403: Forbidden.



In [None]:
!curl -H "Authorization: Bearer $ACCESS_TOKEN" 'https://catalogue.dataspace.copernicus.eu/odata/v1/Products(S2A_MSIL1C_20170918T100021_N0500_R122_T32SPD_20231014T080243.SAFE)/$zip' --location-trusted --output S2.zip


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   162  100   162    0     0    310      0 --:--:-- --:--:-- --:--:--   310
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0


In [None]:
ACCESS_TOKEN = 'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJYVUh3VWZKaHVDVWo0X3k4ZF8xM0hxWXBYMFdwdDd2anhob2FPLUxzREZFIn0.eyJleHAiOjE3NDM3MDY3MTksImlhdCI6MTc0MzcwNjExOSwianRpIjoiNzNiZTE3ODgtYjhmMC00YzI3LThmMmMtMzMwMjQ1ODQ2ZDEwIiwiaXNzIjoiaHR0cHM6Ly9pZGVudGl0eS5kYXRhc3BhY2UuY29wZXJuaWN1cy5ldS9hdXRoL3JlYWxtcy9DRFNFIiwic3ViIjoiOTkxNjg3YzktYzlkMi00MWI1LTllZTItYTVhYjE0MmY0NjMzIiwidHlwIjoiQmVhcmVyIiwiYXpwIjoic2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4Iiwic2NvcGUiOiJlbWFpbCBwcm9maWxlIHVzZXItY29udGV4dCIsImVtYWlsX3ZlcmlmaWVkIjpmYWxzZSwiY2xpZW50SG9zdCI6IjEwNC4xOTguMTUzLjE0MyIsIm9yZ2FuaXphdGlvbnMiOlsiZGVmYXVsdC1hOTViODU0NS1iMzFlLTRiNTgtYjlhYy0xNGVjZjQ3ZTZlZGMiXSwidXNlcl9jb250ZXh0X2lkIjoiMThlYjFjODItNmVjMC00ZjQzLWI5OGYtZjc0NjI0YzQ0Y2E5IiwiY29udGV4dF9yb2xlcyI6e30sImNvbnRleHRfZ3JvdXBzIjpbIi9hY2Nlc3NfZ3JvdXBzL3VzZXJfdHlwb2xvZ3kvY29wZXJuaWN1c19nZW5lcmFsLyIsIi9vcmdhbml6YXRpb25zL2RlZmF1bHQtYTk1Yjg1NDUtYjMxZS00YjU4LWI5YWMtMTRlY2Y0N2U2ZWRjLyJdLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJzZXJ2aWNlLWFjY291bnQtc2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4IiwidXNlcl9jb250ZXh0IjoiZGVmYXVsdC1hOTViODU0NS1iMzFlLTRiNTgtYjlhYy0xNGVjZjQ3ZTZlZGMiLCJjbGllbnRBZGRyZXNzIjoiMTA0LjE5OC4xNTMuMTQzIiwiY2xpZW50X2lkIjoic2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4In0.eJdFXY3L5c2OUU0G88kT2CsG49nhTJod9ORkL0gAOuTfl4r0QD4vGvk0ap8qlmrsJ31oueQKpgpC29u5MR4bFa9iELpgebAC-YdnyxdtwxmSviUJ7NbNq0MK4FHmaJPmPNgRtmhSbfgsqXDruyuBAtCH4NViGIC9Q4xq8_sE3gvkGeV1VZCzuKdCL7d9eqGoZEnGhr4aNnpDW8Pf7vBPuZgI484uwdSgQyQamdAmuB2Y4PIgDUUhreu0jtobQXq6lRYGiP-sWVefd5UsUl_r9i0nnn49IbD-wNGZx7MmfBycngULyuTrrV8QKiIzU5WCULDa7GVIMBurZ96QWuQqig'


In [None]:
import pandas as pd
import requests
from pathlib import Path

# === CONFIG ===
CDSE_USER = "navoditamathur1998@gmail.com"
CDSE_PASS = "Abcdef111998@"
BASE_URL = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products"
DOWNLOAD_DIR = Path("safe_zip_downloads")
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

import pandas as pd
import requests
from pathlib import Path

# === Configuration ===
ACCESS_TOKEN = 'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJYVUh3VWZKaHVDVWo0X3k4ZF8xM0hxWXBYMFdwdDd2anhob2FPLUxzREZFIn0.eyJleHAiOjE3NDM3MDY3MTksImlhdCI6MTc0MzcwNjExOSwianRpIjoiNzNiZTE3ODgtYjhmMC00YzI3LThmMmMtMzMwMjQ1ODQ2ZDEwIiwiaXNzIjoiaHR0cHM6Ly9pZGVudGl0eS5kYXRhc3BhY2UuY29wZXJuaWN1cy5ldS9hdXRoL3JlYWxtcy9DRFNFIiwic3ViIjoiOTkxNjg3YzktYzlkMi00MWI1LTllZTItYTVhYjE0MmY0NjMzIiwidHlwIjoiQmVhcmVyIiwiYXpwIjoic2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4Iiwic2NvcGUiOiJlbWFpbCBwcm9maWxlIHVzZXItY29udGV4dCIsImVtYWlsX3ZlcmlmaWVkIjpmYWxzZSwiY2xpZW50SG9zdCI6IjEwNC4xOTguMTUzLjE0MyIsIm9yZ2FuaXphdGlvbnMiOlsiZGVmYXVsdC1hOTViODU0NS1iMzFlLTRiNTgtYjlhYy0xNGVjZjQ3ZTZlZGMiXSwidXNlcl9jb250ZXh0X2lkIjoiMThlYjFjODItNmVjMC00ZjQzLWI5OGYtZjc0NjI0YzQ0Y2E5IiwiY29udGV4dF9yb2xlcyI6e30sImNvbnRleHRfZ3JvdXBzIjpbIi9hY2Nlc3NfZ3JvdXBzL3VzZXJfdHlwb2xvZ3kvY29wZXJuaWN1c19nZW5lcmFsLyIsIi9vcmdhbml6YXRpb25zL2RlZmF1bHQtYTk1Yjg1NDUtYjMxZS00YjU4LWI5YWMtMTRlY2Y0N2U2ZWRjLyJdLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJzZXJ2aWNlLWFjY291bnQtc2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4IiwidXNlcl9jb250ZXh0IjoiZGVmYXVsdC1hOTViODU0NS1iMzFlLTRiNTgtYjlhYy0xNGVjZjQ3ZTZlZGMiLCJjbGllbnRBZGRyZXNzIjoiMTA0LjE5OC4xNTMuMTQzIiwiY2xpZW50X2lkIjoic2gtNmZkZWNmNjQtZDVjOS00ODYyLWFlMmUtYzMwYzczNjhkNzM4In0.eJdFXY3L5c2OUU0G88kT2CsG49nhTJod9ORkL0gAOuTfl4r0QD4vGvk0ap8qlmrsJ31oueQKpgpC29u5MR4bFa9iELpgebAC-YdnyxdtwxmSviUJ7NbNq0MK4FHmaJPmPNgRtmhSbfgsqXDruyuBAtCH4NViGIC9Q4xq8_sE3gvkGeV1VZCzuKdCL7d9eqGoZEnGhr4aNnpDW8Pf7vBPuZgI484uwdSgQyQamdAmuB2Y4PIgDUUhreu0jtobQXq6lRYGiP-sWVefd5UsUl_r9i0nnn49IbD-wNGZx7MmfBycngULyuTrrV8QKiIzU5WCULDa7GVIMBurZ96QWuQqig'
BASE_URL = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products"
DOWNLOAD_DIR = Path("safe_zip_downloads")
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

# === Load product IDs from matched_s2_products.csv ===
df = pd.read_csv("matched_s2_products.csv")
product_ids = df["product_id"].dropna().unique()

# === Setup session with token header ===
session = requests.Session()
session.headers.update({
    "Authorization": f"Bearer {ACCESS_TOKEN}"
})

def download_safe_zip(product_id):
    print(product_id)
    #download_url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({product_id})/$zip"
    download_url = f"{BASE_URL}({product_id})/$zip"
    print(download_url)
    output_path = DOWNLOAD_DIR / f"{product_id}.zip"

    if output_path.exists():
        print(f"✅ Already downloaded: {output_path.name}")
        return

    print(f"⬇️ Downloading: {output_path.name}")
    try:
        with session.get(download_url, stream=True) as response:
            if response.status_code == 200:
                with open(output_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                print(f"✅ Downloaded: {output_path.name}")
            else:
                print(f"❌ Failed for {product_id}: HTTP {response.status_code} - {response.text}")
    except Exception as e:
        print(f"❌ Error for {product_id}: {e}")

# === Loop through all product IDs ===
for pid in product_ids:
    download_safe_zip(pid)


# === Loop over all unique product IDs ===
token = get_access_token(CDSE_USER, CDSE_PASS)
for pid in product_ids:
    download_safe_zip(pid, token)


S2A_MSIL1C_20170918T100021_N0500_R122_T32SPD_20231014T080243.SAFE
https://catalogue.dataspace.copernicus.eu/odata/v1/Products(S2A_MSIL1C_20170918T100021_N0500_R122_T32SPD_20231014T080243.SAFE)/$zip
⬇️ Downloading: S2A_MSIL1C_20170918T100021_N0500_R122_T32SPD_20231014T080243.SAFE.zip
❌ Failed for S2A_MSIL1C_20170918T100021_N0500_R122_T32SPD_20231014T080243.SAFE: HTTP 401 - {"detail":"Unauthorized"}
S2A_MSIL1C_20170928T100021_N0500_R122_T32SPD_20230912T173224.SAFE
https://catalogue.dataspace.copernicus.eu/odata/v1/Products(S2A_MSIL1C_20170928T100021_N0500_R122_T32SPD_20230912T173224.SAFE)/$zip
⬇️ Downloading: S2A_MSIL1C_20170928T100021_N0500_R122_T32SPD_20230912T173224.SAFE.zip
❌ Failed for S2A_MSIL1C_20170928T100021_N0500_R122_T32SPD_20230912T173224.SAFE: HTTP 401 - {"detail":"Unauthorized"}
S2A_MSIL1C_20170928T100021_N0500_R122_T32SNC_20230912T173224.SAFE
https://catalogue.dataspace.copernicus.eu/odata/v1/Products(S2A_MSIL1C_20170928T100021_N0500_R122_T32SNC_20230912T173224.SAFE)/$zip


KeyboardInterrupt: 

In [None]:
!pip install cdse_catalog

[31mERROR: Could not find a version that satisfies the requirement cdse_catalog (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for cdse_catalog[0m[31m
[0m

In [None]:
from cdse_catalog import CDSEConnection

# Automatically uses ~/.cdse.toml
conn = CDSEConnection()

# Replace with your actual product title
product_title = "S2A_MSIL1C_20230401T103021_N0509_R108_T32TNS_20230401T134512.SAFE"

# Search the catalog
results = conn.search(title=product_title)

if results:
    product = results[0]
    print(f"Downloading: {product.title}")
    product.download(output_dir="downloads")
else:
    print("Product not found.")


ConnectionError: HTTPSConnectionPool(host='apihub.dataspace.copernicus.eu', port=443): Max retries exceeded with url: /odata/v1/Products('S2A_MSIL1C_20170928T100021_N0500_R122_T32SNC_20230912T173224.SAFE')?$format=json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f7568124910>: Failed to resolve 'apihub.dataspace.copernicus.eu' ([Errno -2] Name or service not known)"))

In [None]:
test_product_id = "ac541bbf-18ff-5d98-967a-e41b1b2f4d27"
access_token = get_access_token(CDSE_USER, CDSE_PASS)
headers = {"Authorization": f"Bearer {access_token}"}
url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products('{test_product_id}')/$value"

r = requests.get(url, headers=headers, stream=True)
print("Status:", r.status_code)


Status: 401


In [None]:
import pandas as pd
import os
import zipfile
import rasterio
from rasterio.windows import Window
from pathlib import Path
import hypercoast

# === Setup Paths ===
work_dir = Path("hypercoast_work")
input_dir = work_dir / "safe"
output_dir = work_dir / "output"
image_patch_dir = Path("geotiffs")
input_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
image_patch_dir.mkdir(parents=True, exist_ok=True)

# === Download ACOLITE (once) ===
acolite_dir = hypercoast.download_acolite(str(work_dir))

# === Group by unique S2 product ===
product_groups = enriched_df.groupby("s2_product")

for s2_product, group in product_groups:
    print(f"\n🛰 Processing {s2_product} with {len(group)} filaments")

    safe_zip_path = input_dir / f"{s2_product}.zip"
    safe_folder_path = input_dir / f"{s2_product}.SAFE"

    # Step 1: Extract ZIP if not already
    if not safe_folder_path.exists():
        if safe_zip_path.exists():
            print(f"📦 Extracting {safe_zip_path}")
            try:
                with zipfile.ZipFile(safe_zip_path, 'r') as zip_ref:
                    zip_ref.extractall(input_dir)
            except Exception as e:
                print(f"❌ Failed to extract {safe_zip_path}: {e}")
                continue
        else:
            print(f"❌ ZIP file not found: {safe_zip_path}")
            continue
    else:
        print("✅ .SAFE folder exists.")

    # Step 2: Run ACOLITE once for this product
    try:
        print(f"🚀 Running ACOLITE...")
        hypercoast.run_acolite(
            acolite_dir=acolite_dir,
            input_file=str(safe_folder_path),
            out_dir=str(output_dir),
            l2w_parameters="Rrs_*",
            resolution=10,
            rgb_rhot=True,
            map_l2w=True
        )
    except Exception as e:
        print(f"❌ ACOLITE failed for {s2_product}: {e}")
        continue

    # Step 3: For each filament in this product, crop its patch
    band_path = output_dir / f"{s2_product}_Rrs_B04.tif"
    if not band_path.exists():
        print(f"⚠️ Missing ACOLITE output: {band_path}")
        continue

    for _, filament in group.iterrows():
        filament_id = filament["filament_id"]
        x_center = int(filament["x_centroid"])
        y_center = int(filament["y_centroid"])
        output_patch_path = image_patch_dir / f"filament_{filament_id}_image.tif"

        try:
            with rasterio.open(band_path) as src:
                window = Window(x_center - 128, y_center - 128, 256, 256)
                transform = src.window_transform(window)

                profile = src.meta.copy()
                profile.update({
                    "height": 256,
                    "width": 256,
                    "transform": transform
                })

                with rasterio.open(output_patch_path, "w", **profile) as dst:
                    dst.write(src.read(window=window))
            print(f"✅ Saved patch: {output_patch_path}")
        except Exception as e:
            print(f"❌ Failed to crop filament {filament_id}: {e}")


hypercoast_work/acolite_py_linux_20231023.0.tar.gz: 100%|██████████| 162M/162M [00:03<00:00, 48.0MiB/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
🛰 Processing S2A_MSIL1C_20210917T081611_N0301_R121_T36SYF_20210917T091004.SAFE with 86 filaments
❌ ZIP file not found: hypercoast_work/safe/S2A_MSIL1C_20210917T081611_N0301_R121_T36SYF_20210917T091004.SAFE.zip

🛰 Processing S2B_MSIL1C_20170630T091029_N0205_R050_T34SFH_20170630T091801.SAFE with 18 filaments
❌ ZIP file not found: hypercoast_work/safe/S2B_MSIL1C_20170630T091029_N0205_R050_T34SFH_20170630T091801.SAFE.zip

🛰 Processing S2B_MSIL1C_20170705T100029_N0205_R122_T32SPC_20170705T101050.SAFE with 600 filaments
❌ ZIP file not found: hypercoast_work/safe/S2B_MSIL1C_20170705T100029_N0205_R122_T32SPC_20170705T101050.SAFE.zip

🛰 Processing S2B_MSIL1C_20170705T100029_N0205_R122_T32SPD_20170705T101050.SAFE with 1322 filaments
❌ ZIP file not found: hypercoast_work/safe/S2B_MSIL1C_20170705T100029_N0205_R122_T32SPD_20170705T101050.SAFE.zip

🛰 Processing S2B_MSIL1C_20170705T100029_N0205_R122_T32TQR_20170705T100026.SAFE with 637 

In [None]:
df = pd.read_csv("matched_s2_products.csv")

In [None]:
len(df) == len(m_df)

True

In [None]:
import pandas as pd

for i in range(len(df)):
  dec_time_m_df = m_df.loc[0, 'dec_time']
  dec_time_df = df[df['filament_id'] == 0]['dec_time'].iloc[0]
  lat_centroid_m_df = m_df.loc[0, 'lat_centroid']
  lat_centroid_df = df[df['filament_id'] == 0]['lat_centroid'].iloc[0]
  lon_centroid_m_df = m_df.loc[0, 'lon_centroid']
  lon_centroid_df = df[df['filament_id'] == 0]['lon_centroid'].iloc[0]

  tolerance = 1e-12  # Adjust tolerance as needed
  are_equal = abs(dec_time_m_df - dec_time_df) < tolerance
  are_equal = are_equal and lat_centroid_m_df == lat_centroid_df
  are_equal = are_equal and lon_centroid_m_df == lon_centroid_df
  if are_equal:
    print(f"Row {i} is equal.")
  else:
    print(f"Row {i} is not equal.")
    break


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Row 9374 is equal.
Row 9375 is equal.
Row 9376 is equal.
Row 9377 is equal.
Row 9378 is equal.
Row 9379 is equal.
Row 9380 is equal.
Row 9381 is equal.
Row 9382 is equal.
Row 9383 is equal.
Row 9384 is equal.
Row 9385 is equal.
Row 9386 is equal.
Row 9387 is equal.
Row 9388 is equal.
Row 9389 is equal.
Row 9390 is equal.
Row 9391 is equal.
Row 9392 is equal.
Row 9393 is equal.
Row 9394 is equal.
Row 9395 is equal.
Row 9396 is equal.
Row 9397 is equal.
Row 9398 is equal.
Row 9399 is equal.
Row 9400 is equal.
Row 9401 is equal.
Row 9402 is equal.
Row 9403 is equal.
Row 9404 is equal.
Row 9405 is equal.
Row 9406 is equal.
Row 9407 is equal.
Row 9408 is equal.
Row 9409 is equal.
Row 9410 is equal.
Row 9411 is equal.
Row 9412 is equal.
Row 9413 is equal.
Row 9414 is equal.
Row 9415 is equal.
Row 9416 is equal.
Row 9417 is equal.
Row 9418 is equal.
Row 9419 is equal.
Row 9420 is equal.
Row 9421 is equal.
Row 9422 is equal.
Row 

In [1]:
!pip install sentinelsat
!pip install pystac_client
!pip install pystac
!pip install netCDF4

Collecting sentinelsat
  Downloading sentinelsat-1.2.1-py3-none-any.whl.metadata (10 kB)
Collecting html2text (from sentinelsat)
  Downloading html2text-2024.2.26.tar.gz (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting geojson>=2 (from sentinelsat)
  Downloading geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Collecting geomet (from sentinelsat)
  Downloading geomet-1.1.0-py3-none-any.whl.metadata (11 kB)
Downloading sentinelsat-1.2.1-py3-none-any.whl (48 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.8/48.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading geojson-3.2.0-py3-none-any.whl (15 kB)
Downloading geomet-1.1.0-py3-none-any.whl (31 kB)
Building wheels for collected packages: html2text
  Building wheel for html2text (setup.py) ... [?25l[?25hdone
  Created wheel for html2text: filename=html2text

In [14]:
config = SHConfig()
config.sh_client_id = "sh-d8cb7089-a727-46c1-962a-cc25356a21f8"
config.sh_client_secret = "1Ml18KecO9NRA0Wm1u8rh5REqUvQHO4R"
config.sh_token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
config.sh_base_url = "https://sh.dataspace.copernicus.eu"
config.save("cdse")

In [23]:
# Download Natural Earth dataset directly
!unzip ne_110m_admin_0_countries.zip

Archive:  ne_110m_admin_0_countries.zip
  inflating: ne_110m_admin_0_countries.README.html  
 extracting: ne_110m_admin_0_countries.VERSION.txt  
 extracting: ne_110m_admin_0_countries.cpg  
  inflating: ne_110m_admin_0_countries.dbf  
  inflating: ne_110m_admin_0_countries.prj  
  inflating: ne_110m_admin_0_countries.shp  
  inflating: ne_110m_admin_0_countries.shx  


In [28]:
!pip install geopandas ace_tools

Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace_tools
Successfully installed ace_tools-0.0


In [32]:
import geopandas as gpd
from shapely.geometry import Polygon
import pandas as pd

# Define bounding boxes
# Change accordingly
AOI_BBOXES = {
    'Po_River_Plume':      [12.5, 44.8, 13.2, 45.2],
    'Northern_Corsica':    [8.5, 42.9, 9.2, 43.6],
    'South_East_Calabria': [16.5, 38.35, 16.755, 38.555],
    'Gulf_of_Genova':      [8.5, 43.7, 9.2, 44.2],
}

# Convert to polygons explicitly
aoi_data = []
for name, (minx, miny, maxx, maxy) in AOI_BBOXES.items():
    poly = Polygon([(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny)])
    aoi_data.append({"name": name, "geometry": poly})

aoi_gdf = gpd.GeoDataFrame(aoi_data, crs="EPSG:4326")

# Load land polygons using the downloaded file
land = gpd.read_file("ne_110m_admin_0_countries.shp")

# Check if AOIs intersect land
aoi_with_land = gpd.sjoin(aoi_gdf, land, how="left", predicate="intersects")

# Instead of directly using  continent and name_right which may not exist, use available columns.
# Inspect the columns in land using land.columns
# Choose appropriate columns and rename if needed
aoi_with_land_result = aoi_with_land[["name", "geometry", "ADMIN", "NAME"]].rename(
    columns={"ADMIN": "continent", "NAME": "intersecting_land"} # Replace ADMIN, NAME with actual column names from land
)


# Show result
print(aoi_with_land_result[["name", "intersecting_land", "continent"]])


                  name intersecting_land continent
0       Po_River_Plume               NaN       NaN
1     Northern_Corsica               NaN       NaN
2  South_East_Calabria               NaN       NaN
3       Gulf_of_Genova               NaN       NaN


In [33]:
from netCDF4 import Dataset
from datetime import datetime, timedelta
from sentinelhub import SentinelHubCatalog, SHConfig, BBox, CRS, DataCollection
import pandas as pd
import numpy as np

# === Load from .nc file ===
nc_path = "/content/drive/MyDrive/WASP_LW_SENT2_MED_L1C_B_201506_202109_10m_6y_NRT_v1.0.nc"
ds = Dataset(nc_path)

# === Extract fields from NetCDF ===
s2_products = ["".join(x).strip() for x in ds.variables["s2_product"][:].data.astype(str)]
lat_centroid = ds.variables["lat_centroid"][:]
lon_centroid = ds.variables["lon_centroid"][:]

# === Parse datetime from original product name ===
def get_datetime_from_s2_product(product):
    try:
        dt_str = product.split("_")[2]  # e.g., 20170918T100021
        return datetime.strptime(dt_str, "%Y%m%dT%H%M%S")
    except Exception:
        return None

# === Initialize Sentinel Hub Catalog ===
config = SHConfig('cdse')
catalog = SentinelHubCatalog(config=config)

# === Query updated product from SentinelHub ===
def find_updated_safe(lat, lon, dt):
    if dt is None:
        return None

    bbox = BBox([lon - 0.01, lat - 0.01, lon + 0.01, lat + 0.01], crs=CRS.WGS84)
    time_interval = (dt.strftime("%Y-%m-%d"), (dt + timedelta(days=1)).strftime("%Y-%m-%d"))

    results = list(
        catalog.search(
            collection=DataCollection.SENTINEL2_L1C,
            bbox=bbox,
            time=time_interval,
            fields={"include": ["id", "properties.datetime", "properties.mgrsTile"]},
            limit=1,
        )
    )

    if results:
        item = results[0]
        product_id = item["id"]
        acquisition_time = item["properties"]["datetime"]
        mgrs_tile = item["properties"].get("mgrsTile", "Unknown")

        return {
            "updated_product_id": product_id,
            "acquisition_datetime": acquisition_time,
            "mgrs_tile": mgrs_tile
        }

    return {"updated_product_id": None, "acquisition_datetime": None, "mgrs_tile": None}

# Define bounding boxes
AOI_BBOXES = {
    'Po_River_Plume':      [12.5, 44.8, 13.2, 45.2],
    'Northern_Corsica':    [8.5, 42.9, 9.2, 43.6],
    'South_East_Calabria': [16.5, 38.35, 16.755, 38.555],
    'Gulf_of_Genova':      [8.5, 43.7, 9.2, 44.2],
}

def valid_point(lat, lon):
   for name, (minx, miny, maxx, maxy) in AOI_BBOXES.items():
       if minx <= lon <= maxx and miny <= lat <= maxy:
           return True
   return False

# === Build the result table ===
rows = []
for i in range(len(s2_products)):
    original_id = s2_products[i]
    lat = lat_centroid[i]
    lon = lon_centroid[i]
    if not valid_point(lat, lon):
        continue
    dt = get_datetime_from_s2_product(original_id)
    updated_info = find_updated_safe(lat, lon, dt) or {}
    rows.append({
        "filament_id": i,
        "original_s2_product": original_id,
        "lat_centroid": lat,
        "lon_centroid": lon,
        **updated_info
    })

# === Save to CSV ===
df = pd.DataFrame(rows)
df.to_csv("updated_s2_products.csv", index=False)
print("✅ Saved: updated_s2_products.csv")


✅ Saved: updated_s2_products.csv


In [39]:
len(df)

966

In [40]:
df.head()

Unnamed: 0,filament_id,original_s2_product,lat_centroid,lon_centroid,updated_product_id,acquisition_datetime,mgrs_tile
0,2798,S2A_MSIL1C_20170723T101031_N0205_R022_T32TMN_2...,43.264108,9.105954,S2A_MSIL1C_20170723T101031_N0500_R022_T32TMN_2...,2017-07-23T10:19:06.461Z,Unknown
1,2799,S2A_MSIL1C_20170921T101021_N0205_R022_T32TMN_2...,43.255225,9.107647,S2A_MSIL1C_20170921T101021_N0500_R022_T32TMN_2...,2017-09-21T10:19:00.545Z,Unknown
2,2802,S2A_MSIL1C_20180509T101031_N0206_R022_T32TMN_2...,43.240564,9.084905,S2A_MSIL1C_20180509T101031_N0500_R022_T32TMN_2...,2018-05-09T10:19:07.746Z,Unknown
3,2807,S2A_MSIL1C_20180323T102021_N0206_R065_T32TMN_2...,42.905445,8.966524,S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_2...,2018-03-23T10:28:58.753Z,Unknown
4,2808,S2A_MSIL1C_20180323T102021_N0206_R065_T32TMN_2...,42.90464,9.024241,S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_2...,2018-03-23T10:28:58.753Z,Unknown


In [38]:
df['original_s2_product'].nunique()

357

In [37]:
df['updated_product_id'].nunique()

250

In [50]:
def match(new, old):
  mismatch = False
  old_tile_id = old.split('_')[5].strip()
  new_tile_id = new.split('_')[5].strip()
  print(old_tile_id, new_tile_id)
  if old_tile_id == new_tile_id:
      print("Tile ID matches")
  else:
      mismatch = True
  old_str_time = old.split('_')[2].strip()
  new_str_time = new.split('_')[2].strip()
  print(old_str_time, new_str_time)
  if old_str_time == new_str_time:
      print("Time matches")
  else:
      mismatch = True
  print(mismatch)
  return mismatch

In [52]:
df['mis_match'] = df.apply(lambda row: match(row['original_s2_product'], row['updated_product_id']), axis=1)

T32TMN T32TMN
Tile ID matches
20170723T101031 20170723T101031
Time matches
False
T32TMN T32TMN
Tile ID matches
20170921T101021 20170921T101021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180509T101031 20180509T101031
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20201018T102041 20201018T102041
Time matches
False
T32TMP T32TMP
Tile ID matches
20170517T102031 20170517T102031
Time matches
False
T32TMP T32TMP
Tile ID matches
20170517T102031 20170517T102031
Time matches
False
T32TMP T32TMP
Tile ID matche

In [53]:
df['mis_match'].value_counts()

Unnamed: 0_level_0,count
mis_match,Unnamed: 1_level_1
True,567
False,399


In [63]:
df['updated_product_id'].unique()

array(['S2A_MSIL1C_20170723T101031_N0500_R022_T32TMN_20231008T072550.SAFE',
       'S2A_MSIL1C_20170921T101021_N0500_R022_T32TMN_20231014T085005.SAFE',
       'S2A_MSIL1C_20180509T101031_N0500_R022_T32TMN_20230829T201517.SAFE',
       'S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_20230904T190420.SAFE',
       'S2A_MSIL1C_20200521T102031_N0500_R065_T32TMN_20230503T140151.SAFE',
       'S2A_MSIL1C_20201018T102041_N0500_R065_T32TMN_20230414T050856.SAFE',
       'S2A_MSIL1C_20170517T102031_N0500_R065_T32TMP_20231114T020912.SAFE',
       'S2A_MSIL1C_20170527T102031_N0500_R065_T32TMP_20231111T145716.SAFE',
       'S2A_MSIL1C_20170616T102021_N0500_R065_T32TMP_20231012T144506.SAFE',
       'S2A_MSIL1C_20180422T102031_N0500_R065_T32TMP_20230915T072546.SAFE',
       'S2A_MSIL1C_20200521T102031_N0500_R065_T32TMP_20230503T140151.SAFE',
       'S2A_MSIL1C_20201018T102041_N0500_R065_T32TMP_20230414T050856.SAFE',
       'S2A_MSIL1C_20170424T101031_N0500_R022_T32TMN_20231114T114644.SAFE',
       'S2A_