In [1]:
!pip install sentinelsat
!pip install pystac_client
!pip install pystac
!pip install netCDF4

Collecting sentinelsat
  Downloading sentinelsat-1.2.1-py3-none-any.whl.metadata (10 kB)
Collecting html2text (from sentinelsat)
  Downloading html2text-2024.2.26.tar.gz (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting geojson>=2 (from sentinelsat)
  Downloading geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Collecting geomet (from sentinelsat)
  Downloading geomet-1.1.0-py3-none-any.whl.metadata (11 kB)
Downloading sentinelsat-1.2.1-py3-none-any.whl (48 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.8/48.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading geojson-3.2.0-py3-none-any.whl (15 kB)
Downloading geomet-1.1.0-py3-none-any.whl (31 kB)
Building wheels for collected packages: html2text
  Building wheel for html2text (setup.py) ... [?25l[?25hdone
  Created wheel for html2text: filename=html2text

In [14]:
import os
from getpass import getpass

from sentinelhub import SHConfig

# SECURITY: the OAuth client id/secret used to be hardcoded in this cell.
# Credentials that were committed with the notebook should be treated as leaked
# and revoked. They are now read from the SH_CLIENT_ID / SH_CLIENT_SECRET
# environment variables, falling back to an interactive prompt.
config = SHConfig()
config.sh_client_id = os.environ.get("SH_CLIENT_ID") or getpass("Sentinel Hub client id: ")
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET") or getpass("Sentinel Hub client secret: ")
config.sh_token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
config.sh_base_url = "https://sh.dataspace.copernicus.eu"
# Persist the settings under the profile name "cdse" so that later cells can
# load them with SHConfig('cdse').
config.save("cdse")

In [23]:
# Download Natural Earth dataset directly
!unzip ne_110m_admin_0_countries.zip

Archive:  ne_110m_admin_0_countries.zip
  inflating: ne_110m_admin_0_countries.README.html  
 extracting: ne_110m_admin_0_countries.VERSION.txt  
 extracting: ne_110m_admin_0_countries.cpg  
  inflating: ne_110m_admin_0_countries.dbf  
  inflating: ne_110m_admin_0_countries.prj  
  inflating: ne_110m_admin_0_countries.shp  
  inflating: ne_110m_admin_0_countries.shx  


In [28]:
!pip install geopandas ace_tools

Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace_tools
Successfully installed ace_tools-0.0


In [32]:
import geopandas as gpd
from shapely.geometry import Polygon
import pandas as pd

# Define bounding boxes as [minx, miny, maxx, maxy] in EPSG:4326
# (x is longitude, y is latitude).
# Change accordingly
AOI_BBOXES = {
    'Po_River_Plume':      [12.5, 44.8, 13.2, 45.2],
    'Northern_Corsica':    [8.5, 42.9, 9.2, 43.6],
    'South_East_Calabria': [16.5, 38.35, 16.755, 38.555],
    'Gulf_of_Genova':      [8.5, 43.7, 9.2, 44.2],
}

# Turn every bounding box into a rectangular polygon, one record per AOI.
aoi_data = [
    {
        "name": name,
        "geometry": Polygon([(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny)]),
    }
    for name, (minx, miny, maxx, maxy) in AOI_BBOXES.items()
]

aoi_gdf = gpd.GeoDataFrame(aoi_data, crs="EPSG:4326")

# Load land polygons from the unzipped Natural Earth shapefile.
land = gpd.read_file("ne_110m_admin_0_countries.shp")

# Left join: every AOI is kept; an AOI that intersects no land polygon gets NaN
# in all columns coming from `land`.
aoi_with_land = gpd.sjoin(aoi_gdf, land, how="left", predicate="intersects")

# NAME is the country name and CONTINENT the continent of the intersecting
# polygon. The previous version relabelled ADMIN (a country-name field) as
# "continent", which mislabelled the column.
aoi_with_land_result = aoi_with_land[["name", "geometry", "CONTINENT", "NAME"]].rename(
    columns={"CONTINENT": "continent", "NAME": "intersecting_land"}
)


# Show result
print(aoi_with_land_result[["name", "intersecting_land", "continent"]])


                  name intersecting_land continent
0       Po_River_Plume               NaN       NaN
1     Northern_Corsica               NaN       NaN
2  South_East_Calabria               NaN       NaN
3       Gulf_of_Genova               NaN       NaN


In [33]:
from netCDF4 import Dataset
from datetime import datetime, timedelta
from sentinelhub import SentinelHubCatalog, SHConfig, BBox, CRS, DataCollection
import pandas as pd
import numpy as np

# === Open the NetCDF file ===
nc_path = "/content/drive/MyDrive/WASP_LW_SENT2_MED_L1C_B_201506_202109_10m_6y_NRT_v1.0.nc"
ds = Dataset(nc_path)

# === Read the fields used below ===
nc_vars = ds.variables

# Each row of the "s2_product" variable is converted to str elements, which are
# then joined into a single string and stripped of surrounding whitespace.
product_chars = nc_vars["s2_product"][:].data.astype(str)
s2_products = []
for char_row in product_chars:
    s2_products.append("".join(char_row).strip())

# Centroid coordinates, indexed in parallel with s2_products further down.
lat_centroid = nc_vars["lat_centroid"][:]
lon_centroid = nc_vars["lon_centroid"][:]

# === Parse datetime from original product name ===
def get_datetime_from_s2_product(product):
    """Parse the timestamp embedded in a product name.

    The name is split on "_" and the third field (index 2) is parsed with the
    format ``%Y%m%dT%H%M%S``, e.g. ``20170918T100021``.

    Args:
        product: Product name string.

    Returns:
        A ``datetime`` on success, or ``None`` when ``product`` is not a string,
        has fewer than three "_"-separated fields, or the field does not match
        the expected format.
    """
    try:
        dt_str = product.split("_")[2]  # e.g., 20170918T100021
        return datetime.strptime(dt_str, "%Y%m%dT%H%M%S")
    except (AttributeError, IndexError, TypeError, ValueError):
        # Only parse failures are swallowed; anything else is a real bug and
        # is allowed to propagate instead of being hidden by a blanket except.
        return None

# === Initialize Sentinel Hub Catalog ===
# Load the configuration profile named 'cdse' (an earlier cell stores it with
# config.save("cdse")).
config = SHConfig('cdse')
# Catalog client; find_updated_safe reads this module-level name.
catalog = SentinelHubCatalog(config=config)

# === Query updated product from SentinelHub ===
def find_updated_safe(lat, lon, dt):
    """Look up a Sentinel-2 L1C catalog item near a point on the day of ``dt``.

    The search covers a box of +/- 0.01 degrees around (lon, lat) and the
    interval from the calendar date of ``dt`` to the following date. The first
    item the catalog yields is returned as-is; nothing here checks that its
    tile or timestamp agree with any particular original product.

    Args:
        lat: Latitude of the point (WGS84).
        lon: Longitude of the point (WGS84).
        dt: ``datetime`` used to build the search interval, or ``None``.

    Returns:
        ``None`` when ``dt`` is ``None``. Otherwise a dict with the keys
        ``updated_product_id``, ``acquisition_datetime`` and ``mgrs_tile``;
        all three values are ``None`` when the search yields nothing.
        ``mgrs_tile`` is ``"Unknown"`` when the item has no ``mgrsTile``
        property.
    """
    if dt is None:
        return None

    bbox = BBox([lon - 0.01, lat - 0.01, lon + 0.01, lat + 0.01], crs=CRS.WGS84)
    time_interval = (dt.strftime("%Y-%m-%d"), (dt + timedelta(days=1)).strftime("%Y-%m-%d"))

    search = catalog.search(
        collection=DataCollection.SENTINEL2_L1C,
        bbox=bbox,
        time=time_interval,
        fields={"include": ["id", "properties.datetime", "properties.mgrsTile"]},
        limit=1,
    )

    # Only the first hit is used, so take it straight from the iterator.
    # Wrapping the search in list() would walk every page of results (per the
    # sentinelhub docs, `limit` is the number of results per request, not a cap
    # on the total), costing one request per matching item.
    item = next(iter(search), None)

    if item is None:
        return {"updated_product_id": None, "acquisition_datetime": None, "mgrs_tile": None}

    properties = item["properties"]
    return {
        "updated_product_id": item["id"],
        "acquisition_datetime": properties["datetime"],
        "mgrs_tile": properties.get("mgrsTile", "Unknown"),
    }

# Define bounding boxes
# Each value is [minx, miny, maxx, maxy]; valid_point compares minx/maxx with
# the longitude and miny/maxy with the latitude.
# NOTE(review): this repeats the AOI_BBOXES definition of an earlier cell with
# the same values - keep the two in sync or define it once.
AOI_BBOXES = {
    'Po_River_Plume':      [12.5, 44.8, 13.2, 45.2],
    'Northern_Corsica':    [8.5, 42.9, 9.2, 43.6],
    'South_East_Calabria': [16.5, 38.35, 16.755, 38.555],
    'Gulf_of_Genova':      [8.5, 43.7, 9.2, 44.2],
}

def valid_point(lat, lon, bboxes=None):
    """Return True when (lat, lon) lies inside at least one bounding box.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        bboxes: Optional mapping of name -> [minx, miny, maxx, maxy], where
            x is compared with ``lon`` and y with ``lat``. Defaults to the
            module-level ``AOI_BBOXES``.

    Returns:
        ``True`` if the point is within (edges inclusive) any box, else ``False``.
    """
    if bboxes is None:
        bboxes = AOI_BBOXES
    return any(
        minx <= lon <= maxx and miny <= lat <= maxy
        for minx, miny, maxx, maxy in bboxes.values()
    )

# === Assemble one record per filament whose centroid lies inside an AOI ===
rows = []
for i, original_id in enumerate(s2_products):
    lat, lon = lat_centroid[i], lon_centroid[i]
    if valid_point(lat, lon):
        dt = get_datetime_from_s2_product(original_id)
        # find_updated_safe returns None when dt is None; in that case the
        # record simply carries no lookup columns.
        updated_info = find_updated_safe(lat, lon, dt) or {}
        record = {
            "filament_id": i,
            "original_s2_product": original_id,
            "lat_centroid": lat,
            "lon_centroid": lon,
        }
        record.update(updated_info)
        rows.append(record)

# === Write the table to disk ===
df = pd.DataFrame(rows)
df.to_csv("updated_s2_products.csv", index=False)
print("✅ Saved: updated_s2_products.csv")


✅ Saved: updated_s2_products.csv


In [39]:
# Number of rows in the result table, i.e. filaments that passed valid_point.
len(df)

966

In [40]:
# Preview the first rows of the result table.
df.head()

Unnamed: 0,filament_id,original_s2_product,lat_centroid,lon_centroid,updated_product_id,acquisition_datetime,mgrs_tile
0,2798,S2A_MSIL1C_20170723T101031_N0205_R022_T32TMN_2...,43.264108,9.105954,S2A_MSIL1C_20170723T101031_N0500_R022_T32TMN_2...,2017-07-23T10:19:06.461Z,Unknown
1,2799,S2A_MSIL1C_20170921T101021_N0205_R022_T32TMN_2...,43.255225,9.107647,S2A_MSIL1C_20170921T101021_N0500_R022_T32TMN_2...,2017-09-21T10:19:00.545Z,Unknown
2,2802,S2A_MSIL1C_20180509T101031_N0206_R022_T32TMN_2...,43.240564,9.084905,S2A_MSIL1C_20180509T101031_N0500_R022_T32TMN_2...,2018-05-09T10:19:07.746Z,Unknown
3,2807,S2A_MSIL1C_20180323T102021_N0206_R065_T32TMN_2...,42.905445,8.966524,S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_2...,2018-03-23T10:28:58.753Z,Unknown
4,2808,S2A_MSIL1C_20180323T102021_N0206_R065_T32TMN_2...,42.90464,9.024241,S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_2...,2018-03-23T10:28:58.753Z,Unknown


In [38]:
# Number of distinct original product names among the kept rows.
df['original_s2_product'].nunique()

357

In [37]:
# Number of distinct product ids returned by the catalog lookup.
df['updated_product_id'].nunique()

250

In [50]:
def match(new, old, verbose=True):
    """Report whether two product names disagree on tile id or timestamp.

    Both names are split on "_"; field 5 is compared as the tile id and
    field 2 as the timestamp. The comparison is symmetric, so the order of
    ``new`` and ``old`` only affects the order in which values are printed.

    Args:
        new: Product name string.
        old: Product name string.
        verbose: When True (the default, matching the previous behaviour) the
            compared fields and the verdict are printed. Pass False when
            applying this over a whole DataFrame to avoid flooding the output.

    Returns:
        ``True`` when the tile ids or timestamps differ, or when either name
        cannot be parsed (not a string, e.g. None/NaN from a failed lookup, or
        fewer than six "_"-separated fields). ``False`` when both fields match.
    """
    log = print if verbose else (lambda *args: None)

    def _tile_and_time(product_id):
        # Returns (tile_id, timestamp), or None when the name is unusable.
        if not isinstance(product_id, str):
            return None
        parts = product_id.split('_')
        if len(parts) < 6:
            return None
        return parts[5].strip(), parts[2].strip()

    old_fields = _tile_and_time(old)
    new_fields = _tile_and_time(new)
    if old_fields is None or new_fields is None:
        # Nothing to compare against: count it as a mismatch instead of raising.
        log(True)
        return True

    old_tile_id, old_str_time = old_fields
    new_tile_id, new_str_time = new_fields

    mismatch = False
    log(old_tile_id, new_tile_id)
    if old_tile_id == new_tile_id:
        log("Tile ID matches")
    else:
        mismatch = True
    log(old_str_time, new_str_time)
    if old_str_time == new_str_time:
        log("Time matches")
    else:
        mismatch = True
    log(mismatch)
    return mismatch

In [52]:
# Flag every row whose original and updated product names disagree on tile id
# or timestamp field. The arguments are passed as (original, updated) while
# match's parameters are named (new, old); the comparison is symmetric, so the
# resulting flag is the same either way.
# Note: this adds the 'mis_match' column to df in place.
df['mis_match'] = df.apply(lambda row: match(row['original_s2_product'], row['updated_product_id']), axis=1)

T32TMN T32TMN
Tile ID matches
20170723T101031 20170723T101031
Time matches
False
T32TMN T32TMN
Tile ID matches
20170921T101021 20170921T101021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180509T101031 20180509T101031
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20180323T102021 20180323T102021
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20200521T102031 20200521T102031
Time matches
False
T32TMN T32TMN
Tile ID matches
20201018T102041 20201018T102041
Time matches
False
T32TMP T32TMP
Tile ID matches
20170517T102031 20170517T102031
Time matches
False
T32TMP T32TMP
Tile ID matches
20170517T102031 20170517T102031
Time matches
False
T32TMP T32TMP
Tile ID matche

In [53]:
# Tally of rows flagged as mismatched (True) versus matching (False).
df['mis_match'].value_counts()

Unnamed: 0_level_0,count
mis_match,Unnamed: 1_level_1
True,567
False,399


In [63]:
# List the distinct product ids returned by the catalog lookup.
df['updated_product_id'].unique()

array(['S2A_MSIL1C_20170723T101031_N0500_R022_T32TMN_20231008T072550.SAFE',
       'S2A_MSIL1C_20170921T101021_N0500_R022_T32TMN_20231014T085005.SAFE',
       'S2A_MSIL1C_20180509T101031_N0500_R022_T32TMN_20230829T201517.SAFE',
       'S2A_MSIL1C_20180323T102021_N0500_R065_T32TMN_20230904T190420.SAFE',
       'S2A_MSIL1C_20200521T102031_N0500_R065_T32TMN_20230503T140151.SAFE',
       'S2A_MSIL1C_20201018T102041_N0500_R065_T32TMN_20230414T050856.SAFE',
       'S2A_MSIL1C_20170517T102031_N0500_R065_T32TMP_20231114T020912.SAFE',
       'S2A_MSIL1C_20170527T102031_N0500_R065_T32TMP_20231111T145716.SAFE',
       'S2A_MSIL1C_20170616T102021_N0500_R065_T32TMP_20231012T144506.SAFE',
       'S2A_MSIL1C_20180422T102031_N0500_R065_T32TMP_20230915T072546.SAFE',
       'S2A_MSIL1C_20200521T102031_N0500_R065_T32TMP_20230503T140151.SAFE',
       'S2A_MSIL1C_20201018T102041_N0500_R065_T32TMP_20230414T050856.SAFE',
       'S2A_MSIL1C_20170424T101031_N0500_R022_T32TMN_20231114T114644.SAFE',
       'S2A_