In [3]:
!pip install sentinelhub
!pip install tqdm
!pip install netCDF4

Collecting sentinelhub
  Downloading sentinelhub-3.11.1-py3-none-any.whl (249 kB)
[K     |████████████████████████████████| 249 kB 6.8 MB/s eta 0:00:01
[?25hCollecting aenum>=2.1.4
  Downloading aenum-3.1.15-py3-none-any.whl (137 kB)
[K     |████████████████████████████████| 137 kB 15.0 MB/s eta 0:00:01
Collecting typing-extensions>=4.5.0
  Downloading typing_extensions-4.13.2-py3-none-any.whl (45 kB)
[K     |████████████████████████████████| 45 kB 9.0 MB/s  eta 0:00:01
[?25hCollecting dataclasses-json
  Using cached dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting requests-oauthlib>=1.0.0
  Using cached requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)
Collecting utm
  Downloading utm-0.8.1-py3-none-any.whl (8.6 kB)
Collecting tomli-w
  Downloading tomli_w-1.2.0-py3-none-any.whl (6.7 kB)
Collecting oauthlib
  Using cached oauthlib-3.2.2-py3-none-any.whl (151 kB)
Collecting pillow>=9.2.0
  Downloading pillow-11.2.1-cp39-cp39-macosx_10_10_x86_64.whl (3.2 MB)
[K     |█

In [4]:
import os
import sys
import pandas as pd
import requests
import json
import datetime
from tqdm import tqdm
from sentinelhub import (SHConfig,
    DataCollection,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubStatistical,
    BBox,
    bbox_to_dimensions,
    CRS,
    MimeType,
    Geometry,
)
import netCDF4 as nc
from netCDF4 import Dataset

In [99]:
import pandas as pd
from netCDF4 import Dataset
from datetime import datetime
import numpy as np
from shapely.geometry import Point, box

# ----------------------------------------
# 1. Define AOI
# ----------------------------------------
from shapely.geometry import box

# Areas of interest: each entry pairs a display name with a shapely rectangle
# built from (min_lon, min_lat, max_lon, max_lat).
aoi_list = [
    {'name': aoi_name, 'bbox': box(*bounds)}
    for aoi_name, bounds in (
        ('Po River Plume', (12.45, 44.825, 12.7, 45.055)),
        ('North East Corsica', (9.6, 42.95, 9.9, 43.155)),       # 'T32TNN'
        ('South East Calabria', (16.5, 38.35, 16.755, 38.555)),  # 'T33SXC'
    )
]


# ----------------------------------------
# 2. Load Excel file
# ----------------------------------------
excel_path = "/content/LM_centroids.xlsx"
# Read the spreadsheet and append a parsed datetime column derived from the
# compact 'Str_time' strings (YYYYMMDDTHHMMSS).
df_excel = pd.read_excel(excel_path, engine='openpyxl').assign(
    Formatted_time=lambda frame: pd.to_datetime(frame['Str_time'], format='%Y%m%dT%H%M%S')
)

# ----------------------------------------
# 3. Load NetCDF file
# ----------------------------------------
ds = Dataset('/content/drive/MyDrive/WASP_LW_SENT2_MED_L1C_B_201506_202109_10m_6y_NRT_v1.0.nc')

# Pull every variable used below out of the dataset in one pass; `[:]` reads
# the full variable. Shape notes are carried over from the original cell.
(nc_times, nc_lat, nc_lon, n_pixels_fil, pixel_x, pixel_y, pixel_spec) = (
    ds.variables[var_name][:]
    for var_name in (
        's2_product',     # e.g., b'S2A_MSIL1C_20180612T081021_...'
        'lat_centroid',
        'lon_centroid',
        'n_pixels_fil',
        'pixel_x',        # shape: (N_filaments, 2563)
        'pixel_y',
        'pixel_spec',     # shape: (N_filaments, 2563, 13)
    )
)

# ----------------------------------------
# 4. Parse NetCDF times
# ----------------------------------------
def extract_datetime_from_product(product_str):
    """Parse the timestamp embedded in a product-name character array.

    Args:
        product_str: Object exposing ``.data.tobytes()`` (e.g. one row of the
            's2_product' character array) whose bytes spell a name such as
            ``S2A_MSIL1C_20180612T081021_...``.

    Returns:
        datetime: The third underscore-separated field parsed with the
        ``%Y%m%dT%H%M%S`` format.

    Raises:
        IndexError: If the name has fewer than three '_'-separated fields.
        ValueError: If the third field does not match the expected format.
    """
    raw_name = product_str.data.tobytes()
    fields = raw_name.decode('utf-8').split('_')
    timestamp_field = fields[2]
    return datetime.strptime(timestamp_field, '%Y%m%dT%H%M%S')

# One parsed datetime per product-name row, in the same order as nc_times.
nc_datetimes = list(map(extract_datetime_from_product, nc_times))

# ----------------------------------------
# 5. Match Excel to NetCDF annotations
# ----------------------------------------
# Matching tolerances used by the candidate filter below.
MAX_TIME_DIFF_DAYS = 3        # whole elapsed days allowed, in either direction
MAX_SPATIAL_DIST_DEG = 0.05   # planar distance in lat/lon degrees (≈5 km)

matched_rows = []

# Convert the NetCDF timestamps once so each Excel row is compared against all
# of them with a single vectorised operation instead of a Python-level scan.
nc_time_array = np.array(nc_datetimes, dtype='datetime64[s]')
# "Elapsed whole days <= N" is equivalent to "elapsed time < N + 1 days".
time_limit = np.timedelta64(MAX_TIME_DIFF_DAYS + 1, 'D')

for index, (_, row) in enumerate(df_excel.iterrows()):
    excel_time = datetime.strptime(row['Str_time'], '%Y%m%dT%H%M%S')
    excel_lat = row['Latitude']
    excel_lon = row['Longitude']

    # Every Excel row yields exactly one record; the match fields stay None
    # when no NetCDF annotation falls inside both tolerances.
    record = {
        'index': index,
        'excel_datetime': excel_time,
        'excel_lat': excel_lat,
        'excel_lon': excel_lon,
        'matched_time': None,
        'matched_lat': None,
        'matched_lon': None,
        'matched_s2_product': None,
        'match_distance_deg': None,
        'AOI': None,
        'tile_name': None,
        'pixel_x': None,
        'pixel_y': None,
        'pixel_spec': None
    }

    # Absolute elapsed time. The absolute value is taken BEFORE counting days:
    # a negative timedelta's `.days` is floored (e.g. -1 hour -> days == -1),
    # so abs(delta.days) would treat earlier and later annotations differently.
    elapsed = np.abs(nc_time_array - np.datetime64(excel_time, 's'))

    # Unprojected distance in degrees between the Excel point and each centroid.
    spatial_dists = np.sqrt((excel_lat - nc_lat)**2 + (excel_lon - nc_lon)**2)

    # Valid candidates within ±3 days and 0.05° (≈5 km); masked entries (if the
    # NetCDF arrays are masked) are treated as non-candidates.
    is_candidate = (elapsed < time_limit) & (spatial_dists <= MAX_SPATIAL_DIST_DEG)
    candidate_indices = np.flatnonzero(np.ma.filled(is_candidate, False))

    if candidate_indices.size:
        # Spatially closest candidate; ties resolve to the lowest index.
        best_idx = int(candidate_indices[np.argmin(spatial_dists[candidate_indices])])
        n = n_pixels_fil[best_idx]  # number of valid pixels stored for this filament

        # Label the match with the first AOI whose rectangle contains it.
        matched_point = Point(nc_lon[best_idx], nc_lat[best_idx])
        aoi_label = next(
            (aoi['name'] for aoi in aoi_list if aoi['bbox'].contains(matched_point)),
            None,
        )

        product_name = nc_times[best_idx].data.tobytes().decode('utf-8')

        record.update({
            'matched_time': nc_datetimes[best_idx],
            'matched_lat': nc_lat[best_idx],
            'matched_lon': nc_lon[best_idx],
            'matched_s2_product': product_name,
            'match_distance_deg': spatial_dists[best_idx],
            'AOI': aoi_label,
            'tile_name': product_name.split('_')[5],
            'pixel_x': pixel_x[best_idx, :n].tolist(),
            'pixel_y': pixel_y[best_idx, :n].tolist(),
            'pixel_spec': pixel_spec[best_idx, :n, :].tolist()
        })

    matched_rows.append(record)

# ----------------------------------------
# 6. Save to CSV (or optionally Pickle)
# ----------------------------------------
# Assemble one table row per record and write it to disk without the
# DataFrame index. Note: the list-valued pixel columns are stored in the CSV
# as their text representation.
df_matched = pd.DataFrame(data=matched_rows)
df_matched.to_csv("matched_annotations.csv", index=False)
print("✅ Matching complete. Results saved to matched_annotations.csv.")

✅ Matching complete. Results saved to matched_annotations.csv.


In [5]:
import pandas as pd
# Reload the matching results written by the matching cell.
df_matched = pd.read_csv(filepath_or_buffer='matched_annotations.csv')

In [4]:
# Keep only rows that were assigned an AOI, restricted to the columns needed
# for the per-tile summaries.
tile_df = df_matched[df_matched['AOI'].notna()][
    ['matched_lat', 'matched_lon', 'tile_name', 'AOI', 'matched_time']
]


In [5]:
# Non-null counts per (tile, AOI) pair.
tile_df.groupby(by=['tile_name', 'AOI']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,matched_lat,matched_lon,matched_time
tile_name,AOI,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
T32TNN,North East Corsica,2,2,2
T32TQQ,Po River Plume,134,134,134
T32TQR,Po River Plume,38,38,38
T33SXC,South East Calabria,27,27,27
T33TUK,Po River Plume,147,147,147


In [7]:
# Non-null counts per (AOI, tile, matched timestamp) combination.
tile_df.groupby(by=['AOI', 'tile_name', 'matched_time']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,matched_lat,matched_lon
AOI,tile_name,matched_time,Unnamed: 3_level_1,Unnamed: 4_level_1
North East Corsica,T32TNN,2016-04-29 10:10:32,1,1
North East Corsica,T32TNN,2019-06-18 10:10:29,1,1
Po River Plume,T32TQQ,2015-07-11 10:00:06,1,1
Po River Plume,T32TQQ,2015-08-20 10:00:16,4,4
Po River Plume,T32TQQ,2015-08-30 10:00:16,1,1
...,...,...,...,...
South East Calabria,T33SXC,2016-11-26 09:43:32,7,7
South East Calabria,T33SXC,2018-10-07 09:40:31,14,14
South East Calabria,T33SXC,2018-11-06 09:42:01,4,4
South East Calabria,T33SXC,2019-03-31 09:40:39,1,1


In [6]:
# Rows with an assigned AOI, reduced to tile, AOI and matched timestamp.
time_df = df_matched[df_matched['AOI'].notna()][['tile_name', 'AOI', 'matched_time']]


In [7]:
# Number of matched timestamps per (AOI, tile) pair.
time_df.groupby(by=['AOI', 'tile_name']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,matched_time
AOI,tile_name,Unnamed: 2_level_1
North East Corsica,T32TNN,2
Po River Plume,T32TQQ,134
Po River Plume,T32TQR,38
Po River Plume,T33TUK,147
South East Calabria,T33SXC,27
