# SWOT PIXC preprocessing

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

### One file test

In [2]:
# ---------- Filepath ----------
# Path (relative to the notebook's working directory) of the single SWOT
# L2 HR PIXC granule used for this one-file test. The file name carries
# cycle 033, pass 487, tile 278R and the 2025-06-05 acquisition window.
aufeis_test_tile_filepath = "../../../../shared_space/SWOT_Aufeis/SWOT_PIXC_data/SWOT_L2_HR_PIXC_033_487_278R_20250605T070551_20250605T070602_PID0_01.nc"

# Projected CRS identifier for later use; it is not referenced by the cells
# visible in this part of the notebook.
target_crs = "EPSG:32606"   # UTM zone 6N

In [7]:
# NetCDF structure check
import netCDF4

# Open the granule read-only inside a context manager so the underlying file
# handle is released as soon as the summary has been printed (the bare
# netCDF4.Dataset(...) call left the file open for the life of the kernel).
with netCDF4.Dataset(aufeis_test_tile_filepath) as data_structure:
    print(data_structure) # shows 'groups: pixel_cloud, tvp, noise'

# file pointer
# fp = xr.open_dataset(aufeis_test_tile_filepath, group = 'pixel_cloud')
# print(fp)
# print(fp.attrs)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    Conventions: CF-1.7
    title: Level 2 KaRIn High Rate Water Mask Pixel Cloud Data Product
    institution: CNES
    source: Ka-band radar interferometer
    history: 2025-06-08T19:56:43Z : Creation
    platform: SWOT
    reference_document: D-56411_SWOT_Product_Description_L2_HR_PIXC
    contact: podaac@podaac.jpl.nasa.gov
    cycle_number: 33
    pass_number: 487
    tile_number: 278
    swath_side: R
    tile_name: 487_278R
    short_name: L2_HR_PIXC
    crid: PID0
    pge_name: PGE_L2_HR_PIXC
    pge_version: 5.4.2
    time_granule_start: 2025-06-05T07:05:51.405018Z
    time_granule_end: 2025-06-05T07:06:02.513600Z
    time_coverage_start: 2025-06-05T07:05:51.951466Z
    time_coverage_end: 2025-06-05T07:06:01.967025Z
    geospatial_lon_min: -143.07186960352502
    geospatial_lon_max: -140.8913156137821
    geospatial_lat_min: 67.8555721289324
    geospatial_lat_max: 68.68735271984485
    inner

In [6]:
# Pull a single variable out of the open dataset as a flat (1D) numpy array
def getvar(name):
    """Return variable ``name`` from ``fp`` flattened to 1D, or NaN if absent.

    ``fp`` is looked up as a module-level name at call time, so it must be
    bound (and still readable) when this helper is invoked.

    Parameters
    ----------
    name : str
        Key of the variable to read from ``fp``.

    Returns
    -------
    numpy.ndarray or float
        ``fp[name].values`` flattened with ``ravel()`` when ``name`` is in
        ``fp``; otherwise the scalar ``np.nan``.
    """
    if name not in fp:
        return np.nan
    flattened = fp[name].values.ravel()
    return flattened

# Open the pixel_cloud group in this cell so it runs on a fresh kernel
# (Restart & Run All). getvar() reads the module-level name `fp`, and it copies
# each variable into a numpy array, so the file can be closed as soon as the
# DataFrame has been built; the context manager does that even on error.
with xr.open_dataset(aufeis_test_tile_filepath, group="pixel_cloud") as fp:
    # Build a Pandas DataFrame (one row per pixel-cloud point)
    SWOT_Points = pd.DataFrame({
        "longitude": getvar("longitude"),
        "latitude": getvar("latitude"),
        "height": getvar("height"),
        "phase_noise_std": getvar("phase_noise_std"),
        "dheight": getvar("dheight_dphase"),
        # elevation corrections
        "geoid": getvar("geoid"),
        "solid_tide": getvar("solid_earth_tide"),
        "load_tide": getvar("load_tide_fes"),
        "pole_tide": getvar("pole_tide"),
        "class": getvar("classification"),
        # Quality flag of the classification, not the classification itself
        # (this column previously duplicated "class").
        "classqual": getvar("classification_qual"),
        "bright_land_flag": getvar("bright_land_flag"),
        "ancillary_surface_classification_flag": getvar("ancillary_surface_classification_flag"),
        "waterfrac": getvar("water_frac"),
        "waterfrac_uncert": getvar("water_frac_uncert"),
        "prior_water_prob": getvar("prior_water_prob"),
        "geolocqual": getvar("geolocation_qual"),
        "sig0": getvar("sig0"),
        "sig0_uncert": getvar("sig0_uncert"),
        "sig0_qual": getvar("sig0_qual"),
        "crosstrack": getvar("cross_track"),
        "pixel_area": getvar("pixel_area"),
        "darea_dheight": getvar("darea_dheight")
    })

# Derive elevation & height uncertainty
# Height uncertainty: phase noise propagated through the height sensitivity to
# phase. The column is named "phase_noise_std" in the frame built above; the
# old "phase_noise" key no longer exists and raised a KeyError.
SWOT_Points["height_uncert"] = SWOT_Points["phase_noise_std"] * SWOT_Points["dheight"]

# Total correction removed from the ellipsoidal height: geoid height plus the
# solid-earth, load and pole tides. All four terms are subtracted from
# `height`, so they are summed here (subtracting the tides from the geoid would
# add them back to the elevation with the wrong sign).
SWOT_Points["geoid_correction"] = (
    SWOT_Points["geoid"]
    + SWOT_Points["solid_tide"]
    + SWOT_Points["load_tide"]
    + SWOT_Points["pole_tide"]
)
SWOT_Points["elevation"] = SWOT_Points["height"] - SWOT_Points["geoid_correction"]

# Drop any empty rows (rows in which every column is NaN)
SWOT_Points = SWOT_Points.dropna(how="all")

# Quality filtering
# Exact geolocation_qual values to reject (whole flag words, matched by
# equality with isin, not individual bit positions).
geolocqual_problem_bits = {
    4, 4101, 5, 6, 4100, 4102, 524292, 524293, 524294, 524295,
    528389, 528390, 7, 528388, 16777220, 17301508, 17305604,
    528391, 4103
}

# Keep points whose geolocation flag is not in the reject set and whose
# absolute cross-track distance lies within [10000, 60000] (bounds inclusive).
SWOT_Points = SWOT_Points[
    (~SWOT_Points["geolocqual"].isin(geolocqual_problem_bits)) &
    (SWOT_Points["crosstrack"].abs().between(10000, 60000))
]

print(SWOT_Points.head())

In [6]:
# TODO: add cycle, pass, and time columns to the SWOT_Points DataFrame.
# Candidate features for ML:
#   longitude, latitude, elevation NORMALIZED within each tile, phase_noise_std, sig0,
#   crosstrack, and possibly cycle & pass (still undecided)

      longitude   latitude      height  phase_noise   dheight     geoid  \
211 -143.098029  68.201372  571.506653     0.117613  0.744377  6.751453   
212 -143.096199  68.200914  571.323730     0.110314  0.758199  6.750676   
213 -143.093815  68.200317  571.329895     0.131377  0.776198  6.749667   
285 -142.934672  68.160307  572.359375     0.109845  1.980290  6.684452   
286 -142.934076  68.160156  572.105469     0.061149  1.984812  6.684213   

     solid_tide  load_tide  pole_tide  class     ...      geolocqual  \
211   -0.022910  -0.008105  -0.000049    6.0     ...             0.0   
212   -0.022910  -0.008105  -0.000049    6.0     ...             0.0   
213   -0.022911  -0.008105  -0.000049    3.0     ...             0.0   
285   -0.022986  -0.008123  -0.000052    3.0     ...             0.0   
286   -0.022986  -0.008123  -0.000052    3.0     ...             0.0   

          sig0  sig0_uncert  sig0_qual    crosstrack   pixel_area  \
211  17.657549     8.661376        0.0  11937.3