In [None]:
import os
import numpy as np
from datetime import datetime, timedelta
from netCDF4 import Dataset
import re
import geopandas as gpd
from shapely.geometry import Point, box

def extract_start_time(filename):
    """Return the first ``YYYYMMDDTHHMMSS`` timestamp embedded in *filename*.

    The name is searched for eight digits, a literal ``T`` and six digits;
    the first such run is parsed into a naive ``datetime``.

    Raises:
        ValueError: if the name contains no such timestamp, or if the
            matched digits do not form a valid date/time.
    """
    found = re.search(r'\d{8}T\d{6}', filename)
    if found is None:
        raise ValueError("Unknown file format: " + filename)
    return datetime.strptime(found.group(), '%Y%m%dT%H%M%S')

def create_model_point_gdf(mod_lat, mod_lon):
    """Build a GeoDataFrame holding one point per model node.

    Negative longitudes are shifted by +360 before the points are created;
    non-negative longitudes are used unchanged. Rows are created in input
    order, so row position ``k`` corresponds to the ``k``-th entry of
    ``mod_lat`` / ``mod_lon``.

    Args:
        mod_lat: sequence of node latitudes.
        mod_lon: array of node longitudes (must support ``< 0`` and ``+ 360``
            element-wise, e.g. a NumPy array).

    Returns:
        A GeoDataFrame whose geometry column contains ``Point(lon, lat)``
        for every node.
    """
    wrapped_lon = np.where(mod_lon < 0, mod_lon + 360, mod_lon)

    node_points = []
    for lon, lat in zip(wrapped_lon, mod_lat):
        node_points.append(Point(lon, lat))

    return gpd.GeoDataFrame(geometry=node_points)

def process_satellite_file(satellite_filepath, model_gdf, box_width, box_height):
    """Map the pixels of one satellite NetCDF file onto model nodes.

    For every pixel the value ``topo = ssh_karin - geoid + height_cor_xover``
    is computed. Pixels whose ``topo`` is masked, not a number, or outside
    the closed range [-5, 5] are ignored. For each remaining pixel a
    rectangle of ``box_width`` x ``box_height`` (same units as the file's
    longitude/latitude) is centred on the pixel, and every point of
    ``model_gdf`` that the rectangle intersects is assigned the pixel's
    ``topo``. When several pixels hit the same node, the smallest ``topo``
    wins (despite the ``closest_nodes`` name, no distance is involved).

    Args:
        satellite_filepath: path of the NetCDF file. It must provide the
            2-D variables ``latitude``, ``longitude``, ``ssh_karin``,
            ``geoid`` and ``height_cor_xover``.
        model_gdf: GeoDataFrame of model node points; its spatial index is
            queried, and positional index ``k`` is reported as node ``k + 1``.
        box_width: full width of the pixel rectangle along longitude.
        box_height: full height of the pixel rectangle along latitude.

    Returns:
        dict mapping 1-based node number to ``{'topo': value}``.
    """
    # The context manager closes the file even if a variable is missing or
    # a read fails; slicing with [:] pulls the data into memory first.
    with Dataset(satellite_filepath, 'r') as satellite_data:
        variables = satellite_data.variables
        satellite_lats = variables['latitude'][:]
        satellite_lons = variables['longitude'][:]
        ssh_karin = variables['ssh_karin'][:]
        geoid = variables['geoid'][:]
        height_cor_xover = variables['height_cor_xover'][:]

    half_box_width = box_width / 2.0
    half_box_height = box_height / 2.0
    # Fetch the spatial index once instead of on every pixel.
    spatial_index = model_gdf.sindex
    closest_nodes = {}

    for i, lat_row in enumerate(satellite_lats):
        for j, sat_lat in enumerate(lat_row):
            # Calculate topo with corrections
            topo = ssh_karin[i, j] - geoid[i, j] + height_cor_xover[i, j]

            # Skip masked values
            if np.ma.is_masked(topo):
                continue

            # Keep only values inside [-5, 5]. Written as a positive range
            # test so that NaN (for which every comparison is False) is
            # rejected too; a stored NaN could never be replaced afterwards.
            if not (-5 <= topo <= 5):
                continue

            # Define satellite pixel bounding box
            sat_lon = satellite_lons[i, j]
            bbox = box(sat_lon - half_box_width, sat_lat - half_box_height,
                       sat_lon + half_box_width, sat_lat + half_box_height)

            # Find model points touching the bounding box
            for index in spatial_index.query(bbox, predicate='intersects'):
                node_number = index + 1  # node numbers are 1-based
                recorded = closest_nodes.get(node_number)
                # Store topo if node is not yet recorded or topo is smaller
                if recorded is None or recorded['topo'] > topo:
                    closest_nodes[node_number] = {'topo': topo}

    return closest_nodes

# Load the model node coordinates. Slicing with [:] reads the arrays fully
# into memory, so the dataset can be closed as soon as they are extracted
# (and is closed even if a variable is missing).
model_file = '/scratch/07174/soelem/global_2-20km/fort.63.nc'
with Dataset(model_file, 'r') as model_data:
    mod_lat = model_data.variables['y'][:]
    mod_lon = model_data.variables['x'][:]

# Build the node GeoDataFrame once. It does not depend on the satellite file,
# so it is reused for every file instead of being rebuilt inside the loop.
model_gdf = create_model_point_gdf(mod_lat, mod_lon)

# Collect (start time, filename) for every .nc file whose name carries a
# parseable timestamp; names that do not are reported and skipped.
satellite_directory = '/work2/07174/soelem/stampede3/Paper-3/karin_data'
file_times = []
for satellite_filename in os.listdir(satellite_directory):
    if not satellite_filename.endswith('.nc'):
        continue
    try:
        file_datetime = extract_start_time(satellite_filename)
    except ValueError as e:
        print(e)
    else:
        file_times.append((file_datetime, satellite_filename))

# Process files in chronological order; hours are counted from start_time.
file_times.sort(key=lambda x: x[0])
start_time = datetime.strptime('20240801', '%Y%m%d')

# Full width/height of the rectangle drawn around each satellite pixel,
# in the same units as the longitude/latitude coordinates.
box_width = 0.0798
box_height = 0.0179

with open('swot_swath.dat', 'w') as file:
    file.write("# Satellite observations\n")
    file.write("3600.0         # 1-hour time increment in seconds\n")
    file.write("0.0            # default nodal value\n")

    last_written_hour = -1
    for file_datetime, satellite_filename in file_times:
        print(f"Processing file: {satellite_filename}")
        # Whole hours elapsed since start_time (truncated toward zero).
        # NOTE(review): a file dated before start_time yields a negative
        # hour, for which no "##" marker is emitted — confirm such files
        # cannot occur in satellite_directory.
        current_hour = int((file_datetime - start_time).total_seconds() / 3600)
        if current_hour != last_written_hour:
            # One "##" line per hour stepped over, including the new hour.
            for _ in range(current_hour - last_written_hour):
                file.write("##\n")
            last_written_hour = current_hour

        closest_nodes = process_satellite_file(
            os.path.join(satellite_directory, satellite_filename),
            model_gdf, box_width, box_height)

        # Write one "node value" line per matched node. Range filtering has
        # already been applied inside process_satellite_file.
        for node, info in closest_nodes.items():
            topo_rounded = round(info['topo'], 4)
            file.write(f"{node} {topo_rounded}\n")