# In [1]:
import os
import netCDF4 as nc
import numpy as np

def get_lat_lon_var_names(dataset, group_name="data_01"):
    """Find the names of the latitude and longitude variables in a dataset.

    The group called ``group_name`` (when present in ``dataset.groups``) is
    searched first; any coordinate still missing afterwards is looked up
    among the root-level ``dataset.variables``.

    Args:
        dataset: Object exposing ``groups`` and ``variables`` mappings.
        group_name: Name of the group to search before the root.

    Returns:
        A ``(lat_name, lon_name)`` tuple; an entry is ``None`` when no
        variable named 'latitude'/'lat' (resp. 'longitude'/'lon') was found.
    """
    lat_candidates = ('latitude', 'lat')
    lon_candidates = ('longitude', 'lon')

    # Group lookup: when several variables match, the last one iterated wins.
    if group_name in dataset.groups:
        group_vars = list(dataset.groups[group_name].variables)
    else:
        group_vars = []
    found_lat = next((v for v in reversed(group_vars) if v in lat_candidates), None)
    found_lon = next((v for v in reversed(group_vars) if v in lon_candidates), None)

    if found_lat and found_lon:
        return found_lat, found_lon

    # Root fallback: fill only the missing coordinate(s); first match wins.
    root_vars = list(dataset.variables)
    if not found_lat:
        found_lat = next((v for v in root_vars if v in lat_candidates), None)
    if not found_lon:
        found_lon = next((v for v in root_vars if v in lon_candidates), None)

    return found_lat, found_lon

def is_within_bounding_box(lat, lon, min_lat, max_lat, min_lon, max_lon):
    """Check whether at least one (lat, lon) point lies inside the bounding box.

    Longitude values above 180 are shifted by -360 before the comparison, so
    data stored in the 0..360 convention is compared in the -180..180
    convention. The box limits themselves are used as given and are
    inclusive on all four sides.

    Args:
        lat: Array-like of latitudes (may be a masked array).
        lon: Array-like of longitudes, broadcastable against ``lat``.
        min_lat, max_lat: Inclusive latitude limits of the box.
        min_lon, max_lon: Inclusive longitude limits of the box.

    Returns:
        ``True`` if any unmasked point falls inside the box, else ``False``.
        Empty input yields ``False``.
    """
    lat = np.asanyarray(lat)
    lon = np.asanyarray(lon)

    # No points at all: nothing can be inside (and .max() would raise).
    if lat.size == 0 or lon.size == 0:
        return False

    # Shift element-wise instead of gating on lon.max(): a single NaN makes
    # max() return NaN, which would silently skip the 0..360 conversion.
    # np.ma.where keeps the mask of masked inputs, unlike np.where.
    lon = np.ma.where(lon > 180, lon - 360, lon)

    inside = (
        (lat >= min_lat) & (lat <= max_lat)
        & (lon >= min_lon) & (lon <= max_lon)
    )
    # Masked points never count as being inside the box.
    return bool(np.ma.filled(inside, False).any())

def _read_variable(dataset, group_name, var_name):
    """Read all values of ``var_name``, preferring the group over the root dataset."""
    if group_name in dataset.groups:
        group_vars = dataset.groups[group_name].variables
        if var_name in group_vars:
            return group_vars[var_name][:]
    return dataset.variables[var_name][:]


def process_files(directory, min_lat, max_lat, min_lon, max_lon, group_name="data_01",
                  inside_path='inside.txt', outside_path='outside.txt'):
    """Classify the ``.nc`` files of a directory by bounding-box overlap.

    Every file in ``directory`` whose name ends with ``.nc`` is opened, its
    latitude/longitude variables are located with ``get_lat_lon_var_names``
    and tested with ``is_within_bounding_box``. File names are then written,
    one per line, to ``inside_path`` or ``outside_path``.

    Files without recognizable coordinate variables, or that raise while
    being processed, are reported on stdout and appear in neither list.

    Args:
        directory: Directory to scan (not recursive).
        min_lat, max_lat, min_lon, max_lon: Bounding box limits.
        group_name: Group searched for the coordinates before the root.
        inside_path: Output file listing the files with a point in the box.
        outside_path: Output file listing the files with no point in the box.

    Returns:
        A ``(inside_files, outside_files)`` tuple of file-name lists.
    """
    inside_files = []
    outside_files = []

    # os.listdir returns entries in arbitrary order; sort for stable output.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.nc'):
            continue

        filepath = os.path.join(directory, filename)
        try:
            with nc.Dataset(filepath, 'r') as dataset:
                lat_var, lon_var = get_lat_lon_var_names(dataset, group_name)
                if not (lat_var and lon_var):
                    print(f"Latitude/Longitude variables not found in {filename}")
                    continue

                # Resolve each variable on its own: the name lookup may return
                # one coordinate from the group and the other from the root.
                lat = _read_variable(dataset, group_name, lat_var)
                lon = _read_variable(dataset, group_name, lon_var)
                inside = is_within_bounding_box(lat, lon, min_lat, max_lat, min_lon, max_lon)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error processing {filename}: {e}")
            continue

        if inside:
            inside_files.append(filename)
        else:
            outside_files.append(filename)

    with open(inside_path, 'w') as f:
        f.writelines(f"{item}\n" for item in inside_files)

    with open(outside_path, 'w') as f:
        f.writelines(f"{item}\n" for item in outside_files)

    return inside_files, outside_files

# Bounding box limits: latitude 8..46, longitude -100..-60
min_lat, max_lat = 8, 46
min_lon, max_lon = -100, -60

# Folder holding the NetCDF files to classify
directory = '/work2/07174/soelem/data/swotnad'

# Writes the inside/outside file lists using the default group name
process_files(
    directory,
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
)