# Model Output Notebook

<img style="float:center;" src="https://arcticexpansion.vse.gmu.edu/sites/arcticexpansion.vsnet.gmu.edu/files/images/header5d2.png" width=600px>

### ADCIRC-SWAN Output


### Initialize Libraries

In [1]:
import netCDF4 as nc4;        import pandas as pd
import pathlib as pl;         import geopandas as gpd
import numpy as np;           import xarray as xr
import multiprocessing as mp; import datetime
import os
from shapely import Polygon,Point,MultiPoint,LineString,MultiLineString;import shapely.vectorized
from sklearn.neighbors import BallTree
from scipy.stats import linregress
from collections import defaultdict
import warnings;warnings.filterwarnings("ignore")

Shell setup (run in a terminal, not in a Python cell):

```bash
source $HOME/miniforge3/bin/activate
salloc --ntasks=5 --nodes=1 --partition=normal --time=10:00:00
```

### Defined Functions

#### Data for this exercise can be found here
https://doi.org/10.17603/ds2-h0fw-2p96

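Download `swan_HS.63.nc` from one of the four folders and place it in that year's `outputs` directory. `process_chunk` reads its time axis from this file; the other model output files are optional and are skipped if absent.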

---

In [2]:
def process_chunk(chunk_df, chunk_id):
    node_ids = chunk_df["node_ids"].values.astype(int)
    comm_names = chunk_df["name"].astype(str).values
    comm_lats = chunk_df.geometry.y.values
    comm_lons = chunk_df.geometry.x.values
    max_name_len = max(len(name) for name in comm_names)

    file_var_map = {
        "zeta": "fort.63.nc", "depth": "fort.63.nc", "u-vel": "fort.64.nc", "v-vel": "fort.64.nc",
        "pressure": "fort.73.nc", "windx": "fort.74.nc", "windy": "fort.74.nc",
        "iceaf": "fort.93.nc", "swan_HS": "swan_HS.63.nc",
        "swan_TPS": "swan_TPS.63.nc", "swan_DIR": "swan_DIR.63.nc"
    }
    file_to_vars = defaultdict(list)
    for var, fname in file_var_map.items():
        file_to_vars[fname].append(var)

    # Get time from fort.63.nc
    fort63 = OUTPUT_DIR / "swan_HS.63.nc"
    with nc4.Dataset(fort63) as ds:
        base_time = pd.to_datetime(ds.variables["time"].base_date)
        nt = ds.dimensions["time"].size
        time_all_dt = pd.date_range(start=base_time, periods=nt, freq="1H").to_numpy("datetime64[ns]")
        time_all = (time_all_dt - np.datetime64("1970-01-01T00:00:00Z")) // np.timedelta64(1, "s")

    # Filter to year only
    start = np.datetime64(f"{YEAR}-01-01T00:00:00")
    end = np.datetime64(f"{YEAR}-12-31T23:00:00")
    mask = (time_all_dt >= start) & (time_all_dt <= end)
    time_all = time_all[mask]

    # Extract data
    data_by_var = {var: None for var in file_var_map}
    for fname, var_list in file_to_vars.items():
        file_path = OUTPUT_DIR / fname
        if not file_path.exists(): continue

        with nc4.Dataset(file_path, "r") as ds:
            for var in var_list:
                nc_var = var if var in ds.variables else var.replace("-", "_")
                if nc_var not in ds.variables: continue
                v = ds.variables[nc_var]
                if v.ndim == 1:
                    total_nodes = v.shape[0]
                    valid_indices = [i for i in node_ids if i < total_nodes]
                    if len(valid_indices) < len(node_ids):
                        print(f"⚠️ Some node_ids are out of bounds for {file_path.name} (max index = {total_nodes-1})")
                    full_data = np.full((nt, len(node_ids)), np.nan, dtype=np.float32)
                    node_data = v[valid_indices]
                    for i, vi in enumerate(valid_indices):
                        full_data[:, i] = node_data[i]  # broadcast across time
                    data_by_var[var] = full_data[mask, :]
                else:
                    total_nodes = v.shape[1]
                    valid_indices = [i for i in node_ids if i < total_nodes]
                    if len(valid_indices) < len(node_ids):
                        print(f"⚠️ Some node_ids are out of bounds for {file_path.name} (max index = {total_nodes-1})")
                    data = np.full((nt, len(node_ids)), np.nan, dtype=np.float32)
                    for t in range(nt):
                        try:
                            data[t, :len(valid_indices)] = v[t, valid_indices]
                        except RuntimeError as e:
                            print(f"❌ Error reading variable '{var}' from file '{file_path}' at timestep {t}: {e}")
                            continue
                    data_by_var[var] = data[mask, :]

    # Write to NetCDF
    out_path = PROCESSED_DIR / f"extracted_outputs_{YEAR}_part{chunk_id:02d}.nc"
    nt_filtered = len(time_all)
    with nc4.Dataset(out_path, "w", format="NETCDF4") as ds_out:
        ds_out.title = f"Arctic Alaska Coastal Hazards Dataset [1979 - 2024] Data for {YEAR}"
        ds_out.institution = "GMU Flood Hazards Research Lab"
        ds_out.source = "ADCIRC-SWAN"
        ds_out.history = f"Created on {datetime.datetime.now()} by Tyler Miesse"
        ds_out.Conventions = "CF-1.8"
        ds_out.contact = "tmiesse@gmu.edu"
        ds_out.author = "Tyler W. Miesse, Andre de Souza de Lima, Martin Henke, Celso Ferreira, and Thomas Ravens"
        ds_out.acknowledgment = "This research was supported by funding from the National Science Foundation (Award No. 1927785) ."
        ds_out.funding = "NSF NNA Track 1: Arctic impacts and reverberations of expanding global maritime trade routes"
        ds_out.summary = (
            "This study utilizes ADCIRC+SWAN to simulate interactions between the ocean, land, sea ice, and atmosphere,"\
            " focusing on the period from 1979 to 2024 for Western to Northern Alaska coasts. Data from the European Centre"\
            " for Medium-Range Weather Forecasts Re-Analysis (ERA5), including sea ice concentration and atmospheric forcing"\
            " were utilized to support these simulations, which investigate annual conditions in the Alaskan Arctic. This is" \
            " dataset is extracted parameters for communities found in western to northern Alaska. For other areas in Alaska" \
            " not found in this dataset please check out the Raw_DATA."
        )

        ds_out.createDimension("time", nt_filtered)
        ds_out.createDimension("node", len(node_ids))
        ds_out.createDimension("name_strlen", max_name_len)

        tvar = ds_out.createVariable("time", "f8", ("time",))
        tvar[:] = time_all
        tvar.units = "seconds since 1970-01-01 00:00:00"
        tvar.calendar = "standard"
        tvar.long_name = "Time"

        lat = ds_out.createVariable("lat", "f4", ("node",)); lat[:] = comm_lats; lat.long_name = "Latitude"
        lon = ds_out.createVariable("lon", "f4", ("node",)); lon[:] = comm_lons; lon.long_name = "Longitude"

        name_array = np.array([list(n.ljust(max_name_len)) for n in comm_names], dtype="S1")
        name_var = ds_out.createVariable("community", "S1", ("node", "name_strlen"))
        name_var[:, :] = name_array
        name_var.long_name = "Community name"

        for var in data_by_var:
            v = ds_out.createVariable(var, "f4", ("time", "node"), zlib=True, fill_value=np.nan)
            if var == "zeta":
                v.long_name = "Water surface elevation above msl"
                v.units = "m"
                v.standard_name = "sea_surface_height_above_msl"
                v.description = "Modeled water level relative to local mean sea level. Positive upwards."
                v.valid_min = -50.0; v.valid_max = 50.0
            elif var == "u-vel":
                v.long_name = "Eastward depth-averaged velocity"
                v.units = "m/s"
                v.standard_name = "eastward_sea_water_velocity"
                v.description = "Eastward component of depth-averaged ocean current."
                v.valid_min = -50.0; v.valid_max = 50.0
            elif var == "v-vel":
                v.long_name = "Northward depth-averaged velocity"
                v.units = "m/s"
                v.standard_name = "northward_sea_water_velocity"
                v.description = "Northward component of depth-averaged ocean current."
                v.valid_min = -50.0; v.valid_max = 50.0
            elif var == "pressure":
                v.long_name = "Atmospheric pressure at 10m"
                v.units = "Pa"
                v.standard_name = "air_pressure_at_sea_level"
                v.description = "Surface atmospheric pressure at 10-meter height."
                v.valid_min = 60000.0; v.valid_max = 130000.0
            elif var == "windx":
                v.long_name = "Eastward 10m wind velocity"
                v.units = "m/s"
                v.standard_name = "eastward_wind"
                v.description = "Eastward component of wind velocity at 10 meters."
                v.valid_min = -50.0; v.valid_max = 50.0
            elif var == "windy":
                v.long_name = "Northward 10m wind velocity"
                v.units = "m/s"
                v.standard_name = "northward_wind"
                v.description = "Northward component of wind velocity at 10 meters."
                v.valid_min = -50.0; v.valid_max = 50.0
            elif var == "iceaf":
                v.long_name = "Sea ice area fraction"
                v.units = "1"
                v.standard_name = "sea_ice_area_fraction"
                v.description = "Pecent fraction of ocean surface covered by sea ice (0 to 100)."
                v.valid_min = 0.0; v.valid_max = 100.0
            elif var == "swan_HS":
                v.long_name = "Significant wave height"
                v.units = "m"
                v.standard_name = "significant_height_of_wind_and_swell_waves"
                v.description = "Height of significant wind and swell waves."
                v.valid_min = 0.0; v.valid_max = 30.0
            elif var == "swan_TPS":
                v.long_name = "Peak spectral wave period"
                v.units = "s"
                v.standard_name = "wave_period_at_variance_spectral_density_maximum"
                v.description = "Wave period at the peak of the variance density spectrum."
                v.valid_min = 0.0; v.valid_max = 30.0
            elif var == "swan_DIR":
                v.long_name = "Mean wave direction"
                v.units = "degrees"
                v.standard_name = "direction_of_wind_waves"
                v.description = "Direction from which waves are coming, measured clockwise from true north."
                v.valid_min = 0.0; v.valid_max = 360.0
            elif var == "depth":
                v.long_name = "Bathymetric depth at node"
                v.units = "m"
                v.standard_name = "sea_floor_depth_below_geoid"
                v.description = "Depth of the ocean bottom relative to the geoid."
                v.positive = "down"
                v.valid_min = -1000.0; v.valid_max = 1000.0
            if data_by_var[var] is not None:
                v[:, :] = data_by_var[var]
            else:
                v[:, :] = np.full((nt_filtered, len(node_ids)), np.nan)

    print(f"✅ Finished chunk {chunk_id:02d} → {out_path.name}")

In [None]:
YEARS = [2000,1999,1998,1997,1996,1995,1994,1993,1992,1991,1990]
for YEAR in YEARS:
    N_CHUNKS = 12
    ROOT = pl.Path('/groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/Raw_DATA')
    PROCESSED_DIR = pl.Path('/scratch/tmiesse/project/data4trends')
    SHAPEFILE = pl.Path('/groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/arctic_shapefiles/comm4process/stations_locs2.shp')
    OUTPUT_DIR = ROOT / str(YEAR) / "outputs"

    if __name__ == '__main__':
        gdf = gpd.read_file(SHAPEFILE)
        chunks = np.array_split(gdf, N_CHUNKS)
        args = [(chunk.reset_index(drop=True), i+1) for i, chunk in enumerate(chunks)]
        with mp.Pool(N_CHUNKS) as pool:
            pool.starmap(process_chunk, args)
        print("✅ All chunks complete.")

    part_files = sorted(PROCESSED_DIR.glob(f"extracted_outputs_{YEAR}_part*.nc"))
    if part_files:
        print("🔄 Merging chunks with xarray...")
        ds_merged = xr.open_mfdataset(part_files, combine='nested', concat_dim='node')
        merged_path = PROCESSED_DIR / f"{YEAR}.nc"
        ds_merged.to_netcdf(merged_path)
        ds_merged.close()

        # Cleanup
        for f in part_files:
            os.remove(f)
        print(f"✅ Merged NetCDF written to {merged_path} and part files removed.")

✅ Finished chunk 03 → extracted_outputs_2000_part03.nc✅ Finished chunk 04 → extracted_outputs_2000_part04.nc✅ Finished chunk 08 → extracted_outputs_2000_part08.nc✅ Finished chunk 05 → extracted_outputs_2000_part05.nc✅ Finished chunk 07 → extracted_outputs_2000_part07.nc




✅ Finished chunk 10 → extracted_outputs_2000_part10.nc
✅ Finished chunk 12 → extracted_outputs_2000_part12.nc
✅ Finished chunk 11 → extracted_outputs_2000_part11.nc
✅ Finished chunk 01 → extracted_outputs_2000_part01.nc
✅ Finished chunk 09 → extracted_outputs_2000_part09.nc
✅ Finished chunk 02 → extracted_outputs_2000_part02.nc
✅ Finished chunk 06 → extracted_outputs_2000_part06.nc
✅ All chunks complete.
🔄 Merging chunks with xarray...
✅ Merged NetCDF written to /scratch/tmiesse/project/data4trends/2000.nc and part files removed.
✅ Finished chunk 01 → extracted_outputs_1999_part01.nc✅ Finished chunk 04 → extracted_outputs_1999_part04.nc✅ Finished chunk 05 → extracted_outputs_1999_part05.nc✅ Finished chunk 11 → extra