# Model Output Notebook

<img style="display:block; margin:0 auto;" src="https://arcticexpansion.vse.gmu.edu/sites/arcticexpansion.vsnet.gmu.edu/files/images/header5d2.png" width="600">

### ADCIRC-SWAN Output


### Initialize Libraries

In [1]:
import netCDF4 as nc4;        import pandas as pd
import pathlib as pl;         import geopandas as gpd
import numpy as np;           import xarray as xr

from shapely import Polygon,Point,MultiPoint,LineString,MultiLineString;import shapely.vectorized
from sklearn.neighbors import BallTree
from scipy.stats import linregress
import multiprocessing as mp
from collections import defaultdict

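Shell commands (run these in a terminal, not in a notebook cell) to activate the conda environment and request a compute allocation:

```bash
source $HOME/miniforge3/bin/activate

salloc --ntasks=5 --nodes=1 --partition=normal --time=10:00:00
```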

### Defined Functions

In [2]:

def nearest(items, pivot):
    """Return the element of ``items`` closest to ``pivot``.

    Closeness is the absolute difference ``abs(x - pivot)``. When several
    elements are equally close, the first one in iteration order wins, and an
    empty ``items`` raises ``ValueError`` (both behaviours come from ``min``).
    """
    def _distance(candidate):
        return abs(candidate - pivot)

    return min(items, key=_distance)

def point_lookup(model_lat: np.ndarray, model_lon: np.ndarray,
                 satellite_lat: np.ndarray, satellite_lon: np.ndarray,
                 earth_radius_km: float = 6371.0):
    """Find, for every query point, the nearest model point on a sphere.

    Parameters
    ----------
    model_lat, model_lon : array-like
        Latitude and longitude of the model points, in degrees.
    satellite_lat, satellite_lon : array-like
        Latitude and longitude of the query points, in degrees.
    earth_radius_km : float, optional
        Sphere radius used to turn the haversine angle (radians) into a
        distance. Defaults to 6371 km, the value previously hard-coded.

    Returns
    -------
    tuple
        ``(distances, indices)`` exactly as returned by ``BallTree.query`` with
        ``k=1``, except that the distances are multiplied by
        ``earth_radius_km``. ``indices`` refer to positions in the model arrays.
    """
    # The haversine metric works on (lat, lon) pairs expressed in radians.
    model_rad = np.deg2rad(np.c_[model_lat, model_lon])
    query_rad = np.deg2rad(np.c_[satellite_lat, satellite_lon])

    tree = BallTree(model_rad, metric='haversine')
    distances, indices = tree.query(query_rad, k=1)
    return distances * earth_radius_km, indices

def read_file_worker(args):
    """Extract selected nodes of the requested variables from one NetCDF file.

    Takes a single packed argument so it can be used directly with
    ``multiprocessing.Pool.map``.

    Parameters
    ----------
    args : tuple
        ``(file_path, var_list, node_ids)``. ``file_path`` is opened with
        ``netCDF4.Dataset`` and its ``.name`` is used in messages; ``var_list``
        is an iterable of variable names; ``node_ids`` is a sequence of integer
        indices applied as-is to the variable's second dimension.

    Returns
    -------
    dict
        Maps each requested name that was found to a ``float32`` array of shape
        ``(nt, len(node_ids))``, where ``nt`` is the length of the variable's
        first dimension. Masked entries are stored as NaN. Variables that are
        missing are reported and omitted; if reading fails, the error is
        printed and whatever was collected so far is returned.
    """
    file_path, var_list, node_ids = args
    result = {}
    try:
        with nc4.Dataset(file_path, "r") as ds:
            for var in var_list:
                # Fall back to the underscore spelling (e.g. "u-vel" -> "u_vel").
                nc_var = var if var in ds.variables else var.replace("-", "_")
                if nc_var not in ds.variables:
                    print(f"⚠️ Variable {nc_var} not found in {file_path.name}")
                    continue

                v = ds.variables[nc_var]
                nt = len(v)
                data = np.empty((nt, len(node_ids)), dtype=np.float32)
                # Read one time slice at a time rather than the whole variable.
                for t in range(nt):
                    # netCDF4 may hand back a masked array; assigning that
                    # straight into a plain ndarray drops the mask and stores
                    # the raw fill value as if it were data. Convert masked
                    # entries to NaN explicitly instead.
                    step = np.ma.asarray(v[t, node_ids], dtype=np.float32)
                    data[t, :] = step.filled(np.nan)
                result[var] = data
    except Exception as e:
        # Best effort: report the failure and return what was read so far.
        print(f"❌ Error reading {file_path.name}: {e}")
    return result


#### Data for this exercise
The data are available at https://doi.org/10.17603/ds2-h0fw-2p96

Download `swan_HS.63.nc` from one of the four folders.

---

In [3]:
# NOTE(review): these are absolute, cluster-specific paths; edit them when
# running the notebook anywhere else.
# Directory holding the raw model runs.
root = pl.Path('/groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/Raw_DATA')
# Directory that receives the processed output.
outdir = pl.Path('/groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/Processed_DATA')
# Point shapefile of communities, read into a GeoDataFrame.
gdf = gpd.read_file('/groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/arctic_shapefiles/comm4process/nodes4communities.shp')

In [9]:
# Number of leading shapefile rows to process. Kept in one place so the
# coordinate, name and node arrays can never end up with different lengths.
# Set to None to process every row.
n_communities = 10
communities = gdf.iloc[:n_communities]

comm_lats = communities.geometry.y.values
comm_lons = communities.geometry.x.values
comm_names = communities["community"].astype(str).values  # Adjust to your column name
node_ids = communities["node_ids"].values.astype(int)     # Adjust to your node column name

# Width of the fixed-length character dimension used when names are written out.
max_name_len = max(len(name) for name in comm_names)

In [6]:
year = 2024
output_dir = root / str(year) / "outputs"

outdir.mkdir(parents=True, exist_ok=True)

# Output variable name -> NetCDF file (inside output_dir) that contains it.
file_var_map = {
    "zeta": "fort.63.nc",
    "u-vel": "fort.64.nc",
    "v-vel": "fort.64.nc",
    "pressure": "fort.73.nc",
    "windx": "fort.74.nc",
    "windy": "fort.74.nc",
    "iceaf": "fort.93.nc",
    "swan_HS": "swan_HS.63.nc",
    "swan_TPS": "swan_TPS.63.nc",
    "swan_DIR": "swan_DIR.63.nc"
}

# Invert the mapping so each file is opened once for all of its variables.
file_to_vars = defaultdict(list)
for var, fname in file_var_map.items():
    file_to_vars[fname].append(var)

# fort.63.nc supplies the reference date and the number of time steps.
fort63 = output_dir / "fort.63.nc"
if not fort63.exists():
    raise FileNotFoundError("fort.63.nc missing — required to extract base_date for time axis.")

with nc4.Dataset(fort63) as ds:
    base_time = pd.to_datetime(ds.variables["time"].base_date)
    nt = ds.dimensions["time"].size

# NOTE(review): the time axis is synthesised as hourly steps starting exactly at
# base_date; the values stored in the file's "time" variable are not read.
# Confirm that the first record really falls on base_date and that the output
# interval is one hour.
time_all_dt = pd.date_range(start=base_time, periods=nt, freq="1h").to_numpy(dtype="datetime64[ns]")

# Seconds since the Unix epoch. The epoch literal carries no "Z" suffix:
# timezone-suffixed strings are deprecated by NumPy and trigger a warning.
time_all = (time_all_dt - np.datetime64("1970-01-01T00:00:00")) // np.timedelta64(1, "s")


  time_all = (time_all_dt - np.datetime64("1970-01-01T00:00:00Z")) // np.timedelta64(1, "s")


In [None]:
# One task per input file: (path, variables to read from it, node indices).
tasks = []
for fname, var_list in file_to_vars.items():
    file_path = output_dir / fname
    if file_path.exists():
        tasks.append((file_path, var_list, node_ids))
    else:
        # Surface skipped files so their variables are not silently absent.
        print(f"⚠️ {fname} not found in {output_dir}; skipping {var_list}")

# --------------------------
# Run parallel processing
# --------------------------
# At least one worker is required even when no file was found: Pool rejects
# processes=0. With an empty task list pool.map simply returns [].
# NOTE(review): mp.cpu_count() reports the machine's CPUs, which may exceed
# what a scheduler allocation actually grants — confirm this is acceptable.
n_workers = max(1, min(len(tasks), mp.cpu_count()))
with mp.Pool(processes=n_workers) as pool:
    results = pool.map(read_file_worker, tasks)


In [34]:
# Collect the per-file worker results into one dict keyed by variable name.
# Variables that no worker returned stay None; the first array seen wins.
data_by_var = {var: None for var in file_var_map}
for result in results:
    for var, data in result.items():
        if data_by_var[var] is None:
            data_by_var[var] = data

# --------------------------
# Filter time to target calendar year
# --------------------------
start = np.datetime64(f"{year}-01-01T00:00:00")
# NOTE(review): the upper bound stops at 20:00 on 31 December, so 21:00-23:00
# of the last day are excluded — confirm this is intended.
end = np.datetime64(f"{year}-12-31T20:00:00")
mask = (time_all_dt >= start) & (time_all_dt <= end)

# Derive the filtered epoch seconds from time_all_dt (which is never modified)
# instead of slicing time_all in place, so re-running this cell is safe.
time_all = (time_all_dt[mask] - np.datetime64("1970-01-01T00:00:00")) // np.timedelta64(1, "s")

for var in data_by_var:
    values = data_by_var[var]
    if values is None:
        continue
    if values.shape[0] == len(mask):
        data_by_var[var] = values[mask, :]
    else:
        # Left unfiltered; report it rather than letting it pass unnoticed.
        print(f"⚠️ {var}: {values.shape[0]} time steps, expected {len(mask)}; not filtered")

In [35]:
out_path = outdir / f"{year}_v3.nc"
nt, nn = len(time_all), len(node_ids)

# Variable name -> (long_name, units) written as attributes on each variable.
# NOTE(review): "pressure" is labelled "Pa" and "at 10m" without any conversion
# in this notebook — confirm against the units attribute of the source file.
var_metadata = {
    "zeta": ("Water surface elevation above geoid", "m"),
    "u-vel": ("Eastward depth-averaged velocity", "m/s"),
    "v-vel": ("Northward depth-averaged velocity", "m/s"),
    "pressure": ("Atmospheric pressure at 10m", "Pa"),
    "windx": ("Eastward 10m wind velocity", "m/s"),
    "windy": ("Northward 10m wind velocity", "m/s"),
    "iceaf": ("Sea ice area fraction", "1"),
    "swan_HS": ("Significant wave height", "m"),
    "swan_TPS": ("Peak spectral wave period", "s"),
    "swan_DIR": ("Mean wave direction", "degrees"),
}

with nc4.Dataset(out_path, "w", format="NETCDF4") as ds_out:
    # Global metadata
    ds_out.title = "ADCIRC-SWAN Simulation for Alaska"
    ds_out.institution = "GMU Flood Hazards Research Lab"
    ds_out.source = "ADCIRC-SWAN"
    ds_out.history = f"Created on {pd.Timestamp.now()} by Tyler Miesse"
    ds_out.Conventions = "CF-1.8"
    ds_out.contact = "tmiesse@gmu.edu"
    ds_out.summary = (
        "Hourly water level, wind, pressure, ice, and wave parameters for "
        "Arctic communities extracted from ADCIRC-SWAN simulations."
    )

    # Dimensions
    ds_out.createDimension("time", nt)
    ds_out.createDimension("node", nn)
    ds_out.createDimension("name_strlen", max_name_len)

    # Coordinates
    tvar = ds_out.createVariable("time", "f8", ("time",))
    tvar[:] = time_all
    tvar.units = "seconds since 1970-01-01 00:00:00"
    tvar.calendar = "standard"
    tvar.long_name = "Time"

    # The file declares CF-1.8, which identifies latitude/longitude coordinates
    # by their units, so those (and standard_name) are written explicitly.
    lat = ds_out.createVariable("lat", "f4", ("node",))
    lat[:] = comm_lats
    lat.long_name = "Latitude of community"
    lat.standard_name = "latitude"
    lat.units = "degrees_north"

    lon = ds_out.createVariable("lon", "f4", ("node",))
    lon[:] = comm_lons
    lon.long_name = "Longitude of community"
    lon.standard_name = "longitude"
    lon.units = "degrees_east"

    # Community names are stored as a 2-D character array, right-padded with
    # spaces to a common width.
    name_array = np.array([list(n.ljust(max_name_len)) for n in comm_names], dtype="S1")
    name_var = ds_out.createVariable("community", "S1", ("node", "name_strlen"))
    name_var[:, :] = name_array
    name_var.long_name = "Community name"

    # Data variables with metadata
    for var in data_by_var:
        v = ds_out.createVariable(var, "f4", ("time", "node"), zlib=True, fill_value=np.nan)

        # Variable-specific metadata
        if var in var_metadata:
            v.long_name, v.units = var_metadata[var]

        # Fill in data; a variable that is missing or has an unexpected shape
        # is written as all-NaN so the file layout stays the same.
        if data_by_var[var] is not None and data_by_var[var].shape == (nt, nn):
            v[:, :] = data_by_var[var]
        else:
            v[:, :] = np.full((nt, nn), np.nan)

print(f"✅ Extracted and saved to {out_path}")

✅ Extracted and saved to /groups/ORC-CLIMATE/fhrl_repo/Arctic_Database/Processed_DATA/2024_v3.nc


In [36]:
# Read the file fully into memory and close it again. A lazily opened dataset
# keeps a handle on out_path, which can get in the way of rewriting the file
# when the export cell is re-run.
test = xr.load_dataset(out_path)

In [37]:
# Show the dataset's summary repr as a quick check of the written file.
test