# Running FEISTY with forcing from CESM

Kristen generated the forcing files, I believe using `/glade/u/home/kristenk/fish-offline/notebooks/proc-cesm-dple-fields.ipynb`.
This notebook doesn't really need to be a notebook; I'm hoping it can be converted to a script instead.
For now, all the parameter settings are handled in the first (non-`import`) cell, but it might make sense to move them to a YAML file instead.

## Imports

In [1]:
import os

import dask
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from dask.distributed import Client
from dask_jobqueue import PBSCluster

import feisty

# from feisty.utils import generate_single_ds_for_feisty, generate_template

  from distributed.utils import tmpfile


## Configure run

The FEISTY function `config_and_run_from_dataset()` needs an `xarray.Dataset` containing forcing, initial conditions, and bathymetry information.
Several additional parameters control how the run is set up.

In [2]:
# ---- Run-control parameters ----

# Number of dask chunks to break the data into; this is also the number of
# parallel tasks that will be run.
num_chunks = 18

# For a spin-up run, generate a dataset with a single year of forcing and set
# this to True.
ignore_year_in_forcing = False

# Length of run in years (alternative value noted by the author: 50).
# For FOSI, there are 68 years of forcing.
nyears = 5

# Must match the calendar of the forcing, which kept the CESM mid-month dates
# from the run: 0249-01-15 through 0316-12-15.
start_date = '0249-01-01'

# Default settings in feisty/core/default_settings.yaml are correct for FOSI,
# so no overrides are supplied.
settings_in = {}

# No extra diagnostics requested: only biomass is wanted in the output.
diagnostic_names = []

# Break up output into 1-year chunks.
max_output_time_dim = 365

# Only available time-stepping method at this time.
method = 'euler'

# ---- Build the single input dataset ----
# FEISTY has a helper that reads forcing / initial condition files and
# generates the necessary dataset.

# (1) Paths to the netCDF files containing forcing and initial conditions.
feisty_data_root = os.path.join(
    os.sep, 'glade', 'work', 'mlevy', 'codes', 'feisty', 'input_files'
)
forcing_file = os.path.join(feisty_data_root, 'feisty_input_from_FOSI_monthly.nc')
ic_file = os.path.join(feisty_data_root, 'FOSI_cesm_init_200yr.nc')

# (2) Dictionary of any variables that need to be renamed.
forcing_rename = {
    'time': 'forcing_time',
    'dep': 'bathymetry',
}

ds = feisty.utils.generate_single_ds_for_feisty(
    num_chunks=num_chunks,
    forcing_file=forcing_file,
    ic_file=ic_file,
    forcing_rename=forcing_rename,
)

ds

Starting forcing dataset generation at 14:30:44


Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(85813,)","(4768,)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 670.41 kiB 37.25 kiB Shape (85813,) (4768,) Count 19 Tasks 18 Chunks Type float64 numpy.ndarray",85813  1,

Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(85813,)","(4768,)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(85813,)","(4768,)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 670.41 kiB 37.25 kiB Shape (85813,) (4768,) Count 19 Tasks 18 Chunks Type float64 numpy.ndarray",85813  1,

Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(85813,)","(4768,)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,19 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 267.12 MiB 14.84 MiB Shape (816, 85813) (816, 4768) Count 19 Tasks 18 Chunks Type float32 numpy.ndarray",85813  816,

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,19 Tasks,18 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,19 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 267.12 MiB 14.84 MiB Shape (816, 85813) (816, 4768) Count 19 Tasks 18 Chunks Type float32 numpy.ndarray",85813  816,

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,19 Tasks,18 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,55 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 267.12 MiB 14.84 MiB Shape (816, 85813) (816, 4768) Count 55 Tasks 18 Chunks Type float32 numpy.ndarray",85813  816,

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(816, 85813)","(816, 4768)"
Count,55 Tasks,18 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(1, 816, 85813)","(1, 816, 4768)"
Count,73 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 267.12 MiB 14.84 MiB Shape (1, 816, 85813) (1, 816, 4768) Count 73 Tasks 18 Chunks Type float32 numpy.ndarray",85813  816  1,

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(1, 816, 85813)","(1, 816, 4768)"
Count,73 Tasks,18 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(1, 816, 85813)","(1, 816, 4768)"
Count,73 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 267.12 MiB 14.84 MiB Shape (1, 816, 85813) (1, 816, 4768) Count 73 Tasks 18 Chunks Type float32 numpy.ndarray",85813  816  1,

Unnamed: 0,Array,Chunk
Bytes,267.12 MiB,14.84 MiB
Shape,"(1, 816, 85813)","(1, 816, 4768)"
Count,73 Tasks,18 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.24 MiB,298.00 kiB
Shape,"(8, 85813)","(8, 4768)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.24 MiB 298.00 kiB Shape (8, 85813) (8, 4768) Count 19 Tasks 18 Chunks Type float64 numpy.ndarray",85813  8,

Unnamed: 0,Array,Chunk
Bytes,5.24 MiB,298.00 kiB
Shape,"(8, 85813)","(8, 4768)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(1, 85813)","(1, 4768)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 670.41 kiB 37.25 kiB Shape (1, 85813) (1, 4768) Count 19 Tasks 18 Chunks Type float64 numpy.ndarray",85813  1,

Unnamed: 0,Array,Chunk
Bytes,670.41 kiB,37.25 kiB
Shape,"(1, 85813)","(1, 4768)"
Count,19 Tasks,18 Chunks
Type,float64,numpy.ndarray


In [3]:
%%time

# Generate a template for the output of map_blocks
template = feisty.utils.generate_template(
    ds=ds,
    nsteps=nyears * 365,
    start_date=start_date,
    diagnostic_names=diagnostic_names,
)

template

Starting template generation at 14:30:44
CPU times: user 10.3 s, sys: 8.9 ms, total: 10.3 s
Wall time: 10.3 s


Unnamed: 0,Array,Chunk
Bytes,10.50 GiB,597.49 MiB
Shape,"(1825, 9, 85813)","(1825, 9, 4768)"
Count,18 Tasks,18 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 10.50 GiB 597.49 MiB Shape (1825, 9, 85813) (1825, 9, 4768) Count 18 Tasks 18 Chunks Type float64 numpy.ndarray",85813  9  1825,

Unnamed: 0,Array,Chunk
Bytes,10.50 GiB,597.49 MiB
Shape,"(1825, 9, 85813)","(1825, 9, 4768)"
Count,18 Tasks,18 Chunks
Type,float64,numpy.ndarray


## Set up Dask cluster

Since the data in `ds` is chunked along `X`, we use a `dask` cluster to process the chunks in parallel.

In [4]:
# Size of the single-node PBS request. These two values feed the worker layout,
# the memory limit, and the PBS resource spec, so they cannot drift apart.
cores_per_node = 36
node_memory_gb = 300

# One worker process per chunk, capped at the cores available on the node.
# Built-in min() keeps this a plain Python int (np.minimum on two scalars
# returns a NumPy scalar).
ncores = min(num_chunks, cores_per_node)

# '{port}' is a literal template placeholder handed to dask, not an f-string field.
dask.config.set({'distributed.dashboard.link': '/proxy/{port}/status'})
# Optional longer communication timeouts (left disabled):
# dask.config.set({"distributed.comm.timeouts.tcp": "180s"})
# dask.config.set({"distributed.comm.timeouts.connect": "180s"})
cluster = PBSCluster(
    memory=f'{node_memory_gb} GB',
    processes=ncores,
    cores=ncores,
    queue='casper',
    walltime='1:00:00',
    resource_spec=f'select=1:ncpus={cores_per_node}:mem={node_memory_gb}GB',
)

# Ask the cluster to start workers, then connect a client to it.
# The summary displayed below may report 0 workers until the PBS job starts.
cluster.scale(1)
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.36:36298,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Run the model

In [6]:
%%time

print(f'Configuring and running FEISTY...')
nsteps = nyears * 365

# map_blocks lets us run in parallel over our dask cluster
ds_out = xr.map_blocks(
    feisty.config_and_run_from_dataset,
    ds,
    args=(
        nsteps,
        start_date,
        ignore_year_in_forcing,
        settings_in,
        diagnostic_names,
        max_output_time_dim,
        method,
    ),
    template=template,
).compute()

print(ds_out.isel(X=55000))

Configuring and running FEISTY...
<xarray.Dataset>
Dimensions:  (time: 1825, group: 9)
Coordinates:
  * time     (time) object 0249-01-01 00:00:00 ... 0253-12-31 00:00:00
  * group    (group) object 'Sf' 'Sp' 'Sd' 'Mf' ... 'Lp' 'Ld' 'benthic_prey'
    X        float64 174.3
Data variables:
    biomass  (time, group) float64 0.007073 2.451e-06 3.462e-05 ... 0.214 20.16
CPU times: user 7.64 s, sys: 14.3 s, total: 22 s
Wall time: 2min 6s


### Plotting

Make a plot of `biomass` over time at a specified column

In [None]:
# Positional index along the X dimension of the column to plot.
# Candidate columns from the FOSI simulations (uncomment one to switch):
X = 55_000  # looks good compared to matlab (not plotted in this notebook)
# X = 15633  # large error in small classes
# X = 11677  # large error in medium classes
# X = 76989  # large error in benthic

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8, 4))

# Pick the column once, then draw one biomass time series per group on the same axes.
group_names = ds_out.group.data
biomass_at_column = ds_out.biomass.isel(X=X)
for group_name in group_names:
    biomass_at_column.sel(group=group_name).plot(ax=ax)

# Fixed log-scaled y-range.
ax.set_ylim(5e-7, 50)
ax.set_yscale("log")
# Replaces the title that xarray sets automatically.
ax.set_title("python")
# Legend entries follow the plotting order above; anchored just outside the right edge.
ax.legend(group_names, bbox_to_anchor=(1.025, 0.5), loc="center left")
fig.suptitle(f"biomass at X={X}");