feat(modflow): support dataframe for pkg data (#2010)

* support pandas dataframe for node_data, stress_period_data etc for flopy.modflow.* pkgs * just convert to recarray, don't reimplement data storage with pandas like for mf6 * add tests with mnw and wel
modflowpy · Nov 22, 2023 · be104c4 · be104c4
1 parent d0546ce
commit be104c4
Show file tree

Hide file tree

Showing 17 changed files with 185 additions and 61 deletions.
diff --git a/autotest/test_mnw.py b/autotest/test_mnw.py
@@ -23,7 +23,7 @@ def mnw1_path(example_data_path):
     return example_data_path / "mf2005_test"
 
 
-def test_load(function_tmpdir, example_data_path, mnw2_examples_path):
+def test_load(function_tmpdir, mnw2_examples_path):
     """t027 test load of MNW2 Package"""
     # load in the test problem (1 well, 3 stress periods)
     m = Modflow.load(
@@ -94,7 +94,8 @@ def test_mnw1_load_write(function_tmpdir, mnw1_path):
         assert np.array_equal(v, m2.mnw1.stress_period_data[k])
 
 
-def test_make_package(function_tmpdir):
+@pytest.mark.parametrize("dataframe", [True, False])
+def test_make_package(function_tmpdir, dataframe):
     """t027 test make MNW2 Package"""
     ws = function_tmpdir
     m4 = Modflow("mnw2example", model_ws=ws)
@@ -195,6 +196,9 @@ def test_make_package(function_tmpdir):
         ).view(np.recarray),
     }
 
+    if dataframe:
+        node_data = pd.DataFrame(node_data)
+
     mnw2_4 = ModflowMnw2(
         model=m4,
         mnwmax=2,
@@ -257,6 +261,9 @@ def test_make_package(function_tmpdir):
         ).view(np.recarray),
     }
 
+    if dataframe:
+        node_data = pd.DataFrame(node_data)
+
     mnw2_4 = ModflowMnw2(
         model=m4,
         mnwmax=2,
@@ -294,7 +301,8 @@ def test_make_package(function_tmpdir):
     )
 
 
-def test_mnw2_create_file(function_tmpdir):
+@pytest.mark.parametrize("dataframe", [True, False])
+def test_mnw2_create_file(function_tmpdir, dataframe):
     """
     Test for issue #556, Mnw2 crashed if wells have
     multiple node lengths
@@ -341,8 +349,12 @@ def test_mnw2_create_file(function_tmpdir):
             wellids[i],
             nnodes=nlayers[i],
             nper=len(stress_period_data.index),
-            node_data=node_data.to_records(index=False),
-            stress_period_data=stress_period_data.to_records(index=False),
+            node_data=node_data.to_records(index=False)
+            if not dataframe
+            else node_data,
+            stress_period_data=stress_period_data.to_records(index=False)
+            if not dataframe
+            else stress_period_data,
         )
 
         wells.append(wl)

diff --git a/autotest/test_modflow.py b/autotest/test_modflow.py
@@ -3,8 +3,10 @@
 import os
 import shutil
 from pathlib import Path
+from typing import Dict
 
 import numpy as np
+import pandas as pd
 import pytest
 from autotest.conftest import get_example_data_path
 from modflow_devtools.markers import excludes_platform, requires_exe
@@ -261,7 +263,7 @@ def test_mt_modelgrid(function_tmpdir):
     assert np.array_equal(swt.modelgrid.idomain, ml.modelgrid.idomain)
 
 
-@requires_exe("mp7")
+@requires_exe("mp7", "mf2005")
 def test_exe_selection(example_data_path, function_tmpdir):
     model_path = example_data_path / "freyberg"
     namfile_path = model_path / "freyberg.nam"
@@ -1287,7 +1289,91 @@ def test_load_with_list_reader(function_tmpdir):
     assert np.array_equal(originalwelra, m2.wel.stress_period_data[0])
 
 
-def get_basic_modflow_model(ws, name):
+@requires_exe("mf2005")
+@pytest.mark.parametrize(
+    "container",
+    [
+        str(np.recarray),
+        str(pd.DataFrame),
+        str(Dict[int, np.recarray]),
+        str(Dict[int, pd.DataFrame]),
+    ],
+)
+def test_pkg_data_containers(function_tmpdir, container):
+    """Test various containers for package data (list, ndarray, recarray, dataframe, dict of such)"""
+
+    name = "pkg_data"
+    ws = function_tmpdir
+    m = Modflow(name, model_ws=ws)
+    size = 100
+    nlay = 10
+    nper = 1
+
+    # grid discretization
+    dis = ModflowDis(
+        m,
+        nper=nper,
+        nlay=nlay,
+        nrow=size,
+        ncol=size,
+        top=nlay,
+        botm=list(range(nlay)),
+    )
+
+    # recharge pkg
+    rch = ModflowRch(
+        m, rech={k: 0.001 - np.cos(k) * 0.001 for k in range(nper)}
+    )
+
+    # well pkg, setup data per 'container' parameter
+    # to test support for various container types
+    ra = ModflowWel.get_empty(size**2)
+    ra_per = ra.copy()
+    ra_per["k"] = 1
+    ra_per["i"] = (
+        (np.ones((size, size)) * np.arange(size))
+        .transpose()
+        .ravel()
+        .astype(int)
+    )
+    ra_per["j"] = list(range(size)) * size
+    wel_dtype = np.dtype(
+        [
+            ("k", int),
+            ("i", int),
+            ("j", int),
+            ("flux", np.float32),
+        ]
+    )
+    df_per = pd.DataFrame(ra_per)
+    if "'numpy.recarray'" in container:
+        well_spd = ra_per
+    elif "'pandas.core.frame.DataFrame'" in container:
+        well_spd = df_per
+    elif "Dict[int, numpy.recarray]" in container:
+        well_spd = {}
+        well_spd[0] = ra_per
+    elif "Dict[int, pandas.core.frame.DataFrame]" in container:
+        well_spd = {}
+        well_spd[0] = df_per
+    wel = ModflowWel(m, stress_period_data=well_spd)
+
+    # basic pkg
+    bas = ModflowBas(m)
+
+    # solver
+    pcg = ModflowPcg(m)
+
+    # output control pkg
+    oc = ModflowOc(m)
+
+    # write and run the model
+    m.write_input()
+    success, _ = m.run_model(silent=False)
+    assert success
+
+
+def get_perftest_model(ws, name):
     m = Modflow(name, model_ws=ws)
 
     size = 100
@@ -1338,20 +1424,20 @@ def get_basic_modflow_model(ws, name):
 @pytest.mark.slow
 def test_model_init_time(function_tmpdir, benchmark):
     name = inspect.getframeinfo(inspect.currentframe()).function
-    benchmark(lambda: get_basic_modflow_model(ws=function_tmpdir, name=name))
+    benchmark(lambda: get_perftest_model(ws=function_tmpdir, name=name))
 
 
 @pytest.mark.slow
 def test_model_write_time(function_tmpdir, benchmark):
     name = inspect.getframeinfo(inspect.currentframe()).function
-    model = get_basic_modflow_model(ws=function_tmpdir, name=name)
+    model = get_perftest_model(ws=function_tmpdir, name=name)
     benchmark(lambda: model.write_input())
 
 
 @pytest.mark.slow
 def test_model_load_time(function_tmpdir, benchmark):
     name = inspect.getframeinfo(inspect.currentframe()).function
-    model = get_basic_modflow_model(ws=function_tmpdir, name=name)
+    model = get_perftest_model(ws=function_tmpdir, name=name)
     model.write_input()
     benchmark(
         lambda: Modflow.load(

diff --git a/flopy/modflow/mfag.py b/flopy/modflow/mfag.py
@@ -11,6 +11,7 @@
 import os
 
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils.flopy_io import multi_line_strip
@@ -29,9 +30,9 @@ class ModflowAg(Package):
         model object
     options : flopy.utils.OptionBlock object
         option block object
-    time_series : np.recarray
+    time_series : np.recarray or pd.DataFrame
         numpy recarray for the time series block
-    well_list : np.recarray
+    well_list : np.recarray or pd.DataFrame
         recarray of the well_list block
     irrdiversion : dict {per: np.recarray}
         dictionary of the irrdiversion block
@@ -269,8 +270,16 @@ def __init__(
         else:
             self.options = OptionBlock("", ModflowAg)
 
-        self.time_series = time_series
-        self.well_list = well_list
+        self.time_series = (
+            time_series.to_records(index=False)
+            if isinstance(time_series, pd.DataFrame)
+            else time_series
+        )
+        self.well_list = (
+            well_list.to_records(index=False)
+            if isinstance(well_list, pd.DataFrame)
+            else well_list
+        )
         self.irrdiversion = irrdiversion
         self.irrwell = irrwell
         self.supwell = supwell

diff --git a/flopy/modflow/mfchd.py b/flopy/modflow/mfchd.py
@@ -24,8 +24,7 @@ class ModflowChd(Package):
     model : model object
         The model object (of type :class:`flopy.modflow.mf.Modflow`) to which
         this package will be added.
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
 
         Each chd cell is defined through definition of
         layer (int), row (int), column (int), shead (float), ehead (float)

diff --git a/flopy/modflow/mfdrn.py b/flopy/modflow/mfdrn.py
@@ -27,8 +27,7 @@ class ModflowDrn(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is None).
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe or dictionary of boundaries.
         Each drain cell is defined through definition of
         layer(int), row(int), column(int), elevation(float),
         conductance(float).

diff --git a/flopy/modflow/mfdrt.py b/flopy/modflow/mfdrt.py
@@ -27,8 +27,7 @@ class ModflowDrt(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is None).
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe or dictionary of boundaries.
         Each drain return cell is defined through definition of
         layer(int), row(int), column(int), elevation(float),
         conductance(float), layerR (int) , rowR (int), colR (int) and rfprop (float).

diff --git a/flopy/modflow/mffhb.py b/flopy/modflow/mffhb.py
@@ -8,6 +8,7 @@
 
 """
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import read1d
@@ -64,7 +65,7 @@ class ModflowFhb(Package):
         (default is 0.0)
     cnstm5 : float
         A constant multiplier for data list flwrat. (default is 1.0)
-    ds5 : list or numpy array or recarray
+    ds5 : list or numpy array or recarray or pandas dataframe
         Each FHB flwrat cell (dataset 5) is defined through definition of
         layer(int), row(int), column(int), iaux(int), flwrat[nbdtime](float).
         There should be nflw entries. (default is None)
@@ -81,7 +82,7 @@ class ModflowFhb(Package):
 
     cnstm7 : float
         A constant multiplier for data list sbhedt. (default is 1.0)
-    ds7 : list or numpy array or recarray
+    ds7 : list or numpy array or recarray or pandas dataframe
         Each FHB sbhed cell (dataset 7) is defined through definition of
         layer(int), row(int), column(int), iaux(int), sbhed[nbdtime](float).
         There should be nhed entries. (default is None)
@@ -211,6 +212,8 @@ def __init__(
                 raise TypeError(msg)
             elif isinstance(ds5, list):
                 ds5 = np.array(ds5)
+            elif isinstance(ds5, pd.DataFrame):
+                ds5 = ds5.to_records(index=False)
             # convert numpy array to a recarray
             if ds5.dtype != dtype:
                 ds5 = np.core.records.fromarrays(ds5.transpose(), dtype=dtype)
@@ -228,6 +231,8 @@ def __init__(
                 raise TypeError(msg)
             elif isinstance(ds7, list):
                 ds7 = np.array(ds7)
+            elif isinstance(ds7, pd.DataFrame):
+                ds7 = ds7.to_records(index=False)
             # convert numpy array to a recarray
             if ds7.dtype != dtype:
                 ds7 = np.core.records.fromarrays(ds7.transpose(), dtype=dtype)

diff --git a/flopy/modflow/mfgage.py b/flopy/modflow/mfgage.py
@@ -10,6 +10,7 @@
 import os
 
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import read_fixed_var, write_fixed_var
@@ -27,7 +28,7 @@ class ModflowGage(Package):
         this package will be added.
     numgage : int
         The total number of gages included in the gage file (default is 0).
-    gage_data : list or numpy array
+    gage_data : list or numpy array or recarray or pandas dataframe
         data for dataset 2a and 2b in the gage package. If a list is provided
         then the list includes 2 to 3 entries (LAKE UNIT [OUTTYPE]) for each
         LAK Package entry and 4 entries (GAGESEG GAGERCH UNIT OUTTYPE) for
@@ -132,6 +133,8 @@ def __init__(
                     gage_data = np.core.records.fromarrays(
                         gage_data.transpose(), dtype=dtype
                     )
+            elif isinstance(gage_data, pd.DataFrame):
+                gage_data = gage_data.to_records(index=False)
             elif isinstance(gage_data, list):
                 d = ModflowGage.get_empty(ncells=numgage)
                 for n in range(len(gage_data)):

diff --git a/flopy/modflow/mfghb.py b/flopy/modflow/mfghb.py
@@ -27,8 +27,7 @@ class ModflowGhb(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is 0).
-    stress_period_data : list of boundaries, recarray of boundaries or,
-        dictionary of boundaries.
+    stress_period_data : list, recarray, dataframe or dictionary of boundaries.
 
         Each ghb cell is defined through definition of
         layer(int), row(int), column(int), stage(float), conductance(float)

diff --git a/flopy/modflow/mfhyd.py b/flopy/modflow/mfhyd.py
@@ -8,6 +8,7 @@
 
 """
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils.recarray_utils import create_empty_recarray
@@ -31,7 +32,7 @@ class ModflowHyd(Package):
         is a user-specified value that is output if a value cannot be computed
         at a hydrograph location. For example, the cell in which the hydrograph
         is located may be a no-flow cell. (default is -999.)
-    obsdata : list of lists, numpy array, or numpy recarray (nhyd, 7)
+    obsdata : list of lists, numpy array or recarray, or pandas dataframe (nhyd, 7)
         Each row of obsdata includes data defining pckg (3 character string),
         arr (2 character string), intyp (1 character string) klay (int),
         xl (float), yl (float), hydlbl (14 character string) for each
@@ -158,6 +159,8 @@ def __init__(
 
         dtype = ModflowHyd.get_default_dtype()
         obs = ModflowHyd.get_empty(nhyd)
+        if isinstance(obsdata, pd.DataFrame):
+            obsdata = obsdata.to_records(index=False)
         if isinstance(obsdata, list):
             if len(obsdata) != nhyd:
                 raise RuntimeError(