Make pyarrow optional (#113)

* make pyarrow optional
* add importorskip
openforcefield · Apr 11, 2024 · 72d52f7
1 parent 71a4af4
commit 72d52f7
Show file tree

Hide file tree

Showing 9 changed files with 66 additions and 214 deletions.
diff --git a/devtools/conda-envs/test_env_dgl_false.yaml b/devtools/conda-envs/test_env_dgl_false.yaml
@@ -23,8 +23,8 @@ dependencies:
   - scipy
   - ambertools
 
-  # database
-  - pyarrow
+  # # database
+  # - pyarrow
 
   # gcn
   - pytorch >=2.0

diff --git a/openff/nagl/label/_label.py b/openff/nagl/label/_label.py
diff --git a/openff/nagl/label/dataset.py b/openff/nagl/label/dataset.py
@@ -3,18 +3,16 @@
 import tqdm
 import typing
 
-import numpy as np
-import pyarrow as pa
-import pyarrow.dataset as ds
-import pyarrow.parquet as pq
-
-from openff.units import unit
-
 from openff.nagl.utils._parallelization import get_mapper_to_processes
 from openff.nagl.label.labels import LabellerType
+from openff.utilities import requires_package
+
+if typing.TYPE_CHECKING:
+    import pyarrow
 
 class LabelledDataset:
 
+    @requires_package("pyarrow")
     def __init__(
             self,
             source,
@@ -28,6 +26,8 @@ def to_pandas(self, columns=None):
         return self.dataset.to_table(columns=columns).to_pandas()
 
     def _reload(self):
+        import pyarrow.dataset as ds
+
         self.dataset = ds.dataset(self.source, format="parquet")
 
     @classmethod
@@ -44,6 +44,8 @@ def from_smiles(
     ):
         from openff.toolkit import Molecule
 
+        import pyarrow as pa
+        import pyarrow.dataset as ds
 
         loader = functools.partial(
             Molecule.from_smiles,
@@ -84,16 +86,19 @@ def from_smiles(
 
     def append_columns(
         self,
-        columns: typing.Dict[pa.Field, typing.Iterable[typing.Any]],
+        columns: typing.Dict["pyarrow.Field", typing.Iterable[typing.Any]],
         exist_ok: bool = False,
     ):
         self._append_columns(columns, exist_ok=exist_ok)
 
     def _append_columns(
         self,
-        columns: typing.Dict[pa.Field, typing.Iterable[typing.Any]],
+        columns: typing.Dict["pyarrow.Field", typing.Iterable[typing.Any]],
         exist_ok: bool = False,
     ):
+        import pyarrow.dataset as ds
+        import pyarrow.parquet as pq
+
         from .utils import _append_column_to_table
 
         n_all_rows = self.dataset.count_rows()

diff --git a/openff/nagl/label/labels.py b/openff/nagl/label/labels.py
@@ -7,14 +7,13 @@
 import typing
 
 import numpy as np
-import pyarrow as pa
-import pyarrow.parquet as pq
-import pyarrow.dataset as ds
-
 from openff.units import unit
+from openff.utilities import requires_package
 
 from openff.nagl._base.base import ImmutableModel
-from openff.utilities import requires_package
+
+if typing.TYPE_CHECKING:
+    import pyarrow
 
 ChargeMethodType = typing.Literal[
     "am1bcc", "am1-mulliken", "gasteiger", "formal_charge",
@@ -28,12 +27,13 @@ class _BaseLabel(ImmutableModel, abc.ABC):
     smiles_column: str = "mapped_smiles"
     verbose: bool = False
 
+    @requires_package("pyarrow")
     def _append_column(
         self,
-        table: pa.Table,
-        key: typing.Union[pa.Field, str],
+        table: "pyarrow.Table",
+        key: typing.Union["pyarrow.Field", str],
         values: typing.Iterable[typing.Any],
-    ) -> pa.Table:
+    ) -> "pyarrow.Table":
         from .utils import _append_column_to_table
         return _append_column_to_table(
             table,
@@ -46,9 +46,9 @@ def _append_column(
     @abc.abstractmethod
     def apply(
         self,
-        table: pa.Table,
+        table: "pyarrow.Table",
         verbose: bool = False,
-    ) -> pa.Table:
+    ) -> "pyarrow.Table":
         raise NotImplementedError()
 
 
@@ -62,10 +62,11 @@ class LabelConformers(_BaseLabel):
 
     def apply(
         self,
-        table: pa.Table,
+        table: "pyarrow.Table",
         verbose: bool = False,
     ):
         from openff.toolkit import Molecule
+        import pyarrow as pa
 
         rms_cutoff = self.rms_cutoff
         if not isinstance(rms_cutoff, unit.Quantity):
@@ -170,9 +171,11 @@ def _assign_charges(
 
     def apply(
         self,
-        table: pa.Table,
+        table: "pyarrow.Table",
         verbose: bool = False,
     ):
+        import pyarrow as pa
+
         rows = table.to_pylist()
         if verbose:
             rows = tqdm.tqdm(rows, desc="Assigning charges")
@@ -221,9 +224,11 @@ def _calculate_dipoles(
 
     def apply(
         self,
-        table: pa.Table,
+        table: "pyarrow.Table",
         verbose: bool = False,
     ):
+        import pyarrow as pa
+
         rows = table.to_pylist()
         if verbose:
             rows = tqdm.tqdm(rows, desc="Calculating dipoles")
@@ -322,9 +327,11 @@ def _calculate_esp(
 
     def apply(
         self,
-        table: pa.Table,
+        table: "pyarrow.Table",
         verbose: bool = False,
     ):
+        import pyarrow as pa
+
         rows = table.to_pylist()
         if verbose:
             rows = tqdm.tqdm(rows, desc="Calculating ESPs")
@@ -402,7 +409,7 @@ def apply(
 ]
 
 def apply_labellers(
-    table: pa.Table,
+    table: "pyarrow.Table",
     labellers: typing.Iterable[LabellerType],
     verbose: bool = False,
 ):
@@ -417,6 +424,9 @@ def apply_labellers_to_batch_file(
     labellers: typing.Iterable[LabellerType] = tuple(),
     verbose: bool = False,
 ):
+    import pyarrow.dataset as ds
+    import pyarrow.parquet as pq
+
     if not labellers:
         return
     source = pathlib.Path(source)

diff --git a/openff/nagl/label/utils.py b/openff/nagl/label/utils.py
@@ -1,16 +1,14 @@
 import logging
 import typing
 
-import numpy as np
-
-import pyarrow.parquet as pq
-import pyarrow.dataset as ds
+from openff.utilities import requires_package
 
 if typing.TYPE_CHECKING:
     import pyarrow as pa
 
 logger = logging.getLogger(__name__)
 
+@requires_package("pyarrow")
 def _append_column_to_table(
     table: "pa.Table",
     key: typing.Union["pa.Field", str],