From c1ac21be22bd9572beeb1ffe4d5d0d6184e3959c Mon Sep 17 00:00:00 2001
From: Stefan Doerr <stefdoerr@gmail.com>
Date: Wed, 27 Mar 2024 16:51:39 +0200
Subject: [PATCH 1/4] support a zip of ckpt files for ensemble models

---
 torchmdnet/models/model.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/torchmdnet/models/model.py b/torchmdnet/models/model.py
index c090f90f..eed99024 100644
--- a/torchmdnet/models/model.py
+++ b/torchmdnet/models/model.py
@@ -144,7 +144,7 @@ def load_model(filepath, args=None, device="cpu", return_std=False, **kwargs):
 
        If a list of paths is given, an :py:mod:`Ensemble` model is returned.
     Args:
-        filepath (str or list): Path to the checkpoint file or a list of paths.
+        filepath (str or list): Path to the checkpoint file or a list of paths or a zip of checkpoints.
         args (dict, optional): Arguments for the model. Defaults to None.
         device (str, optional): Device on which the model should be loaded. Defaults to "cpu".
         return_std (bool, optional): Whether to return the standard deviation of an Ensemble model. Defaults to False.
@@ -159,6 +159,23 @@ def load_model(filepath, args=None, device="cpu", return_std=False, **kwargs):
             return_std=return_std,
         )
 
+    if filepath.endswith(".zip"):
+        import zipfile
+        import tempfile
+        from glob import glob
+        import os
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with zipfile.ZipFile(filepath, "r") as z:
+                z.extractall(tmpdir)
+
+            filepath = glob(os.path.join(tmpdir, "*.ckpt"))
+
+            return Ensemble(
+                [load_model(f, args=args, device=device, **kwargs) for f in filepath],
+                return_std=return_std,
+            )
+
     ckpt = torch.load(filepath, map_location="cpu")
     if args is None:
         args = ckpt["hyper_parameters"]

From 9ece01b87d04168a5809bf3c660df69bc4b98262 Mon Sep 17 00:00:00 2001
From: Stefan Doerr <stefdoerr@gmail.com>
Date: Wed, 27 Mar 2024 16:57:36 +0200
Subject: [PATCH 2/4] add test

---
 tests/test_model.py | 60 ++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 20 deletions(-)

diff --git a/tests/test_model.py b/tests/test_model.py
index 1dd5e354..f606559e 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -116,26 +116,28 @@ def test_cuda_graph_compatible(model_name):
     if not torch.cuda.is_available():
         pytest.skip("CUDA not available")
     z, pos, batch = create_example_batch()
-    args = {"model": model_name,
-            "embedding_dimension": 128,
-            "num_layers": 2,
-            "num_rbf": 32,
-            "rbf_type": "expnorm",
-            "trainable_rbf": False,
-            "activation": "silu",
-            "cutoff_lower": 0.0,
-            "cutoff_upper": 5.0,
-            "max_z": 100,
-            "max_num_neighbors": 128,
-            "equivariance_invariance_group": "O(3)",
-            "prior_model": None,
-            "atom_filter": -1,
-            "derivative": True,
-            "check_errors": False,
-            "static_shapes": True,
-            "output_model": "Scalar",
-            "reduce_op": "sum",
-            "precision": 32 }
+    args = {
+        "model": model_name,
+        "embedding_dimension": 128,
+        "num_layers": 2,
+        "num_rbf": 32,
+        "rbf_type": "expnorm",
+        "trainable_rbf": False,
+        "activation": "silu",
+        "cutoff_lower": 0.0,
+        "cutoff_upper": 5.0,
+        "max_z": 100,
+        "max_num_neighbors": 128,
+        "equivariance_invariance_group": "O(3)",
+        "prior_model": None,
+        "atom_filter": -1,
+        "derivative": True,
+        "check_errors": False,
+        "static_shapes": True,
+        "output_model": "Scalar",
+        "reduce_op": "sum",
+        "precision": 32,
+    }
     model = create_model(args).to(device="cuda")
     model.eval()
     z = z.to("cuda")
@@ -260,3 +262,21 @@ def test_ensemble():
     assert neg_dy_std.shape == deriv.shape
     assert (y_std == 0).all()
     assert (neg_dy_std == 0).all()
+
+    import zipfile
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        ensemble_zip = join(tmpdir, "ensemble.zip")
+        with zipfile.ZipFile(ensemble_zip, "w") as zipf:
+            for i, ckpt in enumerate(ckpts):
+                zipf.write(ckpt, f"model_{i}.ckpt")
+        ensemble_model = load_model(ensemble_zip, return_std=True)
+        pred_ensemble, deriv_ensemble, y_std, neg_dy_std = ensemble_model(z, pos, batch)
+
+    torch.testing.assert_close(pred, pred_ensemble, atol=1e-5, rtol=1e-5)
+    torch.testing.assert_close(deriv, deriv_ensemble, atol=1e-5, rtol=1e-5)
+    assert y_std.shape == pred.shape
+    assert neg_dy_std.shape == deriv.shape
+    assert (y_std == 0).all()
+    assert (neg_dy_std == 0).all()

From 78482a8a91ebbedff4fa461e2bf95ebde342af4a Mon Sep 17 00:00:00 2001
From: RaulPPealez <raulppelaez@gmail.com>
Date: Thu, 28 Mar 2024 07:35:33 +0100
Subject: [PATCH 3/4] Update docstring

---
 torchmdnet/models/model.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/torchmdnet/models/model.py b/torchmdnet/models/model.py
index eed99024..9ec852b6 100644
--- a/torchmdnet/models/model.py
+++ b/torchmdnet/models/model.py
@@ -142,16 +142,21 @@ def create_model(args, prior_model=None, mean=None, std=None):
 def load_model(filepath, args=None, device="cpu", return_std=False, **kwargs):
     """Load a model from a checkpoint file.
 
-       If a list of paths is given, an :py:mod:`Ensemble` model is returned.
+       If a list of paths or a path to a zip file is given, an :py:mod:`Ensemble` model is returned.
     Args:
-        filepath (str or list): Path to the checkpoint file or a list of paths or a zip of checkpoints.
+        filepath (str or list): Can be any of the following:
+
+            - Path to a checkpoint file. In this case, a :py:mod:`TorchMD_Net` model is returned.
+            - Path to a zip file containing multiple checkpoint files. In this case, an :py:mod:`Ensemble` model is returned.
+            - List of paths to checkpoint files. In this case, an :py:mod:`Ensemble` model is returned.
+
         args (dict, optional): Arguments for the model. Defaults to None.
         device (str, optional): Device on which the model should be loaded. Defaults to "cpu".
         return_std (bool, optional): Whether to return the standard deviation of an Ensemble model. Defaults to False.
         **kwargs: Extra keyword arguments for the model.
 
     Returns:
-        nn.Module: An instance of the TorchMD_Net model.
+        nn.Module: An instance of the TorchMD_Net model or an Ensemble model.
     """
     if isinstance(filepath, (list, tuple)):
         return Ensemble(

From 1a7b2746c48d55124ff5e2fb92fee0f9f85894cb Mon Sep 17 00:00:00 2001
From: RaulPPealez <raulppelaez@gmail.com>
Date: Thu, 28 Mar 2024 07:58:26 +0100
Subject: [PATCH 4/4] Move Ensemble loading to a different function

---
 torchmdnet/models/model.py | 73 ++++++++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/torchmdnet/models/model.py b/torchmdnet/models/model.py
index 9ec852b6..91369304 100644
--- a/torchmdnet/models/model.py
+++ b/torchmdnet/models/model.py
@@ -1,8 +1,10 @@
 # Copyright Universitat Pompeu Fabra 2020-2023  https://www.compscience.org
 # Distributed under the MIT License.
 # (See accompanying file README.md file or copy at http://opensource.org/licenses/MIT)
-
+from glob import glob
+import os
 import re
+import tempfile
 from typing import Optional, List, Tuple, Dict
 import torch
 from torch.autograd import grad
@@ -13,6 +15,7 @@
 from torchmdnet import priors
 from lightning_utilities.core.rank_zero import rank_zero_warn
 import warnings
+import zipfile
 
 
 def create_model(args, prior_model=None, mean=None, std=None):
@@ -139,6 +142,47 @@ def create_model(args, prior_model=None, mean=None, std=None):
     return model
 
 
+def load_ensemble(filepath, args=None, device="cpu", return_std=False, **kwargs):
+    """Load an ensemble of models from a list of checkpoint files or a zip file.
+
+    Args:
+        filepath (str or list): Can be any of the following:
+
+            - Path to a zip file with multiple ``*.ckpt`` files at its top level.
+            - List of paths to checkpoint files.
+
+        args (dict, optional): Arguments for the model. Defaults to None.
+        device (str, optional): Device on which the model should be loaded. Defaults to "cpu".
+        return_std (bool, optional): Whether to return the standard deviation of the predictions. Defaults to False.
+        **kwargs: Extra keyword arguments for the model, will be passed to :py:mod:`load_model`.
+
+    Returns:
+        nn.Module: An instance of :py:mod:`Ensemble`.
+
+    Raises:
+        TypeError: If ``filepath`` is a list or tuple containing a non-string entry.
+        ValueError: If the zip file has no top-level ``*.ckpt`` file, or ``filepath`` is neither a list/tuple nor a ``.zip`` path.
+    """
+    load_opts = {"args": args, "device": device, **kwargs}
+    if isinstance(filepath, (list, tuple)):
+        if not all(isinstance(f, str) for f in filepath):
+            raise TypeError("Invalid filepath list.")  # raise, not assert: survives -O
+        model_list = [load_model(f, **load_opts) for f in filepath]
+    elif filepath.endswith(".zip"):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with zipfile.ZipFile(filepath, "r") as z:
+                z.extractall(tmpdir)
+            ckpt_list = sorted(glob(os.path.join(tmpdir, "*.ckpt")))  # stable order
+            if not ckpt_list:
+                raise ValueError("No checkpoint files found in zip file.")
+            model_list = [load_model(f, **load_opts) for f in ckpt_list]
+    else:
+        raise ValueError(
+            "Invalid filepath. Must be a list of paths or a path to a zip file."
+        )
+    return Ensemble(model_list, return_std=return_std)
+
+
 def load_model(filepath, args=None, device="cpu", return_std=False, **kwargs):
     """Load a model from a checkpoint file.
 
@@ -158,29 +202,12 @@ def load_model(filepath, args=None, device="cpu", return_std=False, **kwargs):
     Returns:
         nn.Module: An instance of the TorchMD_Net model or an Ensemble model.
     """
-    if isinstance(filepath, (list, tuple)):
-        return Ensemble(
-            [load_model(f, args=args, device=device, **kwargs) for f in filepath],
-            return_std=return_std,
+    isEnsemble = isinstance(filepath, (list, tuple)) or filepath.endswith(".zip")
+    if isEnsemble:
+        return load_ensemble(
+            filepath, args=args, device=device, return_std=return_std, **kwargs
         )
-
-    if filepath.endswith(".zip"):
-        import zipfile
-        import tempfile
-        from glob import glob
-        import os
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            with zipfile.ZipFile(filepath, "r") as z:
-                z.extractall(tmpdir)
-
-            filepath = glob(os.path.join(tmpdir, "*.ckpt"))
-
-            return Ensemble(
-                [load_model(f, args=args, device=device, **kwargs) for f in filepath],
-                return_std=return_std,
-            )
-
+    assert isinstance(filepath, str)
     ckpt = torch.load(filepath, map_location="cpu")
     if args is None:
         args = ckpt["hyper_parameters"]