Merge pull request #82 from rmnldwg/release-1.1.0

Release 1.1.0
rmnldwg · Mar 20, 2024 · 6559f11 · 6559f11
2 parents 6afa3cb + 7ca44f9
commit 6559f11
Show file tree

Hide file tree

Showing 9 changed files with 340 additions and 260 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,28 @@
 All notable changes to this project will be documented in this file.
 
 
+<a name="1.1.0"></a>
+## [1.1.0] - 2024-03-20
+
+### Features
+
+- (**utils**) Add `safe_set_params()` function.\
+  This checks whether the params are a dict, list, or None and handles
+  them accordingly. Just a convencience method that helped refactor some methods.
+- Allow to pass state distributions to `posterior_state_dist()` and `risk()` methds. Fixes [#80].\
+  With this, one can use precomputed state distributions to speed up
+  computing the posterior or risk for multiple scenarios.
+
+### Refactor
+
+- Use `safe_set_params()` across models.
+
+### Testing
+
+- Add checks for midline risk. Related [#80].
+- (**mid**) Fix wrong assumption in risk test.
+
+
 <a name="1.0.0"></a>
 ## [1.0.0] - 2024-03-18
 
@@ -604,7 +626,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 - add pre-commit hook to check commit msg
 
 
-[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0...HEAD
+[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.1.0...HEAD
+[1.1.0]: https://github.com/rmnldwg/lymph/compare/1.0.0...1.1.0
 [1.0.0]: https://github.com/rmnldwg/lymph/compare/1.0.0.rc2...1.0.0
 [1.0.0.rc2]: https://github.com/rmnldwg/lymph/compare/1.0.0.rc1...1.0.0.rc2
 [1.0.0.rc1]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...1.0.0.rc1
@@ -620,6 +643,7 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 [0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1
 [0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0
 
+[#80]: https://github.com/rmnldwg/lymph/issues/80
 [#79]: https://github.com/rmnldwg/lymph/issues/79
 [#77]: https://github.com/rmnldwg/lymph/issues/77
 [#74]: https://github.com/rmnldwg/lymph/issues/74

diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py
@@ -7,14 +7,7 @@
 import numpy as np
 import pandas as pd
 
-from lymph import diagnose_times, matrix, modalities, models, types
-from lymph.utils import (
-    add_or_mult,
-    early_late_mapping,
-    flatten,
-    synchronize_params,
-    unflatten_and_split,
-)
+from lymph import diagnose_times, matrix, modalities, models, types, utils
 
 warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
 logger = logging.getLogger(__name__)
@@ -189,7 +182,7 @@ def get_tumor_spread_params(
             params = params["ipsi"]
 
         if as_flat or not as_dict:
-            params = flatten(params)
+            params = utils.flatten(params)
 
         return params if as_dict else params.values()
 
@@ -221,7 +214,7 @@ def get_lnl_spread_params(
             params = params["ipsi"]
 
         if as_flat or not as_dict:
-            params = flatten(params)
+            params = utils.flatten(params)
 
         return params if as_dict else params.values()
 
@@ -276,7 +269,7 @@ def get_spread_params(
             params.update(self.get_lnl_spread_params(as_flat=as_flat))
 
         if as_flat or not as_dict:
-            params = flatten(params)
+            params = utils.flatten(params)
 
         return params if as_dict else params.values()
 
@@ -300,14 +293,14 @@ def get_params(
         params.update(self.get_distribution_params(as_flat=as_flat))
 
         if as_flat or not as_dict:
-            params = flatten(params)
+            params = utils.flatten(params)
 
         return params if as_dict else params.values()
 
 
     def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]:
         """Set the parameters of the model's spread from tumor to LNLs."""
-        kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"])
+        kwargs, global_kwargs = utils.unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"])
 
         ipsi_kwargs = global_kwargs.copy()
         ipsi_kwargs.update(kwargs.get("ipsi", {}))
@@ -316,7 +309,7 @@ def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]
 
         args = self.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs)
         if self.is_symmetric["tumor_spread"]:
-            synchronize_params(
+            utils.synchronize_params(
                 get_from=self.ipsi.graph.tumor_edges,
                 set_to=self.contra.graph.tumor_edges,
             )
@@ -328,7 +321,9 @@ def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]
 
     def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]:
         """Set the parameters of the model's spread from LNLs to tumor."""
-        kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"])
+        kwargs, global_kwargs = utils.unflatten_and_split(
+            kwargs, expected_keys=["ipsi", "contra"],
+        )
 
         ipsi_kwargs = global_kwargs.copy()
         ipsi_kwargs.update(kwargs.get("ipsi", {}))
@@ -337,7 +332,7 @@ def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]:
 
         args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs)
         if self.is_symmetric["lnl_spread"]:
-            synchronize_params(
+            utils.synchronize_params(
                 get_from=self.ipsi.graph.lnl_edges,
                 set_to=self.contra.graph.lnl_edges,
             )
@@ -394,7 +389,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]:
     def load_patient_data(
         self,
         patient_data: pd.DataFrame,
-        mapping: callable | dict[int, Any] = early_late_mapping,
+        mapping: callable | dict[int, Any] = utils.early_late_mapping,
     ) -> None:
         """Load patient data into the model.
 
@@ -427,17 +422,13 @@ def state_dist(
             ipsi_state_evo = self.ipsi.state_dist_evo()
             contra_state_evo = self.contra.state_dist_evo()
             time_marg_matrix = np.diag(self.get_distribution(t_stage).pmf)
+            result = ipsi_state_evo.T @ time_marg_matrix @ contra_state_evo
 
-            result = (
-                ipsi_state_evo.T
-                @ time_marg_matrix
-                @ contra_state_evo
-            )
         elif mode == "BN":
             ipsi_state_dist = self.ipsi.state_dist(mode=mode)
             contra_state_dist = self.contra.state_dist(mode=mode)
-
             result = np.outer(ipsi_state_dist, contra_state_dist)
+
         else:
             raise ValueError(f"Unknown mode '{mode}'.")
 
@@ -515,7 +506,7 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float
                 self.ipsi.diagnose_matrix(stage),
                 joint_state_dist @ self.contra.diagnose_matrix(stage).T,
             )
-            llh = add_or_mult(llh, patient_llhs, log)
+            llh = utils.add_or_mult(llh, patient_llhs, log)
 
         return llh
 
@@ -547,12 +538,7 @@ def likelihood(
         try:
             # all functions and methods called here should raise a ValueError if the
             # given parameters are invalid...
-            if given_params is None:
-                pass
-            elif isinstance(given_params, dict):
-                self.set_params(**given_params)
-            else:
-                self.set_params(*given_params)
+            utils.safe_set_params(self, given_params)
         except ValueError:
             return -np.inf if log else 0.
 
@@ -564,32 +550,36 @@ def likelihood(
         raise ValueError("Invalid mode. Must be either 'HMM' or 'BN'.")
 
 
-    def posterior_joint_state_dist(
+    def posterior_state_dist(
         self,
         given_params: types.ParamsType | None = None,
+        given_state_dist: np.ndarray | None = None,
         given_diagnoses: dict[str, types.DiagnoseType] | None = None,
         t_stage: str | int = "early",
         mode: Literal["HMM", "BN"] = "HMM",
     ) -> np.ndarray:
         """Compute joint post. dist. over ipsi & contra states, ``given_diagnoses``.
 
-        The ``given_diagnoses`` is a dictionary storing a :py:obj:`.types.DiagnoseType`
-        for the ``"ipsi"`` and ``"contra"`` side of the neck.
+        The ``given_diagnoses`` is a dictionary storing one :py:obj:`.types.DiagnoseType`
+        each for the ``"ipsi"`` and ``"contra"`` side of the neck.
 
         Essentially, this is the risk for any possible combination of ipsi- and
         contralateral involvement, given the provided diagnoses.
 
-        Note:
-            The computation is much faster if no parameters are given, since then the
-            transition matrix does not need to be recomputed.
+        Warning:
+            As in the :py:meth:`.Unilateral.posterior_state_dist` method, one may
+            provide a precomputed (joint) state distribution via the ``given_state_dist``
+            argument (should be a square matric). In this case, the ``given_params``
+            are ignored and the model does not need to recompute e.g. the
+            :py:meth:`.transition_matrix` or :py:meth:`.state_dist`, making the
+            computation much faster.
 
-        See Also:
-            :py:meth:`.Unilateral.posterior_state_dist`
+            However, this will mean that ``t_stage`` and ``mode`` are also ignored,
+            since these are only used to compute the state distribution.
         """
-        if isinstance(given_params, dict):
-            self.set_params(**given_params)
-        else:
-            self.set_params(*given_params)
+        if given_state_dist is None:
+            utils.safe_set_params(self, given_params)
+            given_state_dist = self.state_dist(t_stage=t_stage, mode=mode)
 
         if given_diagnoses is None:
             given_diagnoses = {}
@@ -606,47 +596,40 @@ def posterior_joint_state_dist(
             # vector with P(Z=z|X) for each state X. A data matrix for one "patient"
             diagnose_given_state[side] = diagnose_encoding @ observation_matrix.T
 
-        joint_state_dist = self.state_dist(t_stage=t_stage, mode=mode)
         # matrix with P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc.
         joint_diagnose_and_state = np.outer(
             diagnose_given_state["ipsi"],
             diagnose_given_state["contra"],
-        ) * joint_state_dist
+        ) * given_state_dist
         # Following Bayes' theorem, this is P(Xi,Xc|Zi=zi,Zc=zc) which is given by
         # P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) / P(Zi=zi,Zc=zc)
         return joint_diagnose_and_state / np.sum(joint_diagnose_and_state)
 
 
     def risk(
         self,
-        involvement: types.PatternType | None = None,
+        involvement: dict[str, types.PatternType] | None = None,
         given_params: types.ParamsType | None = None,
+        given_state_dist: np.ndarray | None = None,
         given_diagnoses: dict[str, types.DiagnoseType] | None = None,
         t_stage: str = "early",
         mode: Literal["HMM", "BN"] = "HMM",
     ) -> float:
-        """Compute risk of an ``involvement`` pattern, given parameters and diagnoses.
+        """Compute risk of the ``involvement`` patterns, given parameters and diagnoses.
 
-        The parameters can be set via the ``given_params`` and ``given_params``, both
-        of which are passed to the :py:meth:`.set_params` method. The
-        ``given_diagnoses`` must be a dictionary mapping the side of the neck to a
-        :py:obj:`.types.DiagnoseType`.
+        The ``involvement`` of interest is expected to be a :py:obj:`.PatternType` for
+        each side of the neck (``"ipsi"`` and ``"contra"``). This method then
+        marginalizes over those posterior state probabilities that match the
+        ``involvement`` patterns.
 
-        Note:
-            The computation is much faster if no parameters are given, since then the
-            transition matrix does not need to be recomputed.
-
-        See Also:
-            :py:meth:`.Unilateral.risk`
-                The unilateral method for computing the risk of an involvment pattern.
-            :py:meth:`.Bilateral.comp_posterior_joint_state_dist`
-                This method computes the joint distribution over ipsi- and
-                contralateral states, given the parameters and diagnoses. The risk then
-                only marginalizes over the states that match the involvement pattern.
+        If ``involvement`` is not provided, the method returns the posterior state
+        distribution as computed by the :py:meth:`.posterior_state_dist` method. See
+        its docstring for more details on the remaining arguments.
         """
         # TODO: test this method
-        posterior_state_probs = self.posterior_joint_state_dist(
+        posterior_state_probs = self.posterior_state_dist(
             given_params=given_params,
+            given_state_dist=given_state_dist,
             given_diagnoses=given_diagnoses,
             t_stage=t_stage,
             mode=mode,
@@ -660,7 +643,7 @@ def risk(
             side_graph = getattr(self, side).graph
             marginalize_over_states[side] = matrix.compute_encoding(
                 lnls=side_graph.lnls.keys(),
-                pattern=involvement[side],
+                pattern=involvement.get(side, {}),
                 base=3 if self.is_trinary else 2,
             )
         return (