Make random seeding consistent with PRNG keys everywhere instead of seeds (#290)

* add ENS

* removed ENS

* Test

* Test

* Change gw_barycenter and neural dual

* fix random number generators in initializers

* k-means and gromov

* fix rng on low rank geometry and datasets

* fix remaining seed changes to rng

* fixed rng reference and increased epsilon to make algo converge, should open issue

* fixed keys to rngs

* extended rng instead of keys to inside functions and added jax.random.PRNGKeyArray as default type

* fixed more keys to rng with correct default type

* ran pre-commit

* Fix typo

---------

Co-authored-by: Anastasiia <aivanova23@apple.com>
Co-authored-by: michalk8 <46717574+michalk8@users.noreply.github.com>
3 people committed Feb 24, 2023
1 parent 9bd2fe9 commit 05ff6dd
Showing 52 changed files with 743 additions and 694 deletions.
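To make the API shift concrete before the diff: call sites that previously passed an integer seed now pass a JAX PRNG key. The snippet below is an illustrative sketch written against this revision, not code from the commit; the toy cost matrix and variable names are made up.

import jax
import jax.numpy as jnp

from ott.geometry import geometry

# Toy squared-Euclidean cost matrix, just to have something to factorize.
rng = jax.random.PRNGKey(42)
rng, rng_data = jax.random.split(rng)
x = jax.random.normal(rng_data, (20, 3))
cost = jnp.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
geom = geometry.Geometry(cost_matrix=cost)

# Before this commit: geom.to_LRCGeometry(rank=2, seed=0)
# After this commit: pass a PRNG key instead of an integer seed.
lr_geom = geom.to_LRCGeometry(rank=2, rng=jax.random.PRNGKey(0))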
17 changes: 8 additions & 9 deletions src/ott/geometry/geometry.py
@@ -623,7 +623,7 @@ def to_LRCGeometry(
self,
rank: int = 0,
tol: float = 1e-2,
- seed: int = 0,
+ rng: jax.random.PRNGKeyArray = jax.random.PRNGKey(0),
scale: float = 1.
) -> 'low_rank.LRCGeometry':
r"""Factorize the cost matrix using either SVD (full) or :cite:`indyk:19`.
@@ -642,7 +642,7 @@ def to_LRCGeometry(
rank: Target rank of the :attr:`cost_matrix`.
tol: Tolerance of the error. The total number of sampled points is
:math:`min(n, m,\frac{rank}{tol})`.
- seed: Random seed.
+ rng: The PRNG key to use for initializing the model.
scale: Value used to rescale the factors of the low-rank geometry.
Useful when this geometry is used in the linear term of fused GW.
@@ -664,27 +664,26 @@ def to_LRCGeometry(
cost_1 = u
cost_2 = (s[:, None] * vh).T
else:
- rng = jax.random.PRNGKey(seed)
- key1, key2, key3, key4, key5 = jax.random.split(rng, 5)
+ rng1, rng2, rng3, rng4, rng5 = jax.random.split(rng, 5)
n_subset = min(int(rank / tol), n, m)

- i_star = jax.random.randint(key1, shape=(), minval=0, maxval=n)
- j_star = jax.random.randint(key2, shape=(), minval=0, maxval=m)
+ i_star = jax.random.randint(rng1, shape=(), minval=0, maxval=n)
+ j_star = jax.random.randint(rng2, shape=(), minval=0, maxval=m)

ci_star = self.subset(i_star, None).cost_matrix.ravel() ** 2 # (m,)
cj_star = self.subset(None, j_star).cost_matrix.ravel() ** 2 # (n,)

p_row = cj_star + ci_star[j_star] + jnp.mean(ci_star) # (n,)
p_row /= jnp.sum(p_row)
- row_ixs = jax.random.choice(key3, n, shape=(n_subset,), p=p_row)
+ row_ixs = jax.random.choice(rng3, n, shape=(n_subset,), p=p_row)
# (n_subset, m)
s = self.subset(row_ixs, None).cost_matrix
s /= jnp.sqrt(n_subset * p_row[row_ixs][:, None])

p_col = jnp.sum(s ** 2, axis=0) # (m,)
p_col /= jnp.sum(p_col)
# (n_subset,)
- col_ixs = jax.random.choice(key4, m, shape=(n_subset,), p=p_col)
+ col_ixs = jax.random.choice(rng4, m, shape=(n_subset,), p=p_col)
# (n_subset, n_subset)
w = s[:, col_ixs] / jnp.sqrt(n_subset * p_col[col_ixs][None, :])

@@ -696,7 +695,7 @@ def to_LRCGeometry(
v = v.T / jnp.sqrt(d)[None, :]

inv_scale = (1. / jnp.sqrt(n_subset))
- col_ixs = jax.random.choice(key5, m, shape=(n_subset,)) # (n_subset,)
+ col_ixs = jax.random.choice(rng5, m, shape=(n_subset,)) # (n_subset,)

# (n, n_subset)
A_trans = self.subset(None, col_ixs).cost_matrix * inv_scale
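Aside on the rationale, sketched under the assumption of a standard JAX install (this is the general JAX pattern, not code from the diff): a PRNG key is an ordinary array, so it can be split and passed through jit/vmap, whereas an integer seed would have to remain a static Python value.

import jax
import jax.numpy as jnp

@jax.jit
def perturb(rng, x):
    # rng is a traced array argument; an int seed could not be mapped over.
    return x + jax.random.normal(rng, x.shape)

rngs = jax.random.split(jax.random.PRNGKey(0), 4)  # four independent keys
ys = jax.vmap(perturb)(rngs, jnp.ones((4, 3)))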
6 changes: 5 additions & 1 deletion src/ott/geometry/low_rank.py
@@ -231,9 +231,13 @@ def finalize(carry)
return max_value + self._bias

def to_LRCGeometry(
- self, rank: int = 0, tol: float = 1e-2, seed: int = 0
+ self,
+ rank: int = 0,
+ tol: float = 1e-2,
+ rng: jax.random.PRNGKeyArray = jax.random.PRNGKey(0),
) -> 'LRCGeometry':
"""Return self."""
+ del rank, tol, rng
return self

@property
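The low_rank.py change mirrors the new signature on a method that is intentionally a no-op: an LRCGeometry is already factored, so every argument, including the new rng, is discarded. A hedged usage sketch with made-up factors, assuming this revision:

import jax
import jax.numpy as jnp

from ott.geometry import low_rank

cost_1 = jnp.ones((10, 2))  # left factor of a rank-2 cost matrix
cost_2 = jnp.ones((8, 2))   # right factor
lr = low_rank.LRCGeometry(cost_1, cost_2)

# Already low-rank: returns the same object, regardless of the rng passed.
assert lr.to_LRCGeometry(rng=jax.random.PRNGKey(0)) is lr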
70 changes: 34 additions & 36 deletions src/ott/initializers/linear/initializers_lr.py
@@ -66,7 +66,7 @@ def __init__(self, rank: int, **kwargs: Any):
def init_q(
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
@@ -75,7 +75,7 @@ def init_q(
Args:
ot_prob: OT problem.
- key: Random key for seeding.
+ rng: Random key for seeding.
init_g: Initial value for :math:`g` factor.
kwargs: Additional keyword arguments.
@@ -87,7 +87,7 @@ def init_q(
def init_r(
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
@@ -96,7 +96,7 @@ def init_r(
Args:
ot_prob: Linear OT problem.
- key: Random key for seeding.
+ rng: Random key for seeding.
init_g: Initial value for :math:`g` factor.
kwargs: Additional keyword arguments.
@@ -108,14 +108,14 @@ def init_r(
def init_g(
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
**kwargs: Any,
) -> jnp.ndarray:
"""Initialize the low-rank factor :math:`g`.
Args:
ot_prob: OT problem.
- key: Random key for seeding.
+ rng: Random key for seeding.
kwargs: Additional keyword arguments.
Returns:
@@ -176,7 +176,7 @@ def __call__(
r: Optional[jnp.ndarray] = None,
g: Optional[jnp.ndarray] = None,
*,
- key: Optional[jnp.ndarray] = None,
+ rng: jax.random.PRNGKeyArray = jax.random.PRNGKey(0),
**kwargs: Any
) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]:
"""Initialize the factors :math:`Q`, :math:`R` and :math:`g`.
@@ -189,23 +189,21 @@ def __call__(
using :meth:`init_r`.
g: Factor of shape ``[rank,]``. If `None`, it will be initialized
using :meth:`init_g`.
- key: Random key for seeding.
+ rng: Random key for seeding.
kwargs: Additional keyword arguments for :meth:`init_q`, :meth:`init_r`
and :meth:`init_g`.
Returns:
The factors :math:`Q`, :math:`R` and :math:`g`, respectively.
"""
- if key is None:
- key = jax.random.PRNGKey(0)
- key1, key2, key3 = jax.random.split(key, 3)
+ rng1, rng2, rng3 = jax.random.split(rng, 3)

if g is None:
- g = self.init_g(ot_prob, key1, **kwargs)
+ g = self.init_g(ot_prob, rng1, **kwargs)
if q is None:
- q = self.init_q(ot_prob, key2, init_g=g, **kwargs)
+ q = self.init_q(ot_prob, rng2, init_g=g, **kwargs)
if r is None:
- r = self.init_r(ot_prob, key3, init_g=g, **kwargs)
+ r = self.init_r(ot_prob, rng3, init_g=g, **kwargs)

assert g.shape == (self.rank,)
assert q.shape == (ot_prob.a.shape[0], self.rank)
@@ -240,37 +238,37 @@ class RandomInitializer(LRInitializer):
def init_q( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
del kwargs, init_g
a = ot_prob.a
- init_q = jnp.abs(jax.random.normal(key, (a.shape[0], self.rank)))
+ init_q = jnp.abs(jax.random.normal(rng, (a.shape[0], self.rank)))
return a[:, None] * (init_q / jnp.sum(init_q, axis=1, keepdims=True))

def init_r( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
del kwargs, init_g
b = ot_prob.b
- init_r = jnp.abs(jax.random.normal(key, (b.shape[0], self.rank)))
+ init_r = jnp.abs(jax.random.normal(rng, (b.shape[0], self.rank)))
return b[:, None] * (init_r / jnp.sum(init_r, axis=1, keepdims=True))

def init_g( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
**kwargs: Any,
) -> jnp.ndarray:
del kwargs
- init_g = jnp.abs(jax.random.uniform(key, (self.rank,))) + 1.
+ init_g = jnp.abs(jax.random.uniform(rng, (self.rank,))) + 1.
return init_g / jnp.sum(init_g)


@@ -314,32 +312,32 @@ def _compute_factor(
def init_q( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
- del key, kwargs
+ del rng, kwargs
return self._compute_factor(ot_prob, init_g, which="q")

def init_r( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
- del key, kwargs
+ del rng, kwargs
return self._compute_factor(ot_prob, init_g, which="r")

def init_g( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
**kwargs: Any,
) -> jnp.ndarray:
- del key, kwargs
+ del rng, kwargs
return jnp.ones((self.rank,)) / self.rank


@@ -387,7 +385,7 @@ def _extract_array(
def _compute_factor(
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
which: Literal["q", "r"],
@@ -413,7 +411,7 @@ def _compute_factor(
arr = self._extract_array(geom, first=which == "q")
marginals = ot_prob.a if which == "q" else ot_prob.b

- centroids = fn(arr, self.rank, key=key).centroids
+ centroids = fn(arr, self.rank, rng=rng).centroids
geom = pointcloud.PointCloud(
arr, centroids, epsilon=0.1, scale_cost="max_cost"
)
@@ -425,34 +423,34 @@ def _compute_factor(
def init_q( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
return self._compute_factor(
ot_prob, key, init_g=init_g, which="q", **kwargs
ot_prob, rng, init_g=init_g, which="q", **kwargs
)

def init_r( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
**kwargs: Any,
) -> jnp.ndarray:
return self._compute_factor(
ot_prob, key, init_g=init_g, which="r", **kwargs
ot_prob, rng, init_g=init_g, which="r", **kwargs
)

def init_g( # noqa: D102
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
**kwargs: Any,
) -> jnp.ndarray:
- del key, kwargs
+ del rng, kwargs
return jnp.ones((self.rank,)) / self.rank

def tree_flatten(self) -> Tuple[Sequence[Any], Dict[str, Any]]: # noqa: D102
@@ -518,7 +516,7 @@ class State(NamedTuple): # noqa: D106
def _compute_factor(
self,
ot_prob: Problem_t,
- key: jnp.ndarray,
+ rng: jax.random.PRNGKeyArray,
*,
init_g: jnp.ndarray,
which: Literal["q", "r"],
@@ -530,7 +528,7 @@ def _compute_factor(

def init_fn() -> GeneralizedKMeansInitializer.State:
n = geom.shape[0]
- factor = jnp.abs(jax.random.normal(key, (n, self.rank))) + 1. # (n, r)
+ factor = jnp.abs(jax.random.normal(rng, (n, self.rank))) + 1. # (n, r)
factor *= consts.marginal[:, None] / jnp.sum(
factor, axis=1, keepdims=True
)
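Putting the initializer changes together, a hedged end-to-end sketch of the new calling convention (module paths follow the repository layout at this revision; the point clouds are made up):

import jax

from ott.geometry import pointcloud
from ott.initializers.linear import initializers_lr
from ott.problems.linear import linear_problem

rng = jax.random.PRNGKey(1)
rng, rng_x, rng_y = jax.random.split(rng, 3)
x = jax.random.normal(rng_x, (30, 2))
y = jax.random.normal(rng_y, (40, 2))
prob = linear_problem.LinearProblem(pointcloud.PointCloud(x, y))

init = initializers_lr.RandomInitializer(rank=4)
# One rng in; __call__ splits it internally into rng1, rng2, rng3.
q, r, g = init(prob, rng=rng)
assert q.shape == (30, 4) and r.shape == (40, 4) and g.shape == (4,)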
