ott-jax · michalk8 · Nov 15, 2022 · Nov 9, 2022 · Nov 9, 2022 · Nov 9, 2022
diff --git a/docs/notebooks/neural_dual.ipynb b/docs/notebooks/neural_dual.ipynb
@@ -158,13 +158,13 @@
    "outputs": [],
    "source": [
     "@jax.jit\n",
-    "def sinkhorn_loss(x, y, epsilon=0.1, power=2.0):\n",
+    "def sinkhorn_loss(x, y, epsilon=0.1):\n",
     "    \"\"\"Computes transport between (x, a) and (y, b) via Sinkhorn algorithm.\"\"\"\n",
     "    a = jnp.ones(len(x)) / len(x)\n",
     "    b = jnp.ones(len(y)) / len(y)\n",
     "\n",
     "    sdiv = sinkhorn_divergence(\n",
-    "        pointcloud.PointCloud, x, y, power=power, epsilon=epsilon, a=a, b=b\n",
+    "        pointcloud.PointCloud, x, y, epsilon=epsilon, a=a, b=b\n",
     "    )\n",
     "    return sdiv.divergence"
    ]

diff --git a/docs/notebooks/point_clouds.ipynb b/docs/notebooks/point_clouds.ipynb
@@ -88981,11 +88981,15 @@
     }
    ],
    "source": [
-    "plot_ots(\n",
-    "    optimize(\n",
-    "        x, y, num_iter=400, epsilon=1e-2, power=0.5, cost_fn=costs.Euclidean()\n",
-    "    )\n",
-    ")"
+    "@jax.tree_util.register_pytree_node_class\n",
+    "class Custom(costs.CostFn):\n",
+    "    \"\"\"Custom cost, sqrt of Euclidean norm.\"\"\"\n",
+    "\n",
+    "    def pairwise(self, x, y):\n",
+    "        return jnp.sqrt(jnp.abs(jnp.linalg.norm(x - y)))\n",
+    "\n",
+    "\n",
+    "plot_ots(optimize(x, y, num_iter=400, epsilon=1e-2, cost_fn=Custom()))"
    ]
   },
   {

diff --git a/docs/references.bib b/docs/references.bib
@@ -621,3 +621,10 @@ @inproceedings{korotin:21
 year={2021},
 url={https://openreview.net/forum?id=bEoxzW_EXsa}
 }
+
+@book{boyd:04,
+  title={Convex optimization},
+  author={Boyd, Stephen P. and Vandenberghe, Lieven},
+  year={2004},
+  publisher={Cambridge University Press}
+}
diff --git a/ott/core/bar_problems.py b/ott/core/bar_problems.py
@@ -330,9 +330,10 @@ def update_features(self, transports: jnp.ndarray,
     transports = transports * inv_a[None, :, None]
 
     if self._loss_name == "sqeucl":
-      cost = costs.SqEuclidean()
+      cost_fn = costs.SqEuclidean()
       return jnp.sum(
-          weights * barycentric_projection(transports, y_fused, cost), axis=0
+          weights * barycentric_projection(transports, y_fused, cost_fn),
+          axis=0
       )
     raise NotImplementedError(self._loss_name)
 

diff --git a/ott/core/potentials.py b/ott/core/potentials.py
@@ -1,12 +1,12 @@
-from typing import Any, Callable, Dict, Sequence, Tuple
+from typing import Any, Callable, Dict, Optional, Sequence, Tuple
 
 import jax
 import jax.numpy as jnp
 import jax.scipy as jsp
 import jax.tree_util as jtu
 from typing_extensions import Literal
 
-from ott.geometry import pointcloud
+from ott.geometry import costs, pointcloud
 
 __all__ = ["DualPotentials", "EntropicPotentials"]
 Potential_t = Callable[[jnp.ndarray], float]
@@ -22,21 +22,31 @@ class DualPotentials:
   Args:
     f: The first dual potential function.
     g: The second dual potential function.
+    cost_fn: The cost function used to solve the OT problem.
     cor: whether the duals solve the problem in distance form, or correlation
       form (as used for instance for ICNNs, see e.g. top right of p.3 in
       http://proceedings.mlr.press/v119/makkuva20a/makkuva20a.pdf)
   """
 
-  def __init__(self, f: Potential_t, g: Potential_t, *, cor: bool = False):
+  def __init__(
+      self,
+      f: Potential_t,
+      g: Potential_t,
+      *,
+      cost_fn: Optional[costs.CostFn] = None,
+      cor: bool = False
+  ):
     self._f = f
     self._g = g
+    self.cost_fn = costs.SqEuclidean() if cost_fn is None else cost_fn
     self._cor = cor
 
   def transport(self, vec: jnp.ndarray, forward: bool = True) -> jnp.ndarray:
     """Transport ``vec`` according to Brenier formula.
 
     Theorem 1.17 in http://math.univ-lyon1.fr/~santambrogio/OTAM-cvgmt.pdf
-    for case h(.) = ||.||^2, ∇h(.) = 2 ., [∇h]^-1(.) = 0.5 * .
+    for case h(.) = ||.||^2, ∇h(.) = 2 .,
+    h*(.) = ||.||^2 / 4, [∇h*](.) = [∇h]^-1(.) = 0.5 * .
 
     or, when solved in correlation form, as ∇g for forward, ∇f for backward.
 
@@ -49,9 +59,13 @@ def transport(self, vec: jnp.ndarray, forward: bool = True) -> jnp.ndarray:
       The transported points.
     """
     vec = jnp.atleast_2d(vec)
-    if self._cor:
+    if self._cor and isinstance(self.cost_fn, costs.SqEuclidean):
       return self._grad_g(vec) if forward else self._grad_f(vec)
-    return vec - 0.5 * (self._grad_f(vec) if forward else self._grad_g(vec))
+    grad_h_inv = jax.vmap(jax.grad(self.cost_fn.h_legendre))
+    if forward:
+      return vec - grad_h_inv(self._grad_f(vec))
+    else:
+      return vec - grad_h_inv(self._grad_g(vec))
 
   def distance(self, src: jnp.ndarray, tgt: jnp.ndarray) -> float:
     """Evaluate 2-Wasserstein distance between samples using dual potentials.
@@ -64,8 +78,7 @@ def distance(self, src: jnp.ndarray, tgt: jnp.ndarray) -> float:
       tgt: Samples from the target distribution, array of shape ``[m, d]``.
 
     Returns:
-      Wasserstein distance :math:`W^2_2`, assuming :math:`|x-y|^2` as the
-      ground distance.
+      Wasserstein distance.
     """
     src, tgt = jnp.atleast_2d(src), jnp.atleast_2d(tgt)
 
@@ -85,9 +98,6 @@ def distance(self, src: jnp.ndarray, tgt: jnp.ndarray) -> float:
       C += jnp.mean(g(tgt))
       return C
 
-    # compute the final Wasserstein distance assuming ground metric |x-y|^2,
-    # thus an additional multiplication by 2
-
   @property
   def f(self) -> Potential_t:
     """The first dual potential function."""
@@ -141,7 +151,7 @@ def __init__(
 
     # we pass directly the arrays and override the properties
     # since only the properties need to be callable
-    super().__init__(f, g)
+    super().__init__(f, g, cost_fn=geom.cost_fn, cor=False)
     self._geom = geom
     self._a = a
     self._b = b
@@ -160,16 +170,13 @@ def _create_potential_function(
 
     def callback(x: jnp.ndarray) -> float:
       cost = pointcloud.PointCloud(
-          jnp.atleast_2d(x),
-          y,
-          cost_fn=self._geom.cost_fn,
-          power=self._geom.power,
-          epsilon=1.0  #  epsilon is not used
+          jnp.atleast_2d(x), y, cost_fn=self._geom.cost_fn
       ).cost_matrix
-      return -eps * jsp.special.logsumexp((potential - cost) / eps,
-                                          b=prob_weights)
+      z = (potential - cost) / epsilon
+      lse = -epsilon * jsp.special.logsumexp(z, b=prob_weights, axis=-1)
+      return jnp.squeeze(lse)
 
-    eps = self.epsilon
+    epsilon = self.epsilon
     if kind == "g":
       # When seeking to evaluate 2nd potential function, 1st set of potential
       # values and support should be used,

diff --git a/ott/core/quad_problems.py b/ott/core/quad_problems.py
@@ -21,6 +21,7 @@
 # Because Protocol is not available in Python < 3.8
 from typing_extensions import Literal, Protocol
 
+from ott.core import _math_utils as mu
 from ott.core import linear_problems, sinkhorn_lr
 from ott.geometry import epsilon_scheduler, geometry, low_rank, pointcloud
 
@@ -58,16 +59,17 @@ class GWLoss(NamedTuple):
 def make_square_loss() -> GWLoss:
   f1 = Loss(lambda x: x ** 2, is_linear=False)
   f2 = Loss(lambda y: y ** 2, is_linear=False)
-  h1 = Loss(lambda x: x, is_linear=True)
-  h2 = Loss(lambda y: 2.0 * y, is_linear=True)
+  h1 = Loss(lambda x: jnp.sqrt(2) * x, is_linear=True)
+  h2 = Loss(lambda y: jnp.sqrt(2) * y, is_linear=True)
   return GWLoss(f1, f2, h1, h2)
 
 
-def make_kl_loss(clipping_value: float = 1e-8) -> GWLoss:
+def make_kl_loss(clipping_value: Optional[float] = None) -> GWLoss:
+  assert clipping_value is None, "Clipping deprecated in KL definition."
   f1 = Loss(lambda x: -jax.scipy.special.entr(x) - x, is_linear=False)
   f2 = Loss(lambda y: y, is_linear=True)
   h1 = Loss(lambda x: x, is_linear=True)
-  h2 = Loss(lambda y: jnp.log(jnp.clip(y, clipping_value)), is_linear=False)
+  h2 = Loss(lambda y: mu.safe_log(y), is_linear=False)
   return GWLoss(f1, f2, h1, h2)
 
 

diff --git a/ott/geometry/costs.py b/ott/geometry/costs.py
@@ -47,7 +47,7 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     pass
 
   def barycenter(self, weights: jnp.ndarray, xs: jnp.ndarray) -> float:
-    pass
+    raise NotImplementedError("Barycenter not yet implemented for this cost.")
 
   @classmethod
   def padder(cls, dim: int) -> jnp.ndarray:
@@ -90,17 +90,88 @@ def tree_unflatten(cls, aux_data, children):
     return cls(*children)
 
 
+@jax.tree_util.register_pytree_node_class
+class RBFCost(CostFn):
+  """Base class for translation-invariant (radial-basis function) costs.
+
+  Such costs are defined as
+
+  c(x,y) = h(z), where z := x-y,
+
+  where h is a strictly convex (or concave) function mapping vectors to
+  real values.
+
+  Subclasses must implement `h` and its Legendre transform `h_legendre`;
+  the gradient of the latter (the inverse of the gradient of `h`) is used
+  to form a Brenier map.
+  """
+
+  def h(self, z: jnp.ndarray) -> float:
+    raise NotImplementedError("`h` must be implemented by subclasses.")
+
+  def h_legendre(self, z: jnp.ndarray) -> float:
+    raise NotImplementedError("`h_legendre` is not implemented.")
+
+  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+    """Evaluate h on difference between x and y."""
+    return self.h(x - y)
+
+  def tree_flatten(self):
+    return (), None  # no children and no auxiliary data
+
+  def barycenter(self, weights: jnp.ndarray, xs: jnp.ndarray) -> float:
+    raise NotImplementedError("Barycenter not yet implemented for this cost.")
+
+  @classmethod
+  def tree_unflatten(cls, aux_data, children):
+    del aux_data
+    return cls(*children)
+
+
+@jax.tree_util.register_pytree_node_class
+class SqPNorm(RBFCost):
+  """Half the squared p-norm of the difference of two vectors.
+
+  The Legendre transform is half the squared q-norm, with 1/p + 1/q = 1
+  (q = inf when p = 1); see e.g. :cite:`boyd:04`, p.93/94.
+  https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf
+  """
+  p: float
+
+  def __init__(self, p: float):
+    self.p = p
+    self.q = jnp.inf if p == 1 else 1. / (1. - 1. / p)  # 1/p + 1/q = 1
+
+  def h(self, z: jnp.ndarray) -> float:
+    return 0.5 * jnp.linalg.norm(z, self.p) ** 2
+
+  def h_legendre(self, z: jnp.ndarray) -> float:
+    return 0.5 * jnp.linalg.norm(z, self.q) ** 2
+
+  def tree_flatten(self):
+    return (), (self.p,)  # p is static aux data; q is recomputed from it
+
+  @classmethod
+  def tree_unflatten(cls, aux_data, children):
+    del children
+    return cls(aux_data[0])
+
+
 @jax.tree_util.register_pytree_node_class
 class Euclidean(CostFn):
-  """Euclidean distance."""
+  """Euclidean distance.
+
+  The Euclidean distance is not cast as an RBF cost, because this would
+  correspond to `h = norm` (`abs` in 1-D), whose gradient is not invertible.
+  """
 
   def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     """Compute Euclidean norm."""
     return jnp.linalg.norm(x - y)
 
 
 @jax.tree_util.register_pytree_node_class
-class SqEuclidean(CostFn):
+class SqEuclidean(RBFCost):
   """Squared Euclidean distance."""
 
   def norm(self, x: jnp.ndarray) -> Union[float, jnp.ndarray]:
@@ -111,6 +182,12 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     """Compute minus twice the dot-product between vectors."""
     return -2. * jnp.vdot(x, y)
 
+  def h(self, z: jnp.ndarray) -> float:
+    return jnp.sum(z ** 2)
+
+  def h_legendre(self, z: jnp.ndarray) -> float:
+    return 0.25 * jnp.sum(z ** 2)
+
   def barycenter(self, weights: jnp.ndarray, xs: jnp.ndarray) -> jnp.ndarray:
     """Output barycenter of vectors when using squared-Euclidean distance."""
     return jnp.average(xs, weights=weights, axis=0)
@@ -134,9 +211,6 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     # similarity is in [-1, 1], clip because of numerical imprecisions
     return jnp.clip(cosine_distance, 0., 2.)
 
-  def barycenter(self, weights: jnp.ndarray, xs: jnp.ndarray) -> float:
-    raise NotImplementedError("Barycenter for cosine cost not yet implemented.")
-
   @classmethod
   def padder(cls, dim: int) -> jnp.ndarray:
     return jnp.ones((1, dim))