Euclidean / SqEuclidean and changes in power (#157)

* Euclidean / SqEuclidean and changes in power * fix, following expose cost_fn in PointCloud * fix power=2 that were still there * fix * fix in plot, *needed* to update lines in anims.
ott-jax · Oct 19, 2022 · 8646692 · 8646692
1 parent 438eb11
commit 8646692
Show file tree

Hide file tree

Showing 18 changed files with 105 additions and 84 deletions.
diff --git a/README.md b/README.md
@@ -49,7 +49,7 @@ Currently implements the following classes and functions:
 
 -   The [geometry](ott/geometry) folder describes tools that encapsulate the essential ingredients of OT problems: measures and cost functions.
 
-    -   The `CostFn` class in [costs.py](ott/geometry/costs.py) and its descendants define cost functions between points. A few simple costs are considered, `Euclidean` between vectors, and `Bures`, between a pair of mean vector and covariance (p.d.) matrix.
+    -   The `CostFn` class in [costs.py](ott/geometry/costs.py) and its descendants define cost functions between points. A few simple costs are considered, `SqEuclidean` between vectors, and `Bures`, between a pair of mean vector and covariance (p.d.) matrix.
 
     -   The `Geometry` class in [geometry.py](ott/geometry/geometry.py) describes a cost structure between two measures. That cost structure is accessed through various member functions, either used when running the Sinkhorn algorithm (typically kernel multiplications, or log-sum-exp row/column-wise application) or after (to apply the OT matrix to a vector).
 

diff --git a/docs/geometry.rst b/docs/geometry.rst
@@ -50,6 +50,7 @@ Cost Functions
     :toctree: _autosummary
 
     costs.CostFn
+    costs.SqEuclidean
     costs.Euclidean
     costs.Cosine
     costs.Bures

diff --git a/docs/notebooks/Sinkhorn_Barycenters.ipynb b/docs/notebooks/Sinkhorn_Barycenters.ipynb
@@ -425,7 +425,7 @@
     "id": "tqg86SFQvzXC"
    },
    "source": [
-    "### Euclidean barycenter, for reference"
+    "### SqEuclidean barycenter, for reference"
    ]
   },
   {

diff --git a/docs/notebooks/introduction_grid.ipynb b/docs/notebooks/introduction_grid.ipynb
@@ -309,8 +309,8 @@
    "outputs": [],
    "source": [
     "@jax.tree_util.register_pytree_node_class\n",
-    "class EuclideanTimes2(costs.CostFn):\n",
-    "    \"\"\"The cost function corresponding to the squared euclidean distance times 2.\"\"\"\n",
+    "class SqEuclideanTimes2(costs.CostFn):\n",
+    "    \"\"\"The cost function corresponding to the squared Euclidean distance times 2.\"\"\"\n",
     "\n",
     "    def norm(self, x):\n",
     "        return jnp.sum(x**2, axis=-1) * 2\n",
@@ -319,7 +319,7 @@
     "        return -2 * jnp.sum(x * y) * 2\n",
     "\n",
     "\n",
-    "cost_fns = [EuclideanTimes2(), costs.Euclidean()]"
+    "cost_fns = [SqEuclideanTimes2(), costs.SqEuclidean()]"
    ]
   },
   {

diff --git a/docs/notebooks/point_clouds.ipynb b/docs/notebooks/point_clouds.ipynb
@@ -85,7 +85,7 @@
    "source": [
     "## Computes the regularized optimal transport\n",
     "\n",
-    "To compute the transport matrix between the two point clouds, one can define a `PointCloud` geometry (which by default uses `ott.geometry.costs.Euclidean` for cost function), then call the `sinkhorn` function, and build the transport matrix from the optimized potentials."
+    "To compute the transport matrix between the two point clouds, one can define a `PointCloud` geometry (which by default uses `ott.geometry.costs.SqEuclidean` for cost function), then call the `sinkhorn` function, and build the transport matrix from the optimized potentials."
    ]
   },
   {
@@ -235,7 +235,7 @@
     "    y: jnp.ndarray,\n",
     "    a: jnp.ndarray,\n",
     "    b: jnp.ndarray,\n",
-    "    cost_fn=ott.geometry.costs.Euclidean(),\n",
+    "    cost_fn=ott.geometry.costs.SqEuclidean(),\n",
     "    num_iter: int = 101,\n",
     "    dump_every: int = 10,\n",
     "    learning_rate: float = 0.2,\n",
@@ -5972,7 +5972,9 @@
    "source": [
     "from IPython import display\n",
     "\n",
-    "ots = optimize(x, y, a, b, num_iter=100, cost_fn=ott.geometry.costs.Euclidean())\n",
+    "ots = optimize(\n",
+    "    x, y, a, b, num_iter=100, cost_fn=ott.geometry.costs.SqEuclidean()\n",
+    ")\n",
     "fig = plt.figure(figsize=(8, 5))\n",
     "plott = ott.tools.plot.Plot(fig=fig)\n",
     "anim = plott.animate(ots, frame_rate=4)\n",

diff --git a/ott/core/bar_problems.py b/ott/core/bar_problems.py
@@ -42,7 +42,7 @@ class BarycenterProblem:
     weights: Array of shape ``[num_measures,]`` containing the weights of the
       measures.
     cost_fn: Cost function used. If `None`,
-      use :class:`~ott.geometry.costs.Euclidean` cost.
+      use :class:`~ott.geometry.costs.SqEuclidean` cost.
     epsilon: Epsilon regularization used to solve reg-OT problems.
     debiased: **Currently not implemented.**
       Whether the problem is debiased, in the sense that
@@ -75,7 +75,7 @@ def __init__(
       raise ValueError("Specify weights if `y` is already segmented.")
     self._b = b
     self._weights = weights
-    self.cost_fn = costs.Euclidean() if cost_fn is None else cost_fn
+    self.cost_fn = costs.SqEuclidean() if cost_fn is None else cost_fn
     self.epsilon = epsilon
     self.debiased = debiased
     self._kwargs = kwargs
@@ -309,7 +309,7 @@ def update_features(self, transports: jnp.ndarray,
     """Update the barycenter features in the fused case :cite:`vayer:19`.
 
     Uses :cite:`cuturi:14` eq. 8, and is implemented only
-    for the squared :class:`~ott.geometry.costs.Euclidean` cost.
+    for the :class:`~ott.geometry.costs.SqEuclidean` cost.
 
     Args:
       transports: Transport maps of shape
@@ -330,7 +330,7 @@ def update_features(self, transports: jnp.ndarray,
     transports = transports * inv_a[None, :, None]
 
     if self._loss_name == "sqeucl":
-      cost = costs.Euclidean()
+      cost = costs.SqEuclidean()
       return jnp.sum(
           weights * barycentric_projection(transports, y_fused, cost), axis=0
       )

diff --git a/ott/core/potentials.py b/ott/core/potentials.py
@@ -162,7 +162,7 @@ def callback(x: jnp.ndarray) -> float:
       cost = pointcloud.PointCloud(
           jnp.atleast_2d(x),
           y,
-          cost_fn=self._geom._cost_fn,
+          cost_fn=self._geom.cost_fn,
           power=self._geom.power,
           epsilon=1.0  #  epsilon is not used
       ).cost_matrix

diff --git a/ott/geometry/costs.py b/ott/geometry/costs.py
@@ -92,7 +92,16 @@ def tree_unflatten(cls, aux_data, children):
 
 @jax.tree_util.register_pytree_node_class
 class Euclidean(CostFn):
-  """Squared Euclidean distance CostFn."""
+  """Euclidean distance."""
+
+  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+    """Compute Euclidean norm."""
+    return jnp.linalg.norm(x - y)
+
+
+@jax.tree_util.register_pytree_node_class
+class SqEuclidean(CostFn):
+  """Squared Euclidean distance."""
 
   def norm(self, x: jnp.ndarray) -> Union[float, jnp.ndarray]:
     """Compute squared Euclidean norm for vector."""

diff --git a/ott/geometry/grid.py b/ott/geometry/grid.py
@@ -101,7 +101,7 @@ def __init__(
       raise ValueError('Input either grid_size t-uple or grid locations x.')
 
     if cost_fns is None:
-      cost_fns = [costs.Euclidean()]
+      cost_fns = [costs.SqEuclidean()]
     self.cost_fns = cost_fns
     self.kwargs = {
         'num_a': self.num_a,

diff --git a/ott/geometry/pointcloud.py b/ott/geometry/pointcloud.py
@@ -65,7 +65,7 @@ def __init__(
       x: jnp.ndarray,
       y: Optional[jnp.ndarray] = None,
       cost_fn: Optional[costs.CostFn] = None,
-      power: float = 2.0,
+      power: float = 1.0,
       batch_size: Optional[int] = None,
       scale_cost: Union[bool, int, float,
                         Literal['mean', 'max_norm', 'max_bound', 'max_cost',
@@ -75,9 +75,9 @@ def __init__(
     super().__init__(**kwargs)
     self.x = x
     self.y = self.x if y is None else y
-    self._cost_fn = costs.Euclidean() if cost_fn is None else cost_fn
+    self.cost_fn = costs.SqEuclidean() if cost_fn is None else cost_fn
     self.power = power
-    self._axis_norm = 0 if callable(self._cost_fn.norm) else None
+    self._axis_norm = 0 if callable(self.cost_fn.norm) else None
     if batch_size is not None:
       assert batch_size > 0, f"`batch_size={batch_size}` must be positive."
     self._batch_size = batch_size
@@ -86,13 +86,13 @@ def __init__(
   @property
   def _norm_x(self) -> Union[float, jnp.ndarray]:
     if self._axis_norm == 0:
-      return self._cost_fn.norm(self.x)
+      return self.cost_fn.norm(self.x)
     return 0.
 
   @property
   def _norm_y(self) -> Union[float, jnp.ndarray]:
     if self._axis_norm == 0:
-      return self._cost_fn.norm(self.y)
+      return self.cost_fn.norm(self.y)
     return 0.
 
   @property
@@ -125,7 +125,7 @@ def is_symmetric(self) -> bool:
 
   @property
   def is_squared_euclidean(self) -> bool:
-    return isinstance(self._cost_fn, costs.Euclidean) and self.power == 2.0
+    return isinstance(self.cost_fn, costs.SqEuclidean) and self.power == 1.0
 
   @property
   def is_online(self) -> bool:
@@ -163,7 +163,7 @@ def inv_scale_cost(self) -> float:
           "the cost matrix with the online mode is not implemented."
       )
     if self._scale_cost == 'max_norm':
-      if self._cost_fn.norm is not None:
+      if self.cost_fn.norm is not None:
         return 1.0 / jnp.maximum(self._norm_x.max(), self._norm_y.max())
       return 1.0
     if self._scale_cost == 'max_bound':
@@ -183,11 +183,11 @@ def inv_scale_cost(self) -> float:
     raise ValueError(f'Scaling {self._scale_cost} not implemented.')
 
   def _compute_cost_matrix(self) -> jnp.ndarray:
-    cost_matrix = self._cost_fn.all_pairs_pairwise(self.x, self.y)
+    cost_matrix = self.cost_fn.all_pairs_pairwise(self.x, self.y)
     if self._axis_norm is not None:
       cost_matrix += self._norm_x[:, jnp.newaxis] + self._norm_y[jnp.newaxis, :]
-    if self.power != 2.0:
-      cost_matrix = jnp.abs(cost_matrix) ** (0.5 * self.power)
+    if self.power != 1.0:
+      cost_matrix = jnp.abs(cost_matrix) ** self.power
     return cost_matrix
 
   def apply_lse_kernel(
@@ -212,7 +212,7 @@ def body0(carry, i: int):
             self._norm_y, (i * self.batch_size,), (self.batch_size,)
         )
       h_res, h_sgn = app(
-          self.x, y, self._norm_x, norm_y, f, g_, eps, vec, self._cost_fn,
+          self.x, y, self._norm_x, norm_y, f, g_, eps, vec, self.cost_fn,
           self.power, self.inv_scale_cost
       )
       return carry, (h_res, h_sgn)
@@ -230,7 +230,7 @@ def body1(carry, i: int):
             self._norm_x, (i * self.batch_size,), (self.batch_size,)
         )
       h_res, h_sgn = app(
-          self.y, x, self._norm_y, norm_x, g, f_, eps, vec, self._cost_fn,
+          self.y, x, self._norm_y, norm_x, g, f_, eps, vec, self.cost_fn,
           self.power, self.inv_scale_cost
       )
       return carry, (h_res, h_sgn)
@@ -240,12 +240,12 @@ def finalize(i: int):
         norm_y = self._norm_y if self._axis_norm is None else self._norm_y[i:]
         return app(
             self.x, self.y[i:], self._norm_x, norm_y, f, g[i:], eps, vec,
-            self._cost_fn, self.power, self.inv_scale_cost
+            self.cost_fn, self.power, self.inv_scale_cost
         )
       norm_x = self._norm_x if self._axis_norm is None else self._norm_x[i:]
       return app(
           self.y, self.x[i:], self._norm_y, norm_x, g, f[i:], eps, vec,
-          self._cost_fn, self.power, self.inv_scale_cost
+          self.cost_fn, self.power, self.inv_scale_cost
       )
 
     if not self.is_online:
@@ -297,12 +297,12 @@ def apply_kernel(
     if axis == 0:
       return app(
           self.x, self.y, self._norm_x, self._norm_y, scaling, eps,
-          self._cost_fn, self.power, self.inv_scale_cost
+          self.cost_fn, self.power, self.inv_scale_cost
       )
     if axis == 1:
       return app(
           self.y, self.x, self._norm_y, self._norm_x, scaling, eps,
-          self._cost_fn, self.power, self.inv_scale_cost
+          self.cost_fn, self.power, self.inv_scale_cost
       )
 
   def transport_from_potentials(
@@ -318,7 +318,7 @@ def transport_from_potentials(
     )
     return transport(
         self.y, self.x, self._norm_y, self._norm_x, g, f, self.epsilon,
-        self._cost_fn, self.power, self.inv_scale_cost
+        self.cost_fn, self.power, self.inv_scale_cost
     )
 
   def transport_from_scalings(
@@ -334,7 +334,7 @@ def transport_from_scalings(
     )
     return transport(
         self.y, self.x, self._norm_y, self._norm_x, v, u, self.epsilon,
-        self._cost_fn, self.power, self.inv_scale_cost
+        self.cost_fn, self.power, self.inv_scale_cost
     )
 
   def apply_cost(
@@ -387,12 +387,12 @@ def _apply_cost(
         arr = arr.reshape(-1, 1)
       if axis == 0:
         return app(
-            self.x, self.y, self._norm_x, self._norm_y, arr, self._cost_fn,
+            self.x, self.y, self._norm_x, self._norm_y, arr, self.cost_fn,
             self.power, self.inv_scale_cost, fn
         )
       if axis == 1:
         return app(
-            self.y, self.x, self._norm_y, self._norm_x, arr, self._cost_fn,
+            self.y, self.x, self._norm_y, self._norm_x, arr, self.cost_fn,
             self.power, self.inv_scale_cost, fn
         )
     else:
@@ -464,7 +464,7 @@ def body0(carry, i: int):
       else:
         norm_y = self._leading_slice(self._norm_y, i)
       h_res = app(
-          self.x, y, self._norm_x, norm_y, vec, self._cost_fn, self.power,
+          self.x, y, self._norm_x, norm_y, vec, self.cost_fn, self.power,
           scale_cost
       )
       return carry, h_res
@@ -477,7 +477,7 @@ def body1(carry, i: int):
       else:
         norm_x = self._leading_slice(self._norm_x, i)
       h_res = app(
-          self.y, x, self._norm_y, norm_x, vec, self._cost_fn, self.power,
+          self.y, x, self._norm_y, norm_x, vec, self.cost_fn, self.power,
           scale_cost
       )
       return carry, h_res
@@ -486,12 +486,12 @@ def finalize(i: int):
       if batch_for_y:
         norm_y = self._norm_y if self._axis_norm is None else self._norm_y[i:]
         return app(
-            self.x, self.y[i:], self._norm_x, norm_y, vec, self._cost_fn,
+            self.x, self.y[i:], self._norm_x, norm_y, vec, self.cost_fn,
             self.power, scale_cost
         )
       norm_x = self._norm_x if self._axis_norm is None else self._norm_x[i:]
       return app(
-          self.y, self.x[i:], self._norm_y, norm_x, vec, self._cost_fn,
+          self.y, self.x[i:], self._norm_y, norm_x, vec, self.cost_fn,
           self.power, scale_cost
       )
 
@@ -532,9 +532,9 @@ def finalize(i: int):
     )
 
   def barycenter(self, weights: jnp.ndarray) -> jnp.ndarray:
-    """Compute barycenter of points in self.x using weights, valid for p=2.0."""
-    assert self.power == 2.0, self.power
-    return self._cost_fn.barycenter(self.x, weights)
+    """Compute barycenter of points in self.x using weights, valid for p=1.0."""
+    assert self.power == 1.0, self.power
+    return self.cost_fn.barycenter(self.x, weights)
 
   @classmethod
   def prepare_divergences(
@@ -560,7 +560,7 @@ def prepare_divergences(
 
   def tree_flatten(self):
     # passing self.power in aux_data to be able to condition on it.
-    return ([self.x, self.y, self._src_mask, self._tgt_mask, self._cost_fn], {
+    return ([self.x, self.y, self._src_mask, self._tgt_mask, self.cost_fn], {
         'epsilon': self._epsilon_init,
         'relative_epsilon': self._relative_epsilon,
         'scale_epsilon': self._scale_epsilon,
@@ -577,14 +577,14 @@ def tree_unflatten(cls, aux_data, children):
     )
 
   def _cosine_to_sqeucl(self) -> 'PointCloud':
-    assert isinstance(self._cost_fn, costs.Cosine), type(self._cost_fn)
-    assert self.power == 2, self.power
+    assert isinstance(self.cost_fn, costs.Cosine), type(self.cost_fn)
+    assert self.power == 1.0, self.power
     (x, y, *args, _), aux_data = self.tree_flatten()
     x = x / jnp.linalg.norm(x, axis=-1, keepdims=True)
     y = y / jnp.linalg.norm(y, axis=-1, keepdims=True)
     # TODO(michalk8): find a better way
     aux_data["scale_cost"] = 2. / self.inv_scale_cost
-    cost_fn = costs.Euclidean()
+    cost_fn = costs.SqEuclidean()
     return type(self).tree_unflatten(aux_data, [x, y] + args + [cost_fn])
 
   def to_LRCGeometry(
@@ -767,8 +767,8 @@ def _transport_from_scalings_xy(
 def _cost(x, y, norm_x, norm_y, cost_fn, cost_pow, scale_cost):
   one_line_pairwise = jax.vmap(cost_fn.pairwise, in_axes=[0, None])
   cost = norm_x + norm_y + one_line_pairwise(x, y)
-  if cost_pow != 2.0:
-    cost = jnp.abs(cost) ** (0.5 * cost_pow)
+  if cost_pow != 1.0:
+    cost = jnp.abs(cost) ** cost_pow
   return cost * scale_cost
 
 

diff --git a/ott/tools/k_means.py b/ott/tools/k_means.py
@@ -358,7 +358,7 @@ def k_means(
 
   Args:
     geom: Point cloud of shape ``[n, ndim]`` to cluster. If passed as an array,
-      :class:`~ott.geometry.costs.Euclidean` cost is assumed.
+      :class:`~ott.geometry.costs.SqEuclidean` cost is assumed.
     k: The number of clusters.
     weights: The weights of input points. These weights are considered when
       computing the centroids and inertia. If ``None``, use uniform weights.
@@ -388,7 +388,7 @@ def k_means(
       0] >= k, f"Cannot cluster `{geom.shape[0]}` points into `{k}` clusters."
   if isinstance(geom, jnp.ndarray):
     geom = pointcloud.PointCloud(geom)
-  if isinstance(geom._cost_fn, costs.Cosine):
+  if isinstance(geom.cost_fn, costs.Cosine):
     geom = geom._cosine_to_sqeucl()
   assert geom.is_squared_euclidean