From 2d54023698b14a3cc166cdad3940edd358a60316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 13:35:19 +0200 Subject: [PATCH 1/8] solve proper optimization problem for legend positions --- dufte/main.py | 53 ++++++++++++++++++++++----------------------------- setup.cfg | 4 ++-- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/dufte/main.py b/dufte/main.py index 88fb389..10935c4 100644 --- a/dufte/main.py +++ b/dufte/main.py @@ -2,6 +2,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt +import numpy # dufte is used via perfplot on stackoverflow which has a light (#fffff) and a dark # (#2d2d2d) variant. The midpoint, #969696, should be well readable on both. (And stays @@ -74,44 +75,36 @@ def _argsort(seq): return sorted(range(len(seq)), key=seq.__getitem__) -def _move_min_distance(targets, min_distance, eps=1.0e-5): +def _move_min_distance(targets, min_distance): """Move the targets such that they are close to their original positions, but keep min_distance apart. - - We actually need to solve a convex optimization problem with nonlinear constraints - here, see . This algorithm is very - simplistic. """ + # https://math.stackexchange.com/a/3705240/36678 + import scipy.optimize + + # sort targets idx = _argsort(targets) targets = sorted(targets) - while True: - # Form groups of targets that must be moved together. 
- groups = [[targets[0]]] - for t in targets[1:]: - if abs(t - groups[-1][-1]) > min_distance - eps: - groups.append([]) - groups[-1].append(t) - - if all(len(g) == 1 for g in groups): - break - - targets = [] - for group in groups: - # Minimize - # 1/2 sum_i (x_i + a - target) ** 2 - # over a for a group of labels - n = len(group) - pos = [k * min_distance for k in range(n)] - a = sum(t - p for t, p in zip(group, pos)) / n - if len(targets) > 0 and targets[-1] > pos[0] + a: - a = targets[-1] - pos[0] - eps - new_pos = [p + a for p in pos] - targets += new_pos + n = len(targets) + x0_min = targets[0] - n * min_distance + A = numpy.tril(numpy.ones([n, n])) + b = targets.copy() + for i in range(n): + b[i] -= x0_min + i * min_distance + + out, _ = scipy.optimize.nnls(A, b) + sol = numpy.empty(n) + sol[0] = out[0] + x0_min + for k in range(1, n): + sol[k] = sol[0] + sum(out[1:k + 1]) + k * min_distance + + # reorder idx2 = [idx.index(k) for k in range(len(idx))] - targets = [targets[i] for i in idx2] - return targets + sol = [sol[i] for i in idx2] + + return sol def legend(ax=None, min_label_distance="auto", alpha=1.4): diff --git a/setup.cfg b/setup.cfg index 04dfb36..08b6e39 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,10 +27,10 @@ classifiers = [options] packages = find: -# importlib_metadata can be removed when we support Python 3.8+ only install_requires = - importlib_metadata + importlib_metadata;python_version<"3.8" matplotlib + numpy python_requires = >=3.5 setup_requires = setuptools>=42 From 536bc62038a7bf98bdc551403c7724898b5e91ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 15:13:20 +0200 Subject: [PATCH 2/8] drop scipy dependency --- dufte/main.py | 14 +++++++++----- dufte/optimize.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 dufte/optimize.py diff --git a/dufte/main.py b/dufte/main.py index 10935c4..5991b57 100644 --- a/dufte/main.py +++ 
def nnls(A, b, eps=1.0e-10, max_steps=100):
    """Solve the non-negative least-squares problem

        min_x ||A x - b||_2    subject to    x >= 0

    with the active-set method of Lawson and Hanson.

    :param A: array-like of shape (m, n)
    :param b: array-like of shape (m,)
    :param eps: tolerance below which a gradient entry / a component of x is treated
        as zero
    :param max_steps: maximum number of outer iterations (one variable is released
        from its zero bound per iteration)
    :returns: numpy array of shape (n,) with non-negative entries
    :raises ValueError: if A is not two-dimensional or b does not match its row count
    """
    A = numpy.asarray(A, dtype=float)
    b = numpy.asarray(b, dtype=float)

    if A.ndim != 2:
        raise ValueError("A must be a two-dimensional array")
    m, n = A.shape
    if b.shape != (m,):
        raise ValueError("b must be a vector with one entry per row of A")

    AtA = A.T @ A
    Atb = A.T @ b

    def solve_on(passive):
        # Unconstrained least-squares solution on the passive (free) variables; all
        # other components are pinned to zero.
        s = numpy.zeros(n)
        if passive.any():
            s[passive] = numpy.linalg.lstsq(
                AtA[passive][:, passive], Atb[passive], rcond=None
            )[0]
        return s

    passive = numpy.zeros(n, dtype=bool)
    x = numpy.zeros(n)
    # negative gradient of 1/2 ||Ax - b||^2 at x = 0
    w = Atb.copy()

    for _ in range(max_steps):
        if passive.all():
            break

        # Only variables that are still fixed at zero may enter the passive set.
        candidates = numpy.where(passive, -numpy.inf, w)
        entering = numpy.argmax(candidates)
        if candidates[entering] <= eps:
            # KKT conditions are satisfied
            break
        passive[entering] = True

        s = solve_on(passive)
        while True:
            # Passive variables that the unconstrained solve pushes out of the
            # feasible region. Each pass removes at least one of them, so this loop
            # runs at most n times.
            blocking = numpy.flatnonzero(passive & (s <= 0))
            if blocking.size == 0:
                break

            # Largest step from x towards s that keeps x >= 0. Only the blocking
            # variables limit the step; a zero denominator means x_i = s_i = 0, for
            # which the admissible step is 0.
            denom = x[blocking] - s[blocking]
            ratios = numpy.divide(
                x[blocking], denom, out=numpy.zeros(blocking.size), where=denom > 0
            )
            tightest = numpy.argmin(ratios)
            x += ratios[tightest] * (s - x)
            # The limiting variable lands on its bound; make that exact so that
            # round-off cannot keep it in the passive set.
            x[blocking[tightest]] = 0.0
            passive[blocking[x[blocking] <= eps]] = False

            s = solve_on(passive)

        x = s
        w = Atb - AtA @ x

    return x
From a0bd693c32ba34230b7f51b46e85042a71bab737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 15:17:13 +0200 Subject: [PATCH 5/8] readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46ee5e9..d51fa7d 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ The position $x_i$ of the line annotations is computed as the solution of a non- least-squares problem $$ \begin{align} -\frac{1}{2}\sum_i (x_i - t_i)^2 \to \min_x,\\ +\frac{1}{2}\sum_i (x_i - t_i)^2 \to \min_x,\\\\ (x_i - x_j)^2 \ge a^2 \quad \forall i,j. \end{align} $$ From 697e391945ccfb4fd55b07c578a5aacbc99a819e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 15:38:54 +0200 Subject: [PATCH 6/8] fix distance computation --- dufte/main.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dufte/main.py b/dufte/main.py index 5991b57..2be61ef 100644 --- a/dufte/main.py +++ b/dufte/main.py @@ -111,22 +111,21 @@ def _move_min_distance(targets, min_distance): return sol -def legend(ax=None, min_label_distance="auto", alpha=1.4): +def legend(ax=None, min_label_distance="auto", alpha=1.0): ax = ax or plt.gca() fig = plt.gcf() - # fig.set_size_inches(12 / 9 * height, height) logy = ax.get_yscale() == "log" if min_label_distance == "auto": - # Make sure that the distance is alpha times the fontsize. This needs to be - # translated into axes units. - fig_height = fig.get_size_inches()[0] + # Make sure that the distance is alpha * fontsize. This needs to be translated + # into axes units. 
+ fig_height_inches = fig.get_size_inches()[1] ax = plt.gca() ax_pos = ax.get_position() ax_height = ax_pos.y1 - ax_pos.y0 - ax_height_inches = ax_height * fig_height + ax_height_inches = ax_height * fig_height_inches ylim = ax.get_ylim() if logy: ax_height_ylim = math.log10(ylim[1]) - math.log10(ylim[0]) From b1e69a10711e8df9d2ef4ff67ad1753064c3d239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 15:53:21 +0200 Subject: [PATCH 7/8] slightly wider xticks --- dufte/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dufte/main.py b/dufte/main.py index 2be61ef..b90e404 100644 --- a/dufte/main.py +++ b/dufte/main.py @@ -16,6 +16,8 @@ # "Lights out": #000000 _gray = "969696" _stroke_width = 0.3 +# make the xticks slightly wider to make them easier to see +_xtick_width = 0.4 style = { "font.size": 14, @@ -32,7 +34,7 @@ "xtick.minor.top": False, "xtick.minor.bottom": False, "xtick.color": _gray, - "xtick.major.width": _stroke_width, + "xtick.major.width": _xtick_width, "axes.grid": True, "axes.grid.axis": "y", "grid.color": _gray, From f478c1f3f0c8f40f668377578a0c3fcd00f3ea60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20Schl=C3=B6mer?= Date: Thu, 4 Jun 2020 15:57:04 +0200 Subject: [PATCH 8/8] version bump --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 08b6e39..29041a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = dufte -version = 0.2.5 +version = 0.2.6 author = Nico Schlömer author_email = nico.schloemer@gmail.com description = Clean matplotlib plots