From 047dbfd1dbfef98f5aa7b23cbd26a010dc962870 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:54:34 +0000 Subject: [PATCH 1/9] Initial plan From 0d5d3988e0bb0740d49fb101a9d0aab75513f0fe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:00:08 +0000 Subject: [PATCH 2/9] Add waterfall chart helper and focused tests Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/e6e8e7c7-c0a1-47b3-9763-5ebb04affcb9 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _unittests/ut_tools/test_pandas.py | 20 +++++++- teachpyx/tools/pandas.py | 81 ++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/_unittests/ut_tools/test_pandas.py b/_unittests/ut_tools/test_pandas.py index 4833207..ae1d397 100644 --- a/_unittests/ut_tools/test_pandas.py +++ b/_unittests/ut_tools/test_pandas.py @@ -1,6 +1,7 @@ import unittest +import pandas from teachpyx.ext_test_case import ExtTestCase -from teachpyx.tools.pandas import read_csv_cached +from teachpyx.tools.pandas import plot_waterfall, read_csv_cached class TestPandas(ExtTestCase): @@ -14,6 +15,23 @@ def test_read_csv_cached(self): self.assertEqual(df.shape, df2.shape) self.assertEqual(list(df.columns), list(df2.columns)) + def test_plot_waterfall(self): + df = pandas.DataFrame( + { + "name": ["A", "B", "C"], + "delta": [10, -3, 5], + } + ) + ax, plot_df = plot_waterfall(df, "delta", "name", total_label="TOTAL") + self.assertEqual(ax.__class__.__name__, "Axes") + self.assertEqual(list(plot_df["label"]), ["A", "B", "C", "TOTAL"]) + self.assertEqual(list(plot_df["start"]), [0.0, 10.0, 7.0, 0.0]) + self.assertEqual(list(plot_df["end"]), [10.0, 7.0, 12.0, 12.0]) + + def test_plot_waterfall_missing_column(self): + df = pandas.DataFrame({"name": ["A"], "delta": [1]}) + self.assertRaise(lambda: plot_waterfall(df, "missing", "name"), ValueError) + if __name__ == "__main__": unittest.main() diff --git a/teachpyx/tools/pandas.py b/teachpyx/tools/pandas.py index ca5f08b..45fb147 100644 --- a/teachpyx/tools/pandas.py +++ b/teachpyx/tools/pandas.py @@ -2,6 +2,7 @@ import os import re from pathlib import Path +from typing import Optional, Tuple from urllib.parse import urlparse, unquote import pandas @@ -46,3 +47,83 @@ def read_csv_cached( df = pandas.read_csv(filepath_or_buffer, **kwargs) df.to_csv(cache_name, index=False) return df + + +def plot_waterfall( + data: pandas.DataFrame, + value_column: str, + label_column: Optional[str] = None, + total_label: str = "total", + ax=None, + colors: Tuple[str, str, str] = ("#2ca02c", "#d62728", "#1f77b4"), +): + """ + Draws a waterfall chart from a dataframe. + + :param data: dataframe containing increments + :param value_column: column with increments + :param label_column: column with labels, index is used if None + :param total_label: label used for the final total + :param ax: existing axis or None to create one + :param colors: positive, negative, total colors + :return: axis, computed dataframe used to draw the chart + + .. versionadded:: 0.6.1 + """ + if value_column not in data.columns: + raise ValueError(f"Unable to find column {value_column!r} in dataframe.") + if label_column is not None and label_column not in data.columns: + raise ValueError(f"Unable to find column {label_column!r} in dataframe.") + if len(colors) != 3: + raise ValueError(f"colors must contain 3 values, not {len(colors)}.") + + values = pandas.to_numeric(data[value_column], errors="raise").astype(float) + labels = data[label_column] if label_column is not None else data.index + labels = labels.astype(str) + + starts = values.cumsum().shift(1, fill_value=0.0) + plot_df = pandas.DataFrame( + { + "label": labels, + "value": values, + "start": starts, + "end": starts + values, + "kind": "variation", + } + ) + + total = float(values.sum()) if len(values) > 0 else 0.0 + total_row = pandas.DataFrame( + { + "label": [total_label], + "value": [total], + "start": [0.0], + "end": [total], + "kind": ["total"], + } + ) + plot_df = pandas.concat([plot_df, total_row], axis=0, ignore_index=True) + + if ax is None: + import matplotlib.pyplot as plt + + _, ax = plt.subplots(1, 1) + + bar_colors = [ + colors[2] + if kind == "total" + else (colors[0] if value >= 0 else colors[1]) + for value, kind in zip(plot_df["value"], plot_df["kind"]) + ] + ax.bar( + plot_df["label"], + plot_df["value"], + bottom=plot_df["start"], + color=bar_colors, + ) + + ax.axhline(0, color="black", linewidth=0.8) + ax.set_ylabel(value_column) + ax.set_xlabel(label_column or "index") + + return ax, plot_df From eb76c600c8799f287bbf1f423f062ddc690d37d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:01:24 +0000 Subject: [PATCH 3/9] Add extra validation tests for waterfall helper Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/e6e8e7c7-c0a1-47b3-9763-5ebb04affcb9 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _unittests/ut_tools/test_pandas.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/_unittests/ut_tools/test_pandas.py b/_unittests/ut_tools/test_pandas.py index ae1d397..d7ba2c2 100644 --- a/_unittests/ut_tools/test_pandas.py +++ b/_unittests/ut_tools/test_pandas.py @@ -32,6 +32,17 @@ def test_plot_waterfall_missing_column(self): df = pandas.DataFrame({"name": ["A"], "delta": [1]}) self.assertRaise(lambda: plot_waterfall(df, "missing", "name"), ValueError) + def test_plot_waterfall_missing_label_column(self): + df = pandas.DataFrame({"name": ["A"], "delta": [1]}) + self.assertRaise(lambda: plot_waterfall(df, "delta", "missing"), ValueError) + + def test_plot_waterfall_bad_colors(self): + df = pandas.DataFrame({"name": ["A"], "delta": [1]}) + self.assertRaise( + lambda: plot_waterfall(df, "delta", "name", colors=("r",)), + ValueError, + ) + if __name__ == "__main__": unittest.main() From f34b7a65c40fb8ef9b5eca192cfa23418a2bf925 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:02:22 +0000 Subject: [PATCH 4/9] Refine waterfall validation and test assertions Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/e6e8e7c7-c0a1-47b3-9763-5ebb04affcb9 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _unittests/ut_tools/test_pandas.py | 12 ++++++------ teachpyx/tools/pandas.py | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/_unittests/ut_tools/test_pandas.py b/_unittests/ut_tools/test_pandas.py index d7ba2c2..d6a2b8d 100644 --- a/_unittests/ut_tools/test_pandas.py +++ b/_unittests/ut_tools/test_pandas.py @@ -30,18 +30,18 @@ def test_plot_waterfall(self): def test_plot_waterfall_missing_column(self): df = pandas.DataFrame({"name": ["A"], "delta": [1]}) - self.assertRaise(lambda: plot_waterfall(df, "missing", "name"), ValueError) + with self.assertRaises(ValueError): + plot_waterfall(df, "missing", "name") def test_plot_waterfall_missing_label_column(self): df = pandas.DataFrame({"name": ["A"], "delta": [1]}) - self.assertRaise(lambda: plot_waterfall(df, "delta", "missing"), ValueError) + with self.assertRaises(ValueError): + plot_waterfall(df, "delta", "missing") def test_plot_waterfall_bad_colors(self): df = pandas.DataFrame({"name": ["A"], "delta": [1]}) - self.assertRaise( - lambda: plot_waterfall(df, "delta", "name", colors=("r",)), - ValueError, - ) + with self.assertRaises(ValueError): + plot_waterfall(df, "delta", "name", colors=("r",)) if __name__ == "__main__": diff --git a/teachpyx/tools/pandas.py b/teachpyx/tools/pandas.py index 45fb147..9b1ca8d 100644 --- a/teachpyx/tools/pandas.py +++ b/teachpyx/tools/pandas.py @@ -77,7 +77,12 @@ def plot_waterfall( if len(colors) != 3: raise ValueError(f"colors must contain 3 values, not {len(colors)}.") - values = pandas.to_numeric(data[value_column], errors="raise").astype(float) + try: + values = pandas.to_numeric(data[value_column], errors="raise").astype(float) + except ValueError as exc: + raise ValueError( + f"Column {value_column!r} cannot be converted to numeric values." + ) from exc labels = data[label_column] if label_column is not None else data.index labels = labels.astype(str) From f79dc9516b0294dfeecf9dd1effc6f400860dc94 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:03:23 +0000 Subject: [PATCH 5/9] Polish waterfall implementation per review Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/e6e8e7c7-c0a1-47b3-9763-5ebb04affcb9 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _unittests/ut_tools/test_pandas.py | 3 ++- teachpyx/tools/pandas.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/_unittests/ut_tools/test_pandas.py b/_unittests/ut_tools/test_pandas.py index d6a2b8d..035eba1 100644 --- a/_unittests/ut_tools/test_pandas.py +++ b/_unittests/ut_tools/test_pandas.py @@ -1,4 +1,5 @@ import unittest +from matplotlib.axes import Axes import pandas from teachpyx.ext_test_case import ExtTestCase from teachpyx.tools.pandas import plot_waterfall, read_csv_cached @@ -23,7 +24,7 @@ def test_plot_waterfall(self): } ) ax, plot_df = plot_waterfall(df, "delta", "name", total_label="TOTAL") - self.assertEqual(ax.__class__.__name__, "Axes") + self.assertIsInstance(ax, Axes) self.assertEqual(list(plot_df["label"]), ["A", "B", "C", "TOTAL"]) self.assertEqual(list(plot_df["start"]), [0.0, 10.0, 7.0, 0.0]) self.assertEqual(list(plot_df["end"]), [10.0, 7.0, 12.0, 12.0]) diff --git a/teachpyx/tools/pandas.py b/teachpyx/tools/pandas.py index 9b1ca8d..0a3c34c 100644 --- a/teachpyx/tools/pandas.py +++ b/teachpyx/tools/pandas.py @@ -97,7 +97,7 @@ def plot_waterfall( } ) - total = float(values.sum()) if len(values) > 0 else 0.0 + total = float(values.sum()) total_row = pandas.DataFrame( { "label": [total_label], From bbdac40fd4414a0f5f0d11b892a3b26f1f30c732 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:04:26 +0000 Subject: [PATCH 6/9] Add non-numeric input test for waterfall helper Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/e6e8e7c7-c0a1-47b3-9763-5ebb04affcb9 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _unittests/ut_tools/test_pandas.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/_unittests/ut_tools/test_pandas.py b/_unittests/ut_tools/test_pandas.py index 035eba1..d5c3c97 100644 --- a/_unittests/ut_tools/test_pandas.py +++ b/_unittests/ut_tools/test_pandas.py @@ -44,6 +44,11 @@ def test_plot_waterfall_bad_colors(self): with self.assertRaises(ValueError): plot_waterfall(df, "delta", "name", colors=("r",)) + def test_plot_waterfall_not_numeric(self): + df = pandas.DataFrame({"name": ["A"], "delta": ["x"]}) + with self.assertRaises(ValueError): + plot_waterfall(df, "delta", "name") + if __name__ == "__main__": unittest.main() From 90a5382eeac5f0d247279b34e97de5fc91483383 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:32:29 +0000 Subject: [PATCH 7/9] Remove try/except from waterfall numeric conversion Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/51a3850a-5547-4962-a7d6-829066c3c6ff Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- teachpyx/tools/pandas.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/teachpyx/tools/pandas.py b/teachpyx/tools/pandas.py index 0a3c34c..8962704 100644 --- a/teachpyx/tools/pandas.py +++ b/teachpyx/tools/pandas.py @@ -77,12 +77,7 @@ def plot_waterfall( if len(colors) != 3: raise ValueError(f"colors must contain 3 values, not {len(colors)}.") - try: - values = pandas.to_numeric(data[value_column], errors="raise").astype(float) - except ValueError as exc: - raise ValueError( - f"Column {value_column!r} cannot be converted to numeric values." - ) from exc + values = pandas.to_numeric(data[value_column], errors="raise").astype(float) labels = data[label_column] if label_column is not None else data.index labels = labels.astype(str) From ac74fceb318f669eb0e195e3c514ffa796c5f594 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:40:08 +0000 Subject: [PATCH 8/9] Add concrete documentation example for plot_waterfall Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/242bb04a-1d52-4f54-a254-e242a90f9ed5 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _doc/api/tools/pandas.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/_doc/api/tools/pandas.rst b/_doc/api/tools/pandas.rst index 58b06a0..9117d5b 100644 --- a/_doc/api/tools/pandas.rst +++ b/_doc/api/tools/pandas.rst @@ -2,6 +2,24 @@ teachpyx.tools.pandas ===================== +Exemple +------- + +.. plot:: + + import pandas + import matplotlib.pyplot as plt + from teachpyx.tools.pandas import plot_waterfall + + plt.close("all") + + df = pandas.DataFrame({"name": ["A", "B", "C"], "delta": [10, -3, 5]}) + ax, _ = plot_waterfall(df, "delta", "name", total_label="TOTAL") + ax.set_title("Exemple de waterfall") + plt.xticks(rotation=30, ha="right") + plt.tight_layout() + plt.show() + .. automodule:: teachpyx.tools.pandas :members: :no-undoc-members: From c22383e50c15fed44e8b28e78fff2afa60b791b4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:41:08 +0000 Subject: [PATCH 9/9] Polish waterfall doc example wording Agent-Logs-Url: https://github.com/sdpython/teachpyx/sessions/242bb04a-1d52-4f54-a254-e242a90f9ed5 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- _doc/api/tools/pandas.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_doc/api/tools/pandas.rst b/_doc/api/tools/pandas.rst index 9117d5b..352314a 100644 --- a/_doc/api/tools/pandas.rst +++ b/_doc/api/tools/pandas.rst @@ -2,7 +2,7 @@ teachpyx.tools.pandas ===================== -Exemple +Example ------- .. plot:: @@ -15,7 +15,7 @@ Exemple df = pandas.DataFrame({"name": ["A", "B", "C"], "delta": [10, -3, 5]}) ax, _ = plot_waterfall(df, "delta", "name", total_label="TOTAL") - ax.set_title("Exemple de waterfall") + ax.set_title("Example waterfall") plt.xticks(rotation=30, ha="right") plt.tight_layout() plt.show()