From 792112fdffd01cf70d37882e30342b714ed9f0f4 Mon Sep 17 00:00:00 2001 From: stacknil Date: Mon, 18 May 2026 10:45:39 +0800 Subject: [PATCH] Support custom summarize timestamp column --- README.md | 9 +++++++-- src/telemetry_window_demo/cli.py | 24 ++++++++++++++++++------ tests/test_cli_errors.py | 30 ++++++++++++++++++++++++++++++ tests/test_cli_summarize.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 8 deletions(-) create mode 100644 tests/test_cli_summarize.py diff --git a/README.md b/README.md index dac009f..e1cdfea 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,13 @@ Other demo entrypoints: - `python -m telemetry_window_demo.cli run-ai-demo` - `python -m telemetry_window_demo.cli run-rule-dedup-demo` - `python -m telemetry_window_demo.cli run-config-change-demo` - -That command reads `data/raw/sample_events.jsonl` and regenerates: + +Useful inspection commands: + +- `python -m telemetry_window_demo.cli summarize --input data/raw/sample_events.jsonl` +- `python -m telemetry_window_demo.cli summarize --input events.csv --timestamp-col event_time` + +That command reads `data/raw/sample_events.jsonl` and regenerates: - `data/processed/features.csv` - `data/processed/alerts.csv` diff --git a/src/telemetry_window_demo/cli.py b/src/telemetry_window_demo/cli.py index 8362f60..cd53f24 100644 --- a/src/telemetry_window_demo/cli.py +++ b/src/telemetry_window_demo/cli.py @@ -67,8 +67,13 @@ def build_parser() -> argparse.ArgumentParser: "summarize", help="Summarize an input event file.", ) - summarize_parser.add_argument("--input", required=True, help="Path to .jsonl or .csv.") - summarize_parser.set_defaults(func=summarize_command) + summarize_parser.add_argument("--input", required=True, help="Path to .jsonl or .csv.") + summarize_parser.add_argument( + "--timestamp-col", + default=DEFAULT_TIMESTAMP_COLUMN, + help="Timestamp column name in the input event file.", + ) + summarize_parser.set_defaults(func=summarize_command) plot_parser = subparsers.add_parser("plot", help="Render plots from CSV outputs.") plot_parser.add_argument("--features", required=True, help="Path to features.csv.") @@ -174,10 +179,17 @@ def run_command(args: argparse.Namespace) -> None: print(f" - {plot_path.name}") -def summarize_command(args: argparse.Namespace) -> None: - events = normalize_events(load_events(args.input)) - min_time = format_timestamp(events["timestamp"].min()) - max_time = format_timestamp(events["timestamp"].max()) +def summarize_command(args: argparse.Namespace) -> None: + timestamp_col = _timestamp_column_config_value( + args.timestamp_col, + "timestamp-col", + ) + events = normalize_events( + load_events(args.input, timestamp_col=timestamp_col), + timestamp_col=timestamp_col, + ) + min_time = format_timestamp(events[timestamp_col].min()) + max_time = format_timestamp(events[timestamp_col].max()) top_event_types = events["event_type"].value_counts().head(5).to_dict() overall_error_rate = float(events["is_error"].mean()) if not events.empty else 0.0 diff --git a/tests/test_cli_errors.py b/tests/test_cli_errors.py index 25e2370..02cb04b 100644 --- a/tests/test_cli_errors.py +++ b/tests/test_cli_errors.py @@ -72,6 +72,36 @@ def test_main_reports_directory_input_without_traceback(tmp_path, capsys) -> Non assert "Traceback" not in stderr +def test_main_reports_bad_summarize_timestamp_column_without_traceback( + tmp_path, + capsys, +) -> None: + input_path = tmp_path / "events.csv" + input_path.write_text( + "event_type,source,target,status\n" + "2026-03-10T10:00:00Z,user_a,auth,ok\n", + encoding="utf-8", + ) + + with pytest.raises(SystemExit) as excinfo: + main( + [ + "summarize", + "--input", + str(input_path), + "--timestamp-col", + "event_type", + ] + ) + + assert excinfo.value.code == 1 + stderr = capsys.readouterr().err + assert stderr.startswith("error: ") + assert "timestamp-col" in stderr + assert "event_type" in stderr + assert "Traceback" not in stderr + + def test_main_reports_bad_plot_feature_table_without_traceback(tmp_path, capsys) -> None: features_path = tmp_path / "features.csv" features_path.write_text( diff --git a/tests/test_cli_summarize.py b/tests/test_cli_summarize.py new file mode 100644 index 0000000..f32f644 --- /dev/null +++ b/tests/test_cli_summarize.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from telemetry_window_demo.cli import main + + +def test_summarize_honors_configured_timestamp_column(tmp_path, capsys) -> None: + input_path = tmp_path / "events.csv" + input_path.write_text( + "event_time,event_type,source,target,status,severity\n" + "2026-03-10T10:00:10Z,login_success,user_a,auth,ok,low\n" + "2026-03-10T10:00:00Z,login_fail,user_b,auth,fail,high\n", + encoding="utf-8", + ) + + main( + [ + "summarize", + "--input", + str(input_path), + "--timestamp-col", + "event_time", + ] + ) + + stdout = capsys.readouterr().out + assert "events: 2" in stdout + assert "time_range: 2026-03-10T10:00:00Z -> 2026-03-10T10:00:10Z" in stdout + assert "unique_sources: 2" in stdout + assert "overall_error_rate: 0.50" in stdout