'
f'
{ICONS["check"]}
'
@@ -630,6 +712,15 @@ def render_clones_panel(ctx: ReportContext) -> tuple[str, bool, int, int]:
sub_tabs.append(
("segments", "Segments", len(ctx.segment_sorted), segment_section)
)
+ if suppressed_total > 0:
+ sub_tabs.append(
+ (
+ "suppressed",
+ "Suppressed",
+ suppressed_total,
+ _render_suppressed_clone_panel(ctx, suppressed_clone_groups),
+ )
+ )
panel = global_novelty_html + render_split_tabs(
group_id="clones", tabs=sub_tabs, emit_clone_counters=True
@@ -643,6 +734,11 @@ def render_clones_panel(ctx: ReportContext) -> tuple[str, bool, int, int]:
)
else:
clones_answer = f"{ctx.clone_groups_total} groups and {ctx.clone_instances_total} instances."
+ if suppressed_total > 0:
+ clones_answer += (
+ f" {suppressed_total} suppressed golden-fixture groups are excluded "
+ "from active review."
+ )
clones_tone: Tone = "warn" if ctx.clone_groups_total > 0 else "ok"
panel = (
insight_block(
diff --git a/codeclone/_html_report/_sections/_coupling.py b/codeclone/_html_report/_sections/_coupling.py
index 8b43683..6be91ac 100644
--- a/codeclone/_html_report/_sections/_coupling.py
+++ b/codeclone/_html_report/_sections/_coupling.py
@@ -15,6 +15,11 @@
from .._components import Tone, insight_block
from .._tables import render_rows_table
from .._tabs import render_split_tabs
+from ._coverage_join import (
+ coverage_join_quality_count,
+ coverage_join_quality_summary,
+ render_coverage_join_panel,
+)
if TYPE_CHECKING:
from collections.abc import Mapping
@@ -54,11 +59,16 @@ def render_quality_panel(ctx: ReportContext) -> str:
cohesion_summary = _as_mapping(ctx.cohesion_map.get("summary"))
complexity_summary = _as_mapping(ctx.complexity_map.get("summary"))
overloaded_modules_summary = _as_mapping(ctx.overloaded_modules_map.get("summary"))
+ coverage_join_summary = coverage_join_quality_summary(ctx)
coupling_high_risk = _as_int(coupling_summary.get("high_risk"))
cohesion_low = _as_int(cohesion_summary.get("low_cohesion"))
complexity_high_risk = _as_int(complexity_summary.get("high_risk"))
overloaded_module_candidates = _as_int(overloaded_modules_summary.get("candidates"))
+ coverage_review_items = coverage_join_quality_count(ctx)
+ coverage_hotspots = _as_int(coverage_join_summary.get("coverage_hotspots"))
+ coverage_scope_gaps = _as_int(coverage_join_summary.get("scope_gap_hotspots"))
+ coverage_join_status = str(coverage_join_summary.get("status", "")).strip()
cc_max = _as_int(complexity_summary.get("max"))
# Insight
@@ -77,11 +87,24 @@ def render_quality_panel(ctx: ReportContext) -> str:
f"max CBO {coupling_summary.get('max', 'n/a')}; "
f"max LCOM4 {cohesion_summary.get('max', 'n/a')}."
)
+ if coverage_join_summary:
+ if coverage_join_status == "ok":
+ answer += (
+ f" Coverage hotspots: {coverage_hotspots}; "
+ f"scope gaps: {coverage_scope_gaps}."
+ )
+ else:
+ answer += " Coverage join unavailable."
if overloaded_module_candidates > 0 or (
coupling_high_risk > 0 and cohesion_low > 0
):
tone = "risk"
- elif coupling_high_risk > 0 or cohesion_low > 0 or complexity_high_risk > 0:
+ elif (
+ coupling_high_risk > 0
+ or cohesion_low > 0
+ or complexity_high_risk > 0
+ or coverage_review_items > 0
+ ):
tone = "warn"
else:
tone = "ok"
@@ -197,6 +220,16 @@ def render_quality_panel(ctx: ReportContext) -> str:
gm_panel,
),
]
+ coverage_join_panel = render_coverage_join_panel(ctx)
+ if coverage_join_panel:
+ sub_tabs.append(
+ (
+ "coverage-join",
+ "Coverage Join",
+ coverage_review_items,
+ coverage_join_panel,
+ )
+ )
return insight_block(
question="Are there quality hotspots in the codebase?",
diff --git a/codeclone/_html_report/_sections/_coverage_join.py b/codeclone/_html_report/_sections/_coverage_join.py
new file mode 100644
index 0000000..d6c65f2
--- /dev/null
+++ b/codeclone/_html_report/_sections/_coverage_join.py
@@ -0,0 +1,254 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+"""Coverage Join HTML helpers for Quality tab rendering."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from ... import _coerce
+from ..._html_badges import _stat_card, _tab_empty_info
+from ..._html_escape import _escape_html
+from .._glossary import glossary_tip
+from .._tables import render_rows_table
+
+if TYPE_CHECKING:
+ from collections.abc import Mapping
+
+ from .._context import ReportContext
+
+_as_int = _coerce.as_int
+_as_mapping = _coerce.as_mapping
+_as_sequence = _coerce.as_sequence
+
+
+def coverage_join_quality_count(ctx: ReportContext) -> int:
+ coverage_summary = _coverage_join_summary(ctx)
+ if str(coverage_summary.get("status", "")).strip() != "ok":
+ return 0
+ return _as_int(coverage_summary.get("coverage_hotspots")) + _as_int(
+ coverage_summary.get("scope_gap_hotspots")
+ )
+
+
+def coverage_join_quality_summary(ctx: ReportContext) -> dict[str, object]:
+ return dict(_coverage_join_summary(ctx))
+
+
+def render_coverage_join_panel(ctx: ReportContext) -> str:
+ metrics_map = _as_mapping(getattr(ctx, "metrics_map", {}))
+ coverage_join = _as_mapping(metrics_map.get("coverage_join"))
+ coverage_summary = _as_mapping(coverage_join.get("summary"))
+ if not coverage_summary:
+ return ""
+
+ status = str(coverage_summary.get("status", "")).strip()
+ if status != "ok":
+ source = _source_label(str(coverage_summary.get("source", "")).strip())
+ invalid_reason_val = coverage_summary.get("invalid_reason")
+ invalid_reason = (
+ invalid_reason_val.strip() if isinstance(invalid_reason_val, str) else ""
+ )
+ detail_parts: list[str] = []
+ if source:
+ detail_parts.append(f"Source: {_escape_html(source)}")
+ if invalid_reason:
+ detail_parts.append(
+ f'
{_escape_html(invalid_reason)}'
+ )
+ return _tab_empty_info(
+ "Coverage Join is unavailable for this run.",
+ detail_html="
".join(detail_parts) if detail_parts else None,
+ )
+
+ cards = [
+ _status_card(coverage_summary),
+ _overall_coverage_card(coverage_summary),
+ _coverage_hotspots_card(coverage_summary),
+ _scope_gaps_card(coverage_summary),
+ _measured_units_card(coverage_summary),
+ ]
+
+ return (
+ f'
{"".join(cards)}
'
+ + '
Coverage review items
'
+ + render_rows_table(
+ headers=("Function", "Location", "CC", "Status", "Coverage", "Risk"),
+ rows=_coverage_join_table_rows(ctx, coverage_join),
+ empty_message=_coverage_join_empty_message(),
+ empty_description=_coverage_join_empty_description(),
+ raw_html_headers=("Location",),
+ ctx=ctx,
+ )
+ )
+
+
+def _coverage_join_summary(ctx: ReportContext) -> Mapping[str, object]:
+ metrics_map = _as_mapping(getattr(ctx, "metrics_map", {}))
+ coverage_join = _as_mapping(metrics_map.get("coverage_join"))
+ return _as_mapping(coverage_join.get("summary"))
+
+
+def _status_card(coverage_summary: Mapping[str, object]) -> str:
+ source = str(coverage_summary.get("source", "")).strip()
+ return _stat_card(
+ "Status",
+ "Joined",
+ detail=_micro_badges(("source", _source_label(source))) if source else "",
+ value_tone="good",
+ css_class="meta-item",
+ glossary_tip_fn=glossary_tip,
+ )
+
+
+def _overall_coverage_card(coverage_summary: Mapping[str, object]) -> str:
+ review_items = _as_int(coverage_summary.get("coverage_hotspots")) + _as_int(
+ coverage_summary.get("scope_gap_hotspots")
+ )
+ return _stat_card(
+ "Overall coverage",
+ _format_permille_pct(coverage_summary.get("overall_permille")),
+ detail=_micro_badges(
+ ("covered", _as_int(coverage_summary.get("overall_covered_lines"))),
+ ("executable", _as_int(coverage_summary.get("overall_executable_lines"))),
+ ),
+ value_tone="warn" if review_items > 0 else "good",
+ css_class="meta-item",
+ glossary_tip_fn=glossary_tip,
+ )
+
+
+def _coverage_hotspots_card(coverage_summary: Mapping[str, object]) -> str:
+ hotspots = _as_int(coverage_summary.get("coverage_hotspots"))
+ threshold = _as_int(coverage_summary.get("hotspot_threshold_percent"))
+ return _stat_card(
+ "Coverage hotspots",
+ hotspots,
+ detail=_micro_badges(("threshold", f"< {threshold}%")),
+ value_tone="bad" if hotspots > 0 else "good",
+ css_class="meta-item",
+ glossary_tip_fn=glossary_tip,
+ )
+
+
+def _scope_gaps_card(coverage_summary: Mapping[str, object]) -> str:
+ scope_gaps = _as_int(coverage_summary.get("scope_gap_hotspots"))
+ return _stat_card(
+ "Scope gaps",
+ scope_gaps,
+ detail=_micro_badges(
+ (
+ "not mapped",
+ _as_int(coverage_summary.get("missing_from_report_units")),
+ ),
+ ),
+ value_tone="warn" if scope_gaps > 0 else "good",
+ css_class="meta-item",
+ glossary_tip_fn=glossary_tip,
+ )
+
+
+def _measured_units_card(coverage_summary: Mapping[str, object]) -> str:
+ return _stat_card(
+ "Measured units",
+ _as_int(coverage_summary.get("measured_units")),
+ detail=_micro_badges(("units", _as_int(coverage_summary.get("units")))),
+ css_class="meta-item",
+ glossary_tip_fn=glossary_tip,
+ )
+
+
+def _coverage_join_table_rows(
+ ctx: ReportContext,
+ coverage_family: Mapping[str, object],
+) -> list[tuple[str, str, str, str, str, str]]:
+ review_items = [
+ _as_mapping(item)
+ for item in _as_sequence(coverage_family.get("items"))
+ if bool(_as_mapping(item).get("coverage_review_item"))
+ or bool(_as_mapping(item).get("coverage_hotspot"))
+ or bool(_as_mapping(item).get("scope_gap_hotspot"))
+ ]
+ return [
+ (
+ str(item.get("qualname", "")).strip() or "(unknown)",
+ _location_cell_html(ctx, item),
+ str(_as_int(item.get("cyclomatic_complexity"))),
+ _status_cell_label(item),
+ _coverage_cell_label(item),
+ str(item.get("risk", "low")).strip() or "low",
+ )
+ for item in review_items[:50]
+ ]
+
+
+def _coverage_join_empty_message() -> str:
+ return "No medium/high-risk functions need joined-coverage follow-up."
+
+
+def _coverage_join_empty_description() -> str:
+ return (
+ "No risky functions were below threshold or missing from the supplied "
+ "coverage.xml."
+ )
+
+
+def _location_cell_html(ctx: ReportContext, item: Mapping[str, object]) -> str:
+ relative_path = str(item.get("relative_path", "")).strip()
+ start_line = _as_int(item.get("start_line"))
+ end_line = _as_int(item.get("end_line"))
+ line_label = (
+ f"{relative_path}:{start_line}"
+ if start_line > 0
+ else (relative_path or "(unknown)")
+ )
+ if end_line > start_line > 0:
+ line_label = f"{relative_path}:{start_line}-{end_line}"
+ file_target = (
+ f"{ctx.scan_root.rstrip('/')}/{relative_path}"
+ if ctx.scan_root and relative_path
+ else relative_path
+ )
+ return (
+ f'
0 else 1}">'
+ f"{_escape_html(line_label)}"
+ )
+
+
+def _status_cell_label(item: Mapping[str, object]) -> str:
+ if bool(item.get("scope_gap_hotspot")):
+ return "not in coverage.xml"
+ if bool(item.get("coverage_hotspot")):
+ return "below threshold"
+ return str(item.get("coverage_status", "")).replace("_", " ").strip() or "n/a"
+
+
+def _coverage_cell_label(item: Mapping[str, object]) -> str:
+ if bool(item.get("scope_gap_hotspot")):
+ return "n/a"
+ return _format_permille_pct(item.get("coverage_permille"))
+
+
+def _micro_badges(*pairs: tuple[str, object]) -> str:
+ return "".join(
+ f'
'
+ f'{_escape_html(str(value))}'
+ f'{_escape_html(label)}'
+ for label, value in pairs
+ if value is not None and str(value) != "n/a"
+ )
+
+
+def _format_permille_pct(value: object) -> str:
+ return f"{_as_int(value) / 10.0:.1f}%"
+
+
+def _source_label(source: str) -> str:
+ name = Path(source).name
+ return name or source
diff --git a/codeclone/_html_report/_sections/_overview.py b/codeclone/_html_report/_sections/_overview.py
index fe061ab..b0483e1 100644
--- a/codeclone/_html_report/_sections/_overview.py
+++ b/codeclone/_html_report/_sections/_overview.py
@@ -59,6 +59,7 @@
"cohesion": "cohesion",
"coupling": "coupling",
"dead_code": "dead code",
+ "coverage": "coverage",
"dependency": "dependency",
}
@@ -433,104 +434,98 @@ def _format_permille_delta(value: object) -> str:
return f"{sign}{delta / 10.0:.1f}pt"
-def _overview_stat(value: str, label: str) -> str:
- return (
- '
'
- f'
{_escape_html(value)}
'
- f'
{_escape_html(label)}
'
- "
"
+def _fact_row(
+ label: str,
+ value: str,
+ *,
+ delta: str | None = None,
+ value_cls: str = "",
+) -> str:
+ cls = f" overview-fact-value--{value_cls}" if value_cls else ""
+ delta_html = (
+ f'
{_escape_html(delta)}'
+ if delta
+ else ""
)
-
-
-def _overview_stat_row(*stats: tuple[str, str]) -> str:
return (
- '
'
- + "".join(_overview_stat(value, label) for value, label in stats)
- + "
"
+ '
'
+ f'{_escape_html(label)}'
+ f''
+ f"{_escape_html(value)}{delta_html}"
+ "
"
)
def _adoption_card_html(adoption_summary: Mapping[str, object]) -> str:
- params_pct = _format_permille_pct(adoption_summary.get("param_permille"))
- returns_pct = _format_permille_pct(adoption_summary.get("return_permille"))
- docs_pct = _format_permille_pct(adoption_summary.get("docstring_permille"))
- stats_html = _overview_stat_row(
- (params_pct, "params"),
- (returns_pct, "returns"),
- (docs_pct, "docstrings"),
- )
+ has_baseline = bool(adoption_summary.get("baseline_diff_available"))
- deltas_html = ""
- if bool(adoption_summary.get("baseline_diff_available")):
- deltas_html = _mb(
- (
- "\u0394 params",
- _format_permille_delta(adoption_summary.get("param_delta")),
- ),
- (
- "\u0394 returns",
- _format_permille_delta(adoption_summary.get("return_delta")),
- ),
- (
- "\u0394 docs",
- _format_permille_delta(adoption_summary.get("docstring_delta")),
- ),
- )
- if deltas_html:
- deltas_html = f'
{deltas_html}
'
+ def _delta_or_none(key: str) -> str | None:
+ if not has_baseline:
+ return None
+ return _format_permille_delta(adoption_summary.get(key))
+
+ rows = [
+ _fact_row(
+ "Param annotations",
+ _format_permille_pct(adoption_summary.get("param_permille")),
+ delta=_delta_or_none("param_delta"),
+ ),
+ _fact_row(
+ "Return annotations",
+ _format_permille_pct(adoption_summary.get("return_permille")),
+ delta=_delta_or_none("return_delta"),
+ ),
+ _fact_row(
+ "Docstrings",
+ _format_permille_pct(adoption_summary.get("docstring_permille")),
+ delta=_delta_or_none("docstring_delta"),
+ ),
+ ]
any_count = _as_int(adoption_summary.get("typing_any_count"))
- if any_count > 0:
- noun = "symbol" if any_count == 1 else "symbols"
- caption_html = (
- '
'
- f"{_format_count(any_count)} {noun} typed as Any"
- "
"
- )
- else:
- caption_html = (
- '
'
- "No symbols fall back to Any."
- "
"
+ rows.append(
+ _fact_row(
+ "Typed as Any",
+ _format_count(any_count),
+ value_cls="good" if any_count == 0 else "warn",
)
+ )
- return stats_html + deltas_html + caption_html
+ return '
' + "".join(rows) + "
"
def _api_card_html(api_summary: Mapping[str, object]) -> str:
if not bool(api_summary.get("enabled")):
return (
'
Disabled in this run.
'
- '
'
- "Enable with --api-surface to track public symbols."
- "
"
+ '
'
+ + _fact_row("Enable via", "--api-surface")
+ + "
"
)
symbols = _as_int(api_summary.get("public_symbols"))
modules = _as_int(api_summary.get("modules"))
- stats_html = _overview_stat_row(
- (_format_count(symbols), "public symbols"),
- (_format_count(modules), "modules"),
- )
+ rows = [
+ _fact_row("Public symbols", _format_count(symbols)),
+ _fact_row("Modules", _format_count(modules)),
+ ]
- chips_html = ""
if bool(api_summary.get("baseline_diff_available")):
breaking = _as_int(api_summary.get("breaking"))
added = _as_int(api_summary.get("added"))
- chips_html = _mb(("breaking", breaking), ("added", added))
- if chips_html:
- chips_html = f'
{chips_html}
'
+ rows.append(
+ _fact_row(
+ "Breaking changes",
+ _format_count(breaking),
+ value_cls="warn" if breaking > 0 else "good",
+ )
+ )
+ rows.append(_fact_row("Added symbols", _format_count(added)))
if bool(api_summary.get("strict_types")):
- caption_html = (
- '
'
- "Strict type check enabled for the public surface."
- "
"
- )
- else:
- caption_html = ""
+ rows.append(_fact_row("Strict mode", "enabled", value_cls="good"))
- return stats_html + chips_html + caption_html
+ return '
' + "".join(rows) + "
"
def _adoption_and_api_section(ctx: ReportContext) -> str:
@@ -1055,6 +1050,12 @@ def _analytics_section(ctx: ReportContext) -> str:
return ""
radar_html = _health_radar_svg(dimensions)
+ radar_legend = (
+ '
'
+ "Higher values indicate better code health."
+ " Red labels highlight dimensions below 60."
+ "
"
+ )
return (
'
'
@@ -1063,6 +1064,8 @@ def _analytics_section(ctx: ReportContext) -> str:
"Dimension scores across all quality axes.",
)
+ ''
- + overview_summary_item_html(label="Health profile", body_html=radar_html)
+ + overview_summary_item_html(
+ label="Health profile", body_html=radar_html + radar_legend
+ )
+ "
"
)
diff --git a/codeclone/_html_report/_tables.py b/codeclone/_html_report/_tables.py
index d1b6331..7f633f2 100644
--- a/codeclone/_html_report/_tables.py
+++ b/codeclone/_html_report/_tables.py
@@ -72,12 +72,13 @@ def render_rows_table(
headers: Sequence[str],
rows: Sequence[Sequence[str]],
empty_message: str,
+ empty_description: str | None = "Nothing to report - keep up the good work.",
raw_html_headers: Collection[str] = (),
ctx: ReportContext | None = None,
) -> str:
"""Render a data table with badges, tooltips, and col sizing."""
if not rows:
- return _tab_empty(empty_message)
+ return _tab_empty(empty_message, description=empty_description)
lower_headers = [h.lower() for h in headers]
raw_html_set = {h.lower() for h in raw_html_headers}
diff --git a/codeclone/cache.py b/codeclone/cache.py
index f7314e1..b078d8b 100644
--- a/codeclone/cache.py
+++ b/codeclone/cache.py
@@ -1407,18 +1407,15 @@ def _canonicalize_cache_entry(entry: CacheEntry) -> CacheEntry:
kind=symbol["kind"],
start_line=symbol["start_line"],
end_line=symbol["end_line"],
- params=sorted(
- [
- ApiParamSpecDict(
- name=param["name"],
- kind=param["kind"],
- has_default=param["has_default"],
- annotation_hash=param["annotation_hash"],
- )
- for param in symbol.get("params", [])
- ],
- key=lambda item: (item["kind"], item["name"]),
- ),
+ params=[
+ ApiParamSpecDict(
+ name=param["name"],
+ kind=param["kind"],
+ has_default=param["has_default"],
+ annotation_hash=param["annotation_hash"],
+ )
+ for param in symbol.get("params", [])
+ ],
returns_hash=symbol.get("returns_hash", ""),
exported_via=symbol.get("exported_via", "name"),
)
diff --git a/codeclone/cli.py b/codeclone/cli.py
index 13dd206..ba81092 100644
--- a/codeclone/cli.py
+++ b/codeclone/cli.py
@@ -490,6 +490,8 @@ def report(
new_block: set[str],
html_builder: Callable[..., str] | None = None,
metrics_diff: MetricsDiff | None = None,
+ coverage_adoption_diff_available: bool = False,
+ api_surface_diff_available: bool = False,
include_report_document: bool = False,
) -> ReportArtifacts:
return cast(
@@ -504,6 +506,8 @@ def report(
new_block=new_block,
html_builder=html_builder,
metrics_diff=metrics_diff,
+ coverage_adoption_diff_available=coverage_adoption_diff_available,
+ api_surface_diff_available=api_surface_diff_available,
include_report_document=include_report_document,
),
)
@@ -955,6 +959,14 @@ def _require_rich_console(
except CacheError as exc:
console.print(ui.fmt_cache_save_failed(exc))
+ coverage_join = getattr(analysis_result, "coverage_join", None)
+ if (
+ coverage_join is not None
+ and coverage_join.status != "ok"
+ and coverage_join.invalid_reason
+ ):
+ console.print(ui.fmt_coverage_join_ignored(coverage_join.invalid_reason))
+
return discovery_result, processing_result, analysis_result
@@ -1015,6 +1027,24 @@ def _enforce_gating(
)
sys.exit(metrics_baseline_failure_code)
+ if bool(getattr(args, "fail_on_untested_hotspots", False)):
+ if analysis.coverage_join is None:
+ console.print(
+ ui.fmt_contract_error(
+ "--fail-on-untested-hotspots requires --coverage."
+ )
+ )
+ sys.exit(ExitCode.CONTRACT_ERROR)
+ if analysis.coverage_join.status != "ok":
+ detail = analysis.coverage_join.invalid_reason or "invalid coverage input"
+ console.print(
+ ui.fmt_contract_error(
+ "Coverage gating requires a valid Cobertura XML input.\n"
+ f"Reason: {detail}"
+ )
+ )
+ sys.exit(ExitCode.CONTRACT_ERROR)
+
gate_result = gate(
boot=boot,
analysis=analysis,
@@ -1443,6 +1473,19 @@ def _prepare_run_inputs() -> tuple[
metrics_diff = metrics_baseline_state.baseline.diff(
analysis_result.project_metrics
)
+ coverage_adoption_diff_available = bool(
+ metrics_baseline_state.trusted_for_diff
+ and getattr(
+ metrics_baseline_state.baseline,
+ "has_coverage_adoption_snapshot",
+ False,
+ )
+ )
+ api_surface_diff_available = bool(
+ metrics_baseline_state.trusted_for_diff
+ and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None)
+ is not None
+ )
_print_summary(
console=cast("_PrinterLike", console),
@@ -1470,6 +1513,9 @@ def _prepare_run_inputs() -> tuple[
func_clones_count=analysis_result.func_clones_count,
block_clones_count=analysis_result.block_clones_count,
segment_clones_count=analysis_result.segment_clones_count,
+ suppressed_golden_fixture_groups=len(
+ getattr(analysis_result, "suppressed_clone_groups", ())
+ ),
suppressed_segment_groups=analysis_result.suppressed_segment_groups,
new_clones_count=new_clones_count,
)
@@ -1486,12 +1532,11 @@ def _prepare_run_inputs() -> tuple[
api_surface_summary = _as_mapping(
_as_mapping(metrics_payload_map.get("api_surface")).get("summary")
)
- api_surface_diff_available = bool(
- metrics_baseline_state.trusted_for_diff
- and getattr(metrics_baseline_state.baseline, "api_surface_snapshot", None)
- is not None
+ coverage_join_summary = _as_mapping(
+ _as_mapping(metrics_payload_map.get("coverage_join")).get("summary")
)
overloaded_modules_summary_map = _as_mapping(overloaded_modules_summary)
+ coverage_join_source = str(coverage_join_summary.get("source", "")).strip()
_print_metrics(
console=cast("_PrinterLike", console),
quiet=args.quiet,
@@ -1553,6 +1598,24 @@ def _prepare_run_inputs() -> tuple[
if metrics_diff is not None and api_surface_diff_available
else 0
),
+ coverage_join_status=str(
+ coverage_join_summary.get("status", "")
+ ).strip(),
+ coverage_join_overall_permille=_as_int(
+ coverage_join_summary.get("overall_permille")
+ ),
+ coverage_join_coverage_hotspots=_as_int(
+ coverage_join_summary.get("coverage_hotspots")
+ ),
+ coverage_join_scope_gap_hotspots=_as_int(
+ coverage_join_summary.get("scope_gap_hotspots")
+ ),
+ coverage_join_threshold_percent=_as_int(
+ coverage_join_summary.get("hotspot_threshold_percent")
+ ),
+ coverage_join_source_label=(
+ Path(coverage_join_source).name if coverage_join_source else ""
+ ),
),
)
@@ -1566,6 +1629,8 @@ def _prepare_run_inputs() -> tuple[
new_block=new_block,
html_builder=build_html_report,
metrics_diff=metrics_diff,
+ coverage_adoption_diff_available=coverage_adoption_diff_available,
+ api_surface_diff_available=api_surface_diff_available,
include_report_document=bool(changed_paths),
)
changed_clone_gate = (
diff --git a/codeclone/contracts.py b/codeclone/contracts.py
index f6b98f7..06e9517 100644
--- a/codeclone/contracts.py
+++ b/codeclone/contracts.py
@@ -12,8 +12,8 @@
BASELINE_SCHEMA_VERSION: Final = "2.1"
BASELINE_FINGERPRINT_VERSION: Final = "1"
-CACHE_VERSION: Final = "2.3"
-REPORT_SCHEMA_VERSION: Final = "2.5"
+CACHE_VERSION: Final = "2.4"
+REPORT_SCHEMA_VERSION: Final = "2.8"
METRICS_BASELINE_SCHEMA_VERSION: Final = "1.2"
DEFAULT_COMPLEXITY_THRESHOLD: Final = 20
diff --git a/codeclone/domain/findings.py b/codeclone/domain/findings.py
index 66f4851..686e44f 100644
--- a/codeclone/domain/findings.py
+++ b/codeclone/domain/findings.py
@@ -34,12 +34,16 @@
CATEGORY_COHESION: Final = "cohesion"
CATEGORY_DEAD_CODE: Final = "dead_code"
CATEGORY_DEPENDENCY: Final = "dependency"
+CATEGORY_COVERAGE: Final = "coverage"
FINDING_KIND_CLONE_GROUP: Final = "clone_group"
FINDING_KIND_UNUSED_SYMBOL: Final = "unused_symbol"
FINDING_KIND_CLASS_HOTSPOT: Final = "class_hotspot"
FINDING_KIND_FUNCTION_HOTSPOT: Final = "function_hotspot"
FINDING_KIND_CYCLE: Final = "cycle"
+FINDING_KIND_UNTESTED_HOTSPOT: Final = "untested_hotspot"
+FINDING_KIND_COVERAGE_HOTSPOT: Final = "coverage_hotspot"
+FINDING_KIND_COVERAGE_SCOPE_GAP: Final = "coverage_scope_gap"
STRUCTURAL_KIND_DUPLICATED_BRANCHES: Final = "duplicated_branches"
STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE: Final = "clone_guard_exit_divergence"
@@ -50,6 +54,7 @@
"CATEGORY_COHESION",
"CATEGORY_COMPLEXITY",
"CATEGORY_COUPLING",
+ "CATEGORY_COVERAGE",
"CATEGORY_DEAD_CODE",
"CATEGORY_DEPENDENCY",
"CATEGORY_STRUCTURAL",
@@ -66,8 +71,11 @@
"FAMILY_STRUCTURAL",
"FINDING_KIND_CLASS_HOTSPOT",
"FINDING_KIND_CLONE_GROUP",
+ "FINDING_KIND_COVERAGE_HOTSPOT",
+ "FINDING_KIND_COVERAGE_SCOPE_GAP",
"FINDING_KIND_CYCLE",
"FINDING_KIND_FUNCTION_HOTSPOT",
+ "FINDING_KIND_UNTESTED_HOTSPOT",
"FINDING_KIND_UNUSED_SYMBOL",
"STRUCTURAL_KIND_CLONE_COHORT_DRIFT",
"STRUCTURAL_KIND_CLONE_GUARD_EXIT_DIVERGENCE",
diff --git a/codeclone/golden_fixtures.py b/codeclone/golden_fixtures.py
new file mode 100644
index 0000000..3b6fe47
--- /dev/null
+++ b/codeclone/golden_fixtures.py
@@ -0,0 +1,178 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from pathlib import PurePosixPath
+from typing import Literal
+
+from .domain.source_scope import SOURCE_KIND_FIXTURES, SOURCE_KIND_TESTS
+from .models import (
+ GroupItem,
+ GroupItemLike,
+ GroupMap,
+ GroupMapLike,
+ SuppressedCloneGroup,
+)
+from .paths import classify_source_kind, normalize_repo_path, relative_repo_path
+
+CloneGroupKind = Literal["function", "block", "segment"]
+
+GOLDEN_FIXTURE_SUPPRESSION_RULE = "golden_fixture"
+GOLDEN_FIXTURE_SUPPRESSION_SOURCE = "project_config"
+
+_ALLOWED_SOURCE_KINDS = frozenset({SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES})
+
+
+class GoldenFixturePatternError(ValueError):
+ """Raised when golden_fixture_paths contains an invalid pattern."""
+
+
+@dataclass(frozen=True, slots=True)
+class GoldenFixtureGroupSplit:
+ active_groups: GroupMap
+ suppressed_groups: GroupMap
+ matched_patterns: dict[str, tuple[str, ...]]
+
+
+def normalize_golden_fixture_patterns(patterns: Sequence[str]) -> tuple[str, ...]:
+ normalized: list[str] = []
+ seen: set[str] = set()
+ for raw_pattern in patterns:
+ pattern = normalize_repo_path(str(raw_pattern))
+ while pattern.startswith("./"):
+ pattern = pattern[2:]
+ pattern = pattern.rstrip("/")
+ if not pattern:
+ raise GoldenFixturePatternError(
+ "tool.codeclone.golden_fixture_paths entries must be non-empty"
+ )
+ pure_pattern = PurePosixPath(pattern)
+ if pure_pattern.is_absolute():
+ raise GoldenFixturePatternError(
+ "tool.codeclone.golden_fixture_paths entries must be repo-relative"
+ )
+ if any(part == ".." for part in pure_pattern.parts):
+ raise GoldenFixturePatternError(
+ "tool.codeclone.golden_fixture_paths entries must not contain '..'"
+ )
+ source_kind = classify_source_kind(pattern)
+ if source_kind not in _ALLOWED_SOURCE_KINDS:
+ raise GoldenFixturePatternError(
+ "tool.codeclone.golden_fixture_paths entries must target tests/ or "
+ "tests/fixtures/ paths"
+ )
+ if pattern not in seen:
+ normalized.append(pattern)
+ seen.add(pattern)
+ return tuple(normalized)
+
+
+def path_matches_golden_fixture_pattern(relative_path: str, pattern: str) -> bool:
+    normalized_path = normalize_repo_path(relative_path).removeprefix("./")
+    if not normalized_path:
+        return False
+    candidate = PurePosixPath(normalized_path)
+    candidates = [candidate, *candidate.parents[:-1]]
+    return any(path.match(pattern) for path in candidates)
+
+
+def split_clone_groups_for_golden_fixtures(
+ *,
+ groups: GroupMapLike,
+ kind: CloneGroupKind,
+ golden_fixture_paths: Sequence[str],
+ scan_root: str = "",
+) -> GoldenFixtureGroupSplit:
+ active: GroupMap = {}
+ suppressed: GroupMap = {}
+ matched_patterns: dict[str, tuple[str, ...]] = {}
+ if not golden_fixture_paths:
+ for group_key in sorted(groups):
+ active[group_key] = [_copy_group_item(item) for item in groups[group_key]]
+ return GoldenFixtureGroupSplit(
+ active_groups=active,
+ suppressed_groups=suppressed,
+ matched_patterns=matched_patterns,
+ )
+
+ for group_key in sorted(groups):
+ copied_items = [_copy_group_item(item) for item in groups[group_key]]
+ group_patterns = _matched_patterns_for_group(
+ copied_items,
+ patterns=golden_fixture_paths,
+ scan_root=scan_root,
+ )
+ if group_patterns:
+ suppressed[group_key] = copied_items
+ matched_patterns[group_key] = group_patterns
+ else:
+ active[group_key] = copied_items
+ return GoldenFixtureGroupSplit(
+ active_groups=active,
+ suppressed_groups=suppressed,
+ matched_patterns=matched_patterns,
+ )
+
+
+def build_suppressed_clone_groups(
+ *,
+ kind: CloneGroupKind,
+ groups: GroupMapLike,
+ matched_patterns: Mapping[str, Sequence[str]],
+) -> tuple[SuppressedCloneGroup, ...]:
+ suppressed_groups: list[SuppressedCloneGroup] = []
+ for group_key in sorted(groups):
+ patterns = tuple(
+ str(pattern).strip()
+ for pattern in matched_patterns.get(group_key, ())
+ if str(pattern).strip()
+ )
+ if not patterns:
+ continue
+ suppressed_groups.append(
+ SuppressedCloneGroup(
+ kind=kind,
+ group_key=group_key,
+ items=tuple(_copy_group_item(item) for item in groups[group_key]),
+ matched_patterns=patterns,
+ suppression_rule=GOLDEN_FIXTURE_SUPPRESSION_RULE,
+ suppression_source=GOLDEN_FIXTURE_SUPPRESSION_SOURCE,
+ )
+ )
+ return tuple(suppressed_groups)
+
+
+def _copy_group_item(item: GroupItemLike) -> GroupItem:
+ return {str(key): value for key, value in item.items()}
+
+
+def _matched_patterns_for_group(
+ items: Sequence[GroupItemLike],
+ *,
+ patterns: Sequence[str],
+ scan_root: str,
+) -> tuple[str, ...]:
+ matched: set[str] = set()
+ for item in items:
+ filepath = str(item.get("filepath", "")).strip()
+ if not filepath:
+ return ()
+ source_kind = classify_source_kind(filepath, scan_root=scan_root)
+ if source_kind not in _ALLOWED_SOURCE_KINDS:
+ return ()
+ relative_path = relative_repo_path(filepath, scan_root=scan_root)
+ item_matches = tuple(
+ pattern
+ for pattern in patterns
+ if path_matches_golden_fixture_pattern(relative_path, pattern)
+ )
+ if not item_matches:
+ return ()
+ matched.update(item_matches)
+ return tuple(sorted(matched))
diff --git a/codeclone/mcp_server.py b/codeclone/mcp_server.py
index f577f74..ee7a6fc 100644
--- a/codeclone/mcp_server.py
+++ b/codeclone/mcp_server.py
@@ -39,9 +39,10 @@
"them only for an explicit higher-sensitivity follow-up when needed. Use "
"get_report_section(section='metrics_detail', family=..., limit=...) for "
"bounded metrics drill-down, and prefer generate_pr_summary(format='markdown') "
- "unless machine JSON is required. Pass an absolute repository root to "
- "analysis tools. This server never updates baselines and never mutates "
- "source files."
+ "unless machine JSON is required. Coverage join accepts external Cobertura "
+ "XML as a current-run signal and does not become baseline truth. Pass an "
+ "absolute repository root to analysis tools. This server never updates "
+ "baselines and never mutates source files."
)
_MCP_INSTALL_HINT = (
"CodeClone MCP support requires the optional 'mcp' extra. "
@@ -159,6 +160,8 @@ def analyze_repository(
segment_min_loc: int | None = None,
segment_min_stmt: int | None = None,
api_surface: bool | None = None,
+ coverage_xml: str | None = None,
+ coverage_min: int | None = None,
complexity_threshold: int | None = None,
coupling_threshold: int | None = None,
cohesion_threshold: int | None = None,
@@ -184,6 +187,8 @@ def analyze_repository(
segment_min_loc=segment_min_loc,
segment_min_stmt=segment_min_stmt,
api_surface=api_surface,
+ coverage_xml=coverage_xml,
+ coverage_min=coverage_min,
complexity_threshold=complexity_threshold,
coupling_threshold=coupling_threshold,
cohesion_threshold=cohesion_threshold,
@@ -225,6 +230,8 @@ def analyze_changed_paths(
segment_min_loc: int | None = None,
segment_min_stmt: int | None = None,
api_surface: bool | None = None,
+ coverage_xml: str | None = None,
+ coverage_min: int | None = None,
complexity_threshold: int | None = None,
coupling_threshold: int | None = None,
cohesion_threshold: int | None = None,
@@ -250,6 +257,8 @@ def analyze_changed_paths(
segment_min_loc=segment_min_loc,
segment_min_stmt=segment_min_stmt,
api_surface=api_surface,
+ coverage_xml=coverage_xml,
+ coverage_min=coverage_min,
complexity_threshold=complexity_threshold,
coupling_threshold=coupling_threshold,
cohesion_threshold=cohesion_threshold,
@@ -304,7 +313,7 @@ def get_production_triage(
"canonical doc links. Use this when workflow or contract meaning "
"is unclear. This is bounded guidance, not a full manual. "
"Supported topics: workflow, analysis_profile, suppressions, "
- "baseline, latest_runs, review_state, changed_scope."
+ "baseline, coverage, latest_runs, review_state, changed_scope."
),
annotations=read_only_tool,
structured_output=True,
@@ -341,8 +350,10 @@ def evaluate_gates(
fail_on_typing_regression: bool = False,
fail_on_docstring_regression: bool = False,
fail_on_api_break: bool = False,
+ fail_on_untested_hotspots: bool = False,
min_typing_coverage: int = -1,
min_docstring_coverage: int = -1,
+ coverage_min: int = 50,
) -> dict[str, object]:
return service.evaluate_gates(
MCPGateRequest(
@@ -359,8 +370,10 @@ def evaluate_gates(
fail_on_typing_regression=fail_on_typing_regression,
fail_on_docstring_regression=fail_on_docstring_regression,
fail_on_api_break=fail_on_api_break,
+ fail_on_untested_hotspots=fail_on_untested_hotspots,
min_typing_coverage=min_typing_coverage,
min_docstring_coverage=min_docstring_coverage,
+ coverage_min=coverage_min,
)
)
diff --git a/codeclone/mcp_service.py b/codeclone/mcp_service.py
index 71f1d82..6a22906 100644
--- a/codeclone/mcp_service.py
+++ b/codeclone/mcp_service.py
@@ -92,7 +92,7 @@
SOURCE_KIND_PRODUCTION,
SOURCE_KIND_TESTS,
)
-from .models import MetricsDiff, ProjectMetrics, Suggestion
+from .models import CoverageJoinResult, MetricsDiff, ProjectMetrics, Suggestion
from .pipeline import (
GatingResult,
MetricGateConfig,
@@ -132,6 +132,7 @@
"analysis_profile",
"suppressions",
"baseline",
+ "coverage",
"latest_runs",
"review_state",
"changed_scope",
@@ -142,6 +143,7 @@
"coupling",
"cohesion",
"coverage_adoption",
+ "coverage_join",
"dependencies",
"dead_code",
"api_surface",
@@ -186,6 +188,9 @@
"typing_coverage",
"docstring_coverage",
"api_surface",
+ "coverage_xml",
+ "coverage_min",
+ "golden_fixture_paths",
}
)
_RESOURCE_SECTION_MAP: Final[dict[str, ReportSection]] = {
@@ -236,6 +241,7 @@
"analysis_profile",
"suppressions",
"baseline",
+ "coverage",
"latest_runs",
"review_state",
"changed_scope",
@@ -297,6 +303,7 @@
"coupling",
"cohesion",
"coverage_adoption",
+ "coverage_join",
"dependencies",
"dead_code",
"api_surface",
@@ -336,6 +343,14 @@ class MCPHelpTopicSpec:
"Config and defaults",
f"{_MCP_BOOK_URL}04-config-and-defaults/",
)
+_REPORT_DOC_LINK: Final[tuple[str, str]] = (
+ "Report contract",
+ f"{_MCP_BOOK_URL}08-report/",
+)
+_CLI_DOC_LINK: Final[tuple[str, str]] = (
+ "CLI contract",
+ f"{_MCP_BOOK_URL}09-cli/",
+)
_PIPELINE_DOC_LINK: Final[tuple[str, str]] = (
"Core pipeline",
f"{_MCP_BOOK_URL}05-core-pipeline/",
@@ -540,6 +555,49 @@ class MCPHelpTopicSpec:
"Assuming an untrusted baseline is only cosmetic in CI contexts.",
),
),
+ "coverage": MCPHelpTopicSpec(
+ summary=(
+ "Coverage join is an external current-run signal: CodeClone reads "
+ "an existing Cobertura XML report and joins line hits to risky "
+ "function spans."
+ ),
+ key_points=(
+ "Use Cobertura XML such as `coverage xml` output from coverage.py.",
+ "Coverage join does not become baseline truth and does not affect health.",
+ (
+ "Coverage hotspot gating is current-run only and focuses on "
+ "medium/high-risk functions measured below the configured "
+ "threshold."
+ ),
+ (
+ "Functions missing from the supplied coverage.xml are surfaced "
+ "as scope gaps, not labeled as untested."
+ ),
+ "Use metrics_detail(family='coverage_join') for bounded drill-down.",
+ ),
+ recommended_tools=(
+ "analyze_repository",
+ "analyze_changed_paths",
+ "get_run_summary",
+ "get_report_section",
+ "evaluate_gates",
+ ),
+ doc_links=(
+ _MCP_INTERFACE_DOC_LINK,
+ _CLI_DOC_LINK,
+ _REPORT_DOC_LINK,
+ ),
+ warnings=(
+ "Coverage join is only as accurate as the external XML path mapping.",
+ "It does not infer branch coverage and does not execute tests.",
+ "Use fail-on-untested-hotspots only with a valid joined coverage input.",
+ ),
+ anti_patterns=(
+ "Treating missing coverage XML as zero coverage without stating it.",
+ "Reading coverage join as a baseline-aware trend signal.",
+ "Assuming dynamic runtime dispatch is visible through a static line join.",
+ ),
+ ),
"latest_runs": MCPHelpTopicSpec(
summary=(
"latest/* resources point to the most recent analysis run in the "
@@ -888,6 +946,8 @@ class MCPAnalysisRequest:
segment_min_loc: int | None = None
segment_min_stmt: int | None = None
api_surface: bool | None = None
+ coverage_xml: str | None = None
+ coverage_min: int | None = None
complexity_threshold: int | None = None
coupling_threshold: int | None = None
cohesion_threshold: int | None = None
@@ -914,8 +974,10 @@ class MCPGateRequest:
fail_on_typing_regression: bool = False
fail_on_docstring_regression: bool = False
fail_on_api_break: bool = False
+ fail_on_untested_hotspots: bool = False
min_typing_coverage: int = -1
min_docstring_coverage: int = -1
+ coverage_min: int = 50
@dataclass(frozen=True, slots=True)
@@ -933,6 +995,7 @@ class MCPRunRecord:
func_clones_count: int
block_clones_count: int
project_metrics: ProjectMetrics | None
+ coverage_join: CoverageJoinResult | None
suggestions: tuple[Suggestion, ...]
new_func: frozenset[str]
new_block: frozenset[str]
@@ -1206,6 +1269,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]:
func_clones_count=analysis_result.func_clones_count,
block_clones_count=analysis_result.block_clones_count,
project_metrics=analysis_result.project_metrics,
+ coverage_join=analysis_result.coverage_join,
suggestions=analysis_result.suggestions,
new_func=frozenset(new_func),
new_block=frozenset(new_block),
@@ -1231,6 +1295,7 @@ def analyze_repository(self, request: MCPAnalysisRequest) -> dict[str, object]:
func_clones_count=analysis_result.func_clones_count,
block_clones_count=analysis_result.block_clones_count,
project_metrics=analysis_result.project_metrics,
+ coverage_join=analysis_result.coverage_join,
suggestions=analysis_result.suggestions,
new_func=frozenset(new_func),
new_block=frozenset(new_block),
@@ -1361,8 +1426,10 @@ def evaluate_gates(self, request: MCPGateRequest) -> dict[str, object]:
"fail_on_typing_regression": request.fail_on_typing_regression,
"fail_on_docstring_regression": request.fail_on_docstring_regression,
"fail_on_api_break": request.fail_on_api_break,
+ "fail_on_untested_hotspots": request.fail_on_untested_hotspots,
"min_typing_coverage": request.min_typing_coverage,
"min_docstring_coverage": request.min_docstring_coverage,
+ "coverage_min": request.coverage_min,
},
}
with self._state_lock:
@@ -1376,9 +1443,21 @@ def _evaluate_gate_snapshot(
request: MCPGateRequest,
) -> GatingResult:
reasons: list[str] = []
+ if request.fail_on_untested_hotspots:
+ if record.coverage_join is None:
+ raise MCPServiceContractError(
+ "Coverage gating requires a run created with coverage_xml."
+ )
+ if record.coverage_join.status != "ok":
+ detail = record.coverage_join.invalid_reason or "invalid coverage input"
+ raise MCPServiceContractError(
+ "Coverage gating requires a valid Cobertura XML input. "
+ f"Reason: {detail}"
+ )
if record.project_metrics is not None:
metric_reasons = metric_gate_reasons(
project_metrics=record.project_metrics,
+ coverage_join=record.coverage_join,
metrics_diff=record.metrics_diff,
config=MetricGateConfig(
fail_complexity=request.fail_complexity,
@@ -1391,8 +1470,10 @@ def _evaluate_gate_snapshot(
fail_on_typing_regression=request.fail_on_typing_regression,
fail_on_docstring_regression=request.fail_on_docstring_regression,
fail_on_api_break=request.fail_on_api_break,
+ fail_on_untested_hotspots=request.fail_on_untested_hotspots,
min_typing_coverage=request.min_typing_coverage,
min_docstring_coverage=request.min_docstring_coverage,
+ coverage_min=request.coverage_min,
),
)
reasons.extend(f"metric:{reason}" for reason in metric_reasons)
@@ -1734,6 +1815,9 @@ def get_production_triage(
analysis_profile = self._summary_analysis_profile_payload(summary)
if analysis_profile:
payload["analysis_profile"] = analysis_profile
+ coverage_join = self._summary_coverage_join_payload(record)
+ if coverage_join:
+ payload["coverage_join"] = coverage_join
return payload
def get_help(
@@ -3324,6 +3408,7 @@ def _changed_analysis_payload(
),
"resolved_findings": 0,
"changed_findings": [],
+ "coverage_join": self._summary_coverage_join_payload(record),
}
def _augment_summary_with_changed(
@@ -3681,6 +3766,11 @@ def _validate_analysis_request(self, request: MCPAnalysisRequest) -> None:
"cache_policy='refresh' is not supported by the read-only "
"CodeClone MCP server. Use 'reuse' or 'off'."
)
+ if request.analysis_mode == "clones_only" and request.coverage_xml is not None:
+ raise MCPServiceContractError(
+ "coverage_xml requires analysis_mode='full' because coverage join "
+ "depends on metrics-enabled analysis."
+ )
@staticmethod
def _validate_choice(
@@ -3773,6 +3863,9 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp
typing_coverage=True,
docstring_coverage=True,
api_surface=False,
+ coverage_xml=None,
+ fail_on_untested_hotspots=False,
+ coverage_min=50,
design_complexity_threshold=DEFAULT_REPORT_DESIGN_COMPLEXITY_THRESHOLD,
design_coupling_threshold=DEFAULT_REPORT_DESIGN_COUPLING_THRESHOLD,
design_cohesion_threshold=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD,
@@ -3781,6 +3874,7 @@ def _build_args(self, *, root_path: Path, request: MCPAnalysisRequest) -> Namesp
skip_metrics=False,
skip_dead_code=False,
skip_dependencies=False,
+ golden_fixture_paths=(),
html_out=None,
json_out=None,
md_out=None,
@@ -3837,6 +3931,7 @@ def _apply_request_overrides(
"segment_min_loc": request.segment_min_loc,
"segment_min_stmt": request.segment_min_stmt,
"api_surface": request.api_surface,
+ "coverage_min": request.coverage_min,
"max_baseline_size_mb": request.max_baseline_size_mb,
"max_cache_size_mb": request.max_cache_size_mb,
"design_complexity_threshold": request.complexity_threshold,
@@ -3859,6 +3954,10 @@ def _apply_request_overrides(
args.cache_path = str(
self._resolve_optional_path(request.cache_path, root_path)
)
+ if request.coverage_xml is not None:
+ args.coverage_xml = str(
+ self._resolve_optional_path(request.coverage_xml, root_path)
+ )
@staticmethod
def _resolve_optional_path(value: str, root_path: Path) -> Path:
@@ -4094,6 +4193,10 @@ def _summary_payload(
analysis_profile = self._summary_analysis_profile_payload(summary)
if analysis_profile:
payload["analysis_profile"] = analysis_profile
+ if record is not None:
+ coverage_join = self._summary_coverage_join_payload(record)
+ if coverage_join:
+ payload["coverage_join"] = coverage_join
return payload
def _summary_analysis_profile_payload(
@@ -4299,6 +4402,40 @@ def _summary_diff_payload(
),
}
+ def _summary_coverage_join_payload(
+ self,
+ record: MCPRunRecord,
+ ) -> dict[str, object]:
+ metrics = self._as_mapping(record.report_document.get("metrics"))
+ families = self._as_mapping(metrics.get("families"))
+ coverage_join = self._as_mapping(families.get("coverage_join"))
+ summary = self._as_mapping(coverage_join.get("summary"))
+ if not summary:
+ return {}
+ payload: dict[str, object] = {
+ "status": str(summary.get("status", "")).strip(),
+ "overall_permille": _as_int(summary.get("overall_permille", 0), 0),
+ "coverage_hotspots": _as_int(summary.get("coverage_hotspots", 0), 0),
+ "scope_gap_hotspots": _as_int(summary.get("scope_gap_hotspots", 0), 0),
+ "hotspot_threshold_percent": _as_int(
+ summary.get("hotspot_threshold_percent", 0),
+ 0,
+ ),
+ }
+ source_value = summary.get("source")
+ source = source_value.strip() if isinstance(source_value, str) else ""
+ if source:
+ payload["source"] = source
+ invalid_reason_value = summary.get("invalid_reason")
+ invalid_reason = (
+ invalid_reason_value.strip()
+ if isinstance(invalid_reason_value, str)
+ else ""
+ )
+ if invalid_reason:
+ payload["invalid_reason"] = invalid_reason
+ return payload
+
def _metrics_detail_payload(
self,
*,
@@ -4437,14 +4574,7 @@ def _metrics_diff_payload(
)
)
),
- "new_api_symbols": len(
- tuple(
- cast(
- Sequence[object],
- getattr(metrics_diff, "new_api_symbols", ()),
- )
- )
- ),
+ "new_api_symbols": len(tuple(getattr(metrics_diff, "new_api_symbols", ()))),
}
def _dict_list(self, value: object) -> list[dict[str, object]]:
diff --git a/codeclone/metrics/__init__.py b/codeclone/metrics/__init__.py
index 14ea398..0551b7d 100644
--- a/codeclone/metrics/__init__.py
+++ b/codeclone/metrics/__init__.py
@@ -9,6 +9,7 @@
from .cohesion import cohesion_risk, compute_lcom4
from .complexity import cyclomatic_complexity, nesting_depth, risk_level
from .coupling import compute_cbo, coupling_risk
+from .coverage_join import CoverageJoinParseError, build_coverage_join
from .dead_code import find_suppressed_unused, find_unused
from .dependencies import (
build_dep_graph,
@@ -21,7 +22,9 @@
from .overloaded_modules import build_overloaded_modules_payload
__all__ = [
+ "CoverageJoinParseError",
"HealthInputs",
+ "build_coverage_join",
"build_dep_graph",
"build_import_graph",
"build_overloaded_modules_payload",
diff --git a/codeclone/metrics/coverage_join.py b/codeclone/metrics/coverage_join.py
new file mode 100644
index 0000000..08c8278
--- /dev/null
+++ b/codeclone/metrics/coverage_join.py
@@ -0,0 +1,331 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Sequence
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal, cast
+from xml.etree import ElementTree
+
+from .._coerce import as_int, as_str
+from ..models import CoverageJoinResult, GroupItemLike, UnitCoverageFact
+
+__all__ = [
+ "CoverageJoinParseError",
+ "build_coverage_join",
+]
+
+_Risk = Literal["low", "medium", "high"]
+_CoverageStatus = Literal["measured", "missing_from_report", "no_executable_lines"]
+
+_MEASURED_STATUS: _CoverageStatus = "measured"
+_MISSING_FROM_REPORT_STATUS: _CoverageStatus = "missing_from_report"
+_NO_EXECUTABLE_LINES_STATUS: _CoverageStatus = "no_executable_lines"
+_HOTSPOT_RISKS: frozenset[_Risk] = frozenset({"medium", "high"})
+
+
+class CoverageJoinParseError(ValueError):
+ """Raised when a Cobertura XML payload cannot be parsed safely."""
+
+
+@dataclass(frozen=True, slots=True)
+class _CoverageFileLines:
+ executable_lines: frozenset[int]
+ covered_lines: frozenset[int]
+
+
+@dataclass(frozen=True, slots=True)
+class _CoverageReport:
+ files: dict[str, _CoverageFileLines]
+
+
+def _permille(numerator: int, denominator: int) -> int:
+ if denominator <= 0:
+ return 0
+ return round((1000.0 * float(numerator)) / float(denominator))
+
+
+def _local_tag_name(tag: object) -> str:
+ if not isinstance(tag, str):
+ return ""
+ _, _, local_name = tag.rpartition("}")
+ return local_name or tag
+
+
+def _normalized_relpath_text(value: str) -> str:
+ return value.replace("\\", "/").strip()
+
+
+def _resolved_path(candidate: Path) -> Path:
+ try:
+ return candidate.expanduser().resolve(strict=False)
+ except OSError:
+ return candidate.expanduser().absolute()
+
+
+def _resolved_coverage_sources(
+ *,
+ root_element: ElementTree.Element,
+ root_path: Path,
+) -> tuple[Path, ...]:
+ resolved: list[Path] = []
+ seen: set[str] = set()
+ for element in root_element.iter():
+ text = _normalized_relpath_text(element.text or "")
+ if _local_tag_name(element.tag) != "source" or not text:
+ continue
+ source_path = Path(text)
+ if not source_path.is_absolute():
+ source_path = root_path / source_path
+ candidate = _resolved_path(source_path)
+ key = str(candidate)
+ if key not in seen:
+ resolved.append(candidate)
+ seen.add(key)
+ fallback = _resolved_path(root_path)
+ if str(fallback) not in seen:
+ resolved.insert(0, fallback)
+ return tuple(resolved)
+
+
+def _resolve_report_filename(
+ *,
+ filename: str,
+ root_path: Path,
+ source_roots: Sequence[Path],
+) -> str | None:
+ normalized_filename = _normalized_relpath_text(filename)
+ if not normalized_filename:
+ return None
+ raw_path = Path(normalized_filename)
+ candidates: list[Path] = []
+ if raw_path.is_absolute():
+ candidates.append(raw_path)
+ else:
+ candidates.append(root_path / raw_path)
+ candidates.extend(source_root / raw_path for source_root in source_roots)
+
+ unique_candidates: list[Path] = []
+ seen_candidates: set[str] = set()
+ for candidate in candidates:
+ resolved = _resolved_path(candidate)
+ key = str(resolved)
+ if key not in seen_candidates:
+ unique_candidates.append(resolved)
+ seen_candidates.add(key)
+
+ under_root_existing: list[Path] = []
+ under_root_fallback: list[Path] = []
+ for candidate in unique_candidates:
+ try:
+ candidate.relative_to(root_path)
+ except ValueError:
+ continue
+ if candidate.exists():
+ under_root_existing.append(candidate)
+ under_root_fallback.append(candidate)
+
+ if under_root_existing:
+ return str(sorted(under_root_existing)[0])
+ if under_root_fallback:
+ return str(under_root_fallback[0])
+ return None
+
+
+def _iter_cobertura_class_elements(
+ root_element: ElementTree.Element,
+) -> Sequence[ElementTree.Element]:
+ return tuple(
+ element
+ for element in root_element.iter()
+ if _local_tag_name(element.tag) == "class"
+ )
+
+
+def _iter_cobertura_line_hits(
+ class_element: ElementTree.Element,
+) -> Sequence[tuple[int, int]]:
+ rows: list[tuple[int, int]] = []
+ for line_element in class_element.iter():
+ if _local_tag_name(line_element.tag) == "line":
+ line_number = as_int(line_element.attrib.get("number"), -1)
+ hits = as_int(line_element.attrib.get("hits"), -1)
+ if line_number > 0 and hits >= 0:
+ rows.append((line_number, hits))
+ return tuple(rows)
+
+
+def _parse_coverage_report(
+ *,
+ coverage_xml: Path,
+ root_path: Path,
+) -> _CoverageReport:
+ try:
+ tree = ElementTree.parse(coverage_xml)
+ except (ElementTree.ParseError, OSError) as exc:
+ raise CoverageJoinParseError(
+ f"Invalid Cobertura XML at {coverage_xml}: {exc}"
+ ) from exc
+
+ root_element = tree.getroot()
+ source_roots = _resolved_coverage_sources(
+ root_element=root_element, root_path=root_path
+ )
+ file_lines: dict[str, dict[str, set[int]]] = defaultdict(
+ lambda: {"executable": set(), "covered": set()}
+ )
+
+ for element in _iter_cobertura_class_elements(root_element):
+ filename = element.attrib.get("filename", "")
+ resolved_filename = _resolve_report_filename(
+ filename=filename,
+ root_path=root_path,
+ source_roots=source_roots,
+ )
+ if resolved_filename is not None:
+ target = file_lines[resolved_filename]
+ for line_number, hits in _iter_cobertura_line_hits(element):
+ target["executable"].add(line_number)
+ if hits > 0:
+ target["covered"].add(line_number)
+
+ return _CoverageReport(
+ files={
+ filepath: _CoverageFileLines(
+ executable_lines=frozenset(sorted(lines["executable"])),
+ covered_lines=frozenset(sorted(lines["covered"])),
+ )
+ for filepath, lines in sorted(file_lines.items())
+ }
+ )
+
+
+def _unit_sort_key(item: GroupItemLike) -> tuple[str, int, int, str]:
+ return (
+ as_str(item.get("filepath")),
+ as_int(item.get("start_line")),
+ as_int(item.get("end_line")),
+ as_str(item.get("qualname")),
+ )
+
+
+def _resolve_unit_path(filepath: str) -> str:
+ return str(_resolved_path(Path(filepath)))
+
+
+def _risk_level(value: object) -> _Risk:
+ risk = as_str(value, "low")
+ if risk in {"low", "medium", "high"}:
+ return cast(_Risk, risk)
+ return "low"
+
+
+def _unit_coverage_fact(
+ *,
+ unit: GroupItemLike,
+ coverage_file: _CoverageFileLines | None,
+) -> UnitCoverageFact:
+ filepath = as_str(unit.get("filepath"))
+ start_line = as_int(unit.get("start_line"))
+ end_line = as_int(unit.get("end_line"))
+ coverage_status: _CoverageStatus
+ if coverage_file is None:
+ executable_lines = 0
+ covered_lines = 0
+ coverage_permille = 0
+ coverage_status = _MISSING_FROM_REPORT_STATUS
+ else:
+ executable_lines = sum(
+ 1
+ for line_number in coverage_file.executable_lines
+ if start_line <= line_number <= end_line
+ )
+ covered_lines = sum(
+ 1
+ for line_number in coverage_file.covered_lines
+ if start_line <= line_number <= end_line
+ )
+ coverage_permille = _permille(covered_lines, executable_lines)
+ coverage_status = (
+ _MEASURED_STATUS if executable_lines > 0 else _NO_EXECUTABLE_LINES_STATUS
+ )
+ return UnitCoverageFact(
+ qualname=as_str(unit.get("qualname")),
+ filepath=filepath,
+ start_line=start_line,
+ end_line=end_line,
+ cyclomatic_complexity=as_int(unit.get("cyclomatic_complexity"), 1),
+ risk=_risk_level(unit.get("risk")),
+ executable_lines=executable_lines,
+ covered_lines=covered_lines,
+ coverage_permille=coverage_permille,
+ coverage_status=coverage_status,
+ )
+
+
+def _is_coverage_hotspot(
+ *,
+ fact: UnitCoverageFact,
+ hotspot_threshold_percent: int,
+) -> bool:
+ if fact.risk not in _HOTSPOT_RISKS:
+ return False
+ if fact.coverage_status != _MEASURED_STATUS:
+ return False
+ return (fact.coverage_permille / 10.0) < float(hotspot_threshold_percent)
+
+
+def _is_scope_gap_hotspot(*, fact: UnitCoverageFact) -> bool:
+ return (
+ fact.risk in _HOTSPOT_RISKS
+ and fact.coverage_status == _MISSING_FROM_REPORT_STATUS
+ )
+
+
+def build_coverage_join(
+ *,
+ coverage_xml: Path,
+ root_path: Path,
+ units: Sequence[GroupItemLike],
+ hotspot_threshold_percent: int,
+) -> CoverageJoinResult:
+ report = _parse_coverage_report(coverage_xml=coverage_xml, root_path=root_path)
+ facts = tuple(
+ _unit_coverage_fact(
+ unit=unit,
+ coverage_file=report.files.get(
+ _resolve_unit_path(as_str(unit.get("filepath")))
+ ),
+ )
+ for unit in sorted(units, key=_unit_sort_key)
+ )
+ measured_units = sum(
+ 1 for fact in facts if fact.coverage_status == _MEASURED_STATUS
+ )
+ overall_executable_lines = sum(fact.executable_lines for fact in facts)
+ overall_covered_lines = sum(fact.covered_lines for fact in facts)
+ return CoverageJoinResult(
+ coverage_xml=str(_resolved_path(coverage_xml)),
+ status="ok",
+ hotspot_threshold_percent=hotspot_threshold_percent,
+ files=len(report.files),
+ measured_units=measured_units,
+ overall_executable_lines=overall_executable_lines,
+ overall_covered_lines=overall_covered_lines,
+ coverage_hotspots=sum(
+ 1
+ for fact in facts
+ if _is_coverage_hotspot(
+ fact=fact,
+ hotspot_threshold_percent=hotspot_threshold_percent,
+ )
+ ),
+ scope_gap_hotspots=sum(1 for fact in facts if _is_scope_gap_hotspot(fact=fact)),
+ units=facts,
+ )
diff --git a/codeclone/metrics_baseline.py b/codeclone/metrics_baseline.py
index 22f18ce..ed4197e 100644
--- a/codeclone/metrics_baseline.py
+++ b/codeclone/metrics_baseline.py
@@ -26,6 +26,7 @@
from .errors import BaselineValidationError
from .metrics.api_surface import compare_api_surfaces
from .models import (
+ ApiBreakingChange,
ApiParamSpec,
ApiSurfaceSnapshot,
MetricsDiff,
@@ -356,6 +357,7 @@ def save(self) -> None:
generator_name=self.generator_name or METRICS_BASELINE_GENERATOR,
generator_version=self.generator_version or __version__,
created_at=self.created_at or _now_utc_z(),
+ include_adoption=self.has_coverage_adoption_snapshot,
api_surface_snapshot=self.api_surface_snapshot,
api_surface_root=self.path.parent,
)
@@ -413,7 +415,6 @@ def save(self) -> None:
self.payload_sha256 = _require_str(
merged_meta, _METRICS_PAYLOAD_SHA256_KEY, path=self.path
)
- self.has_coverage_adoption_snapshot = True
self.api_surface_payload_sha256 = _optional_require_str(
merged_meta,
_API_SURFACE_PAYLOAD_SHA256_KEY,
@@ -433,7 +434,6 @@ def save(self) -> None:
self.python_tag = _require_str(payload_meta, "python_tag", path=self.path)
self.created_at = _require_str(payload_meta, "created_at", path=self.path)
self.payload_sha256 = payload_metrics_hash
- self.has_coverage_adoption_snapshot = True
self.api_surface_payload_sha256 = payload_api_surface_hash
def verify_compatibility(self, *, runtime_python_tag: str) -> None:
@@ -542,6 +542,8 @@ def from_project_metrics(
schema_version: str | None = None,
python_tag: str | None = None,
generator_version: str | None = None,
+ include_adoption: bool = True,
+ include_api_surface: bool = True,
) -> MetricsBaseline:
baseline = MetricsBaseline(path)
baseline.generator_name = METRICS_BASELINE_GENERATOR
@@ -550,15 +552,20 @@ def from_project_metrics(
baseline.python_tag = python_tag or current_python_tag()
baseline.created_at = _now_utc_z()
baseline.snapshot = snapshot_from_project_metrics(project_metrics)
- baseline.payload_sha256 = _compute_payload_sha256(baseline.snapshot)
- baseline.has_coverage_adoption_snapshot = True
- baseline.api_surface_snapshot = project_metrics.api_surface
+ baseline.payload_sha256 = _compute_payload_sha256(
+ baseline.snapshot,
+ include_adoption=include_adoption,
+ )
+ baseline.has_coverage_adoption_snapshot = include_adoption
+ baseline.api_surface_snapshot = (
+ project_metrics.api_surface if include_api_surface else None
+ )
baseline.api_surface_payload_sha256 = (
_compute_api_surface_payload_sha256(
- project_metrics.api_surface,
+ baseline.api_surface_snapshot,
root=baseline.path.parent,
)
- if project_metrics.api_surface is not None
+ if baseline.api_surface_snapshot is not None
else None
)
return baseline
@@ -610,11 +617,17 @@ def diff(self, current: ProjectMetrics) -> MetricsDiff:
set(current_snapshot.dead_code_items) - set(snapshot.dead_code_items)
)
)
- added_api_symbols, api_breaking_changes = compare_api_surfaces(
- baseline=self.api_surface_snapshot,
- current=current.api_surface,
- strict_types=False,
- )
+ added_api_symbols: tuple[str, ...]
+ api_breaking_changes: tuple[ApiBreakingChange, ...]
+ if self.api_surface_snapshot is None:
+ added_api_symbols = ()
+ api_breaking_changes = ()
+ else:
+ added_api_symbols, api_breaking_changes = compare_api_surfaces(
+ baseline=self.api_surface_snapshot,
+ current=current.api_surface,
+ strict_types=False,
+ )
return MetricsDiff(
new_high_risk_functions=new_high_risk_functions,
@@ -1252,10 +1265,14 @@ def _build_payload(
generator_name: str,
generator_version: str,
created_at: str,
+ include_adoption: bool = True,
api_surface_snapshot: ApiSurfaceSnapshot | None = None,
api_surface_root: Path | None = None,
) -> dict[str, Any]:
- payload_sha256 = _compute_payload_sha256(snapshot)
+ payload_sha256 = _compute_payload_sha256(
+ snapshot,
+ include_adoption=include_adoption,
+ )
payload: dict[str, Any] = {
"meta": {
"generator": {
@@ -1267,7 +1284,10 @@ def _build_payload(
"created_at": created_at,
"payload_sha256": payload_sha256,
},
- "metrics": _snapshot_payload(snapshot),
+ "metrics": _snapshot_payload(
+ snapshot,
+ include_adoption=include_adoption,
+ ),
}
if api_surface_snapshot is not None:
payload["meta"][_API_SURFACE_PAYLOAD_SHA256_KEY] = (
diff --git a/codeclone/models.py b/codeclone/models.py
index f34abf5..4814fc1 100644
--- a/codeclone/models.py
+++ b/codeclone/models.py
@@ -311,6 +311,45 @@ class ModuleDocstringCoverage:
public_symbol_documented: int
+@dataclass(frozen=True, slots=True)
+class UnitCoverageFact:
+ qualname: str
+ filepath: str
+ start_line: int
+ end_line: int
+ cyclomatic_complexity: int
+ risk: Literal["low", "medium", "high"]
+ executable_lines: int
+ covered_lines: int
+ coverage_permille: int
+ coverage_status: Literal["measured", "missing_from_report", "no_executable_lines"]
+
+
+@dataclass(frozen=True, slots=True)
+class CoverageJoinResult:
+ coverage_xml: str
+ status: Literal["ok", "invalid"]
+ hotspot_threshold_percent: int
+ files: int = 0
+ measured_units: int = 0
+ overall_executable_lines: int = 0
+ overall_covered_lines: int = 0
+ coverage_hotspots: int = 0
+ scope_gap_hotspots: int = 0
+ units: tuple[UnitCoverageFact, ...] = ()
+ invalid_reason: str | None = None
+
+
+@dataclass(frozen=True, slots=True)
+class SuppressedCloneGroup:
+ kind: Literal["function", "block", "segment"]
+ group_key: str
+ items: tuple[GroupItem, ...]
+ matched_patterns: tuple[str, ...] = ()
+ suppression_rule: str = ""
+ suppression_source: str = ""
+
+
GroupItem = dict[str, object]
GroupItemLike = Mapping[str, object]
GroupItemsLike = Sequence[GroupItemLike]
diff --git a/codeclone/paths.py b/codeclone/paths.py
index c9a33a6..d93428f 100644
--- a/codeclone/paths.py
+++ b/codeclone/paths.py
@@ -8,12 +8,52 @@
from pathlib import Path
+from .domain.source_scope import (
+ SOURCE_KIND_FIXTURES,
+ SOURCE_KIND_OTHER,
+ SOURCE_KIND_PRODUCTION,
+ SOURCE_KIND_TESTS,
+)
+
_TEST_FILE_NAMES = {"conftest.py"}
+def normalize_repo_path(value: str) -> str:
+ return value.replace("\\", "/").strip()
+
+
+def relative_repo_path(filepath: str, *, scan_root: str = "") -> str:
+ normalized_path = normalize_repo_path(filepath)
+ normalized_root = normalize_repo_path(scan_root).rstrip("/")
+ if not normalized_path:
+ return normalized_path
+ if not normalized_root:
+ return normalized_path
+ prefix = f"{normalized_root}/"
+ if normalized_path.startswith(prefix):
+ return normalized_path[len(prefix) :]
+ if normalized_path == normalized_root:
+ return normalized_path.rsplit("/", maxsplit=1)[-1]
+ return normalized_path
+
+
+def classify_source_kind(filepath: str, *, scan_root: str = "") -> str:
+ rel = relative_repo_path(filepath, scan_root=scan_root)
+ parts = [part for part in rel.lower().split("/") if part and part != "."]
+ if not parts:
+ return SOURCE_KIND_OTHER
+ for idx, part in enumerate(parts):
+ if part != SOURCE_KIND_TESTS:
+ continue
+ if idx + 1 < len(parts) and parts[idx + 1] == SOURCE_KIND_FIXTURES:
+ return SOURCE_KIND_FIXTURES
+ return SOURCE_KIND_TESTS
+ return SOURCE_KIND_PRODUCTION
+
+
def is_test_filepath(filepath: str) -> bool:
- normalized = filepath.lower().replace("\\", "/")
- if "/tests/" in normalized or "/test/" in normalized:
+ source_kind = classify_source_kind(filepath)
+ if source_kind in {SOURCE_KIND_TESTS, SOURCE_KIND_FIXTURES}:
return True
filename = Path(filepath).name.lower()
return filename in _TEST_FILE_NAMES or filename.startswith("test_")
diff --git a/codeclone/pipeline.py b/codeclone/pipeline.py
index f87477b..0f8dc7e 100644
--- a/codeclone/pipeline.py
+++ b/codeclone/pipeline.py
@@ -36,9 +36,15 @@
from .domain.findings import CATEGORY_COHESION, CATEGORY_COMPLEXITY, CATEGORY_COUPLING
from .domain.quality import CONFIDENCE_HIGH, RISK_HIGH, RISK_LOW
from .extractor import extract_units_and_stats_from_source
+from .golden_fixtures import (
+ build_suppressed_clone_groups,
+ split_clone_groups_for_golden_fixtures,
+)
from .grouping import build_block_groups, build_groups, build_segment_groups
from .metrics import (
+ CoverageJoinParseError,
HealthInputs,
+ build_coverage_join,
build_dep_graph,
build_overloaded_modules_payload,
compute_health,
@@ -51,6 +57,7 @@
ApiSurfaceSnapshot,
BlockUnit,
ClassMetrics,
+ CoverageJoinResult,
DeadCandidate,
DeadItem,
DepGraph,
@@ -69,6 +76,7 @@
StructuralFindingGroup,
StructuralFindingOccurrence,
Suggestion,
+ SuppressedCloneGroup,
Unit,
)
from .normalize import NormalizationConfig
@@ -202,6 +210,8 @@ class AnalysisResult:
metrics_payload: dict[str, object] | None
suggestions: tuple[Suggestion, ...]
segment_groups_raw_digest: str
+ suppressed_clone_groups: tuple[SuppressedCloneGroup, ...] = ()
+ coverage_join: CoverageJoinResult | None = None
suppressed_dead_code_items: int = 0
structural_findings: tuple[StructuralFindingGroup, ...] = ()
@@ -234,8 +244,10 @@ class MetricGateConfig:
fail_on_typing_regression: bool = False
fail_on_docstring_regression: bool = False
fail_on_api_break: bool = False
+ fail_on_untested_hotspots: bool = False
min_typing_coverage: int = -1
min_docstring_coverage: int = -1
+ coverage_min: int = 50
def _as_sorted_str_tuple(value: object) -> tuple[str, ...]:
@@ -456,6 +468,18 @@ def bootstrap(
)
+def _resolve_optional_runtime_path(value: object, *, root: Path) -> Path | None:
+ text = str(value).strip() if value is not None else ""
+ if not text:
+ return None
+ candidate = Path(text).expanduser()
+ resolved = candidate if candidate.is_absolute() else root / candidate
+ try:
+ return resolved.resolve()
+ except OSError:
+ return resolved.absolute()
+
+
def _cache_entry_has_metrics(entry: CacheEntry) -> bool:
metric_keys = (
"class_metrics",
@@ -1029,6 +1053,62 @@ def process_file(
)
+def _invoke_process_file(
+ filepath: str,
+ root: str,
+ cfg: NormalizationConfig,
+ min_loc: int,
+ min_stmt: int,
+ *,
+ collect_structural_findings: bool,
+ collect_typing_coverage: bool,
+ collect_docstring_coverage: bool,
+ collect_api_surface: bool,
+ api_include_private_modules: bool,
+ block_min_loc: int,
+ block_min_stmt: int,
+ segment_min_loc: int,
+ segment_min_stmt: int,
+) -> FileProcessResult:
+ optional_kwargs: dict[str, object] = {
+ "collect_structural_findings": collect_structural_findings,
+ "collect_typing_coverage": collect_typing_coverage,
+ "collect_docstring_coverage": collect_docstring_coverage,
+ "collect_api_surface": collect_api_surface,
+ "api_include_private_modules": api_include_private_modules,
+ "block_min_loc": block_min_loc,
+ "block_min_stmt": block_min_stmt,
+ "segment_min_loc": segment_min_loc,
+ "segment_min_stmt": segment_min_stmt,
+ }
+ try:
+ signature = inspect.signature(process_file)
+ except (TypeError, ValueError):
+ supported_kwargs = optional_kwargs
+ else:
+ parameters = tuple(signature.parameters.values())
+ if any(
+ parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters
+ ):
+ supported_kwargs = optional_kwargs
+ else:
+ supported_names = {parameter.name for parameter in parameters}
+ supported_kwargs = {
+ key: value
+ for key, value in optional_kwargs.items()
+ if key in supported_names
+ }
+ process_callable = cast("Callable[..., FileProcessResult]", process_file)
+ return process_callable(
+ filepath,
+ root,
+ cfg,
+ min_loc,
+ min_stmt,
+ **supported_kwargs,
+ )
+
+
def process(
*,
boot: BootstrapResult,
@@ -1215,50 +1295,27 @@ def _accept_result(result: FileProcessResult) -> None:
def _run_sequential(files: Sequence[str]) -> None:
for filepath in files:
- _accept_result(_invoke_process_file(filepath))
+ _accept_result(
+ _invoke_process_file(
+ filepath,
+ root_str,
+ boot.config,
+ min_loc,
+ min_stmt,
+ collect_structural_findings=collect_structural_findings,
+ collect_typing_coverage=collect_typing_coverage,
+ collect_docstring_coverage=collect_docstring_coverage,
+ collect_api_surface=collect_api_surface,
+ api_include_private_modules=api_include_private_modules,
+ block_min_loc=block_min_loc,
+ block_min_stmt=block_min_stmt,
+ segment_min_loc=segment_min_loc,
+ segment_min_stmt=segment_min_stmt,
+ )
+ )
if on_advance is not None:
on_advance()
- def _invoke_process_file(filepath: str) -> FileProcessResult:
- optional_kwargs: dict[str, object] = {
- "collect_structural_findings": collect_structural_findings,
- "collect_typing_coverage": collect_typing_coverage,
- "collect_docstring_coverage": collect_docstring_coverage,
- "collect_api_surface": collect_api_surface,
- "api_include_private_modules": api_include_private_modules,
- "block_min_loc": block_min_loc,
- "block_min_stmt": block_min_stmt,
- "segment_min_loc": segment_min_loc,
- "segment_min_stmt": segment_min_stmt,
- }
- try:
- signature = inspect.signature(process_file)
- except (TypeError, ValueError):
- supported_kwargs = optional_kwargs
- else:
- parameters = tuple(signature.parameters.values())
- if any(
- parameter.kind == inspect.Parameter.VAR_KEYWORD
- for parameter in parameters
- ):
- supported_kwargs = optional_kwargs
- else:
- supported_names = {parameter.name for parameter in parameters}
- supported_kwargs = {
- key: value
- for key, value in optional_kwargs.items()
- if key in supported_names
- }
- process_callable = cast("Callable[..., FileProcessResult]", process_file)
- return process_callable(
- filepath,
- root_str,
- boot.config,
- min_loc,
- min_stmt,
- **supported_kwargs,
- )
-
if _should_use_parallel(len(files_to_process), processes):
try:
with ProcessPoolExecutor(max_workers=processes) as executor:
@@ -1268,6 +1325,19 @@ def _invoke_process_file(filepath: str) -> FileProcessResult:
executor.submit(
_invoke_process_file,
filepath,
+ root_str,
+ boot.config,
+ min_loc,
+ min_stmt,
+ collect_structural_findings=collect_structural_findings,
+ collect_typing_coverage=collect_typing_coverage,
+ collect_docstring_coverage=collect_docstring_coverage,
+ collect_api_surface=collect_api_surface,
+ api_include_private_modules=api_include_private_modules,
+ block_min_loc=block_min_loc,
+ block_min_stmt=block_min_stmt,
+ segment_min_loc=segment_min_loc,
+ segment_min_stmt=segment_min_stmt,
)
for filepath in batch
]
@@ -1531,6 +1601,91 @@ def _permille(numerator: int, denominator: int) -> int:
return round((1000.0 * float(numerator)) / float(denominator))
+def _coverage_join_summary(
+ coverage_join: CoverageJoinResult | None,
+) -> dict[str, object]:
+ if coverage_join is None:
+ return {}
+ return {
+ "status": coverage_join.status,
+ "source": coverage_join.coverage_xml,
+ "files": coverage_join.files,
+ "units": len(coverage_join.units),
+ "measured_units": coverage_join.measured_units,
+ "overall_executable_lines": coverage_join.overall_executable_lines,
+ "overall_covered_lines": coverage_join.overall_covered_lines,
+ "overall_permille": _permille(
+ coverage_join.overall_covered_lines,
+ coverage_join.overall_executable_lines,
+ ),
+ "missing_from_report_units": sum(
+ 1
+ for fact in coverage_join.units
+ if fact.coverage_status == "missing_from_report"
+ ),
+ "coverage_hotspots": coverage_join.coverage_hotspots,
+ "scope_gap_hotspots": coverage_join.scope_gap_hotspots,
+ "hotspot_threshold_percent": coverage_join.hotspot_threshold_percent,
+ "invalid_reason": coverage_join.invalid_reason,
+ }
+
+
+def _coverage_join_rows(
+ coverage_join: CoverageJoinResult | None,
+) -> list[dict[str, object]]:
+ if coverage_join is None or coverage_join.status != "ok":
+ return []
+ return sorted(
+ (
+ {
+ "qualname": fact.qualname,
+ "filepath": fact.filepath,
+ "start_line": fact.start_line,
+ "end_line": fact.end_line,
+ "cyclomatic_complexity": fact.cyclomatic_complexity,
+ "risk": fact.risk,
+ "executable_lines": fact.executable_lines,
+ "covered_lines": fact.covered_lines,
+ "coverage_permille": fact.coverage_permille,
+ "coverage_status": fact.coverage_status,
+ "coverage_hotspot": (
+ fact.risk in {"medium", "high"}
+ and fact.coverage_status == "measured"
+ and (fact.coverage_permille / 10.0)
+ < float(coverage_join.hotspot_threshold_percent)
+ ),
+ "scope_gap_hotspot": (
+ fact.risk in {"medium", "high"}
+ and fact.coverage_status == "missing_from_report"
+ ),
+ "coverage_review_item": (
+ (
+ fact.risk in {"medium", "high"}
+ and fact.coverage_status == "measured"
+ and (fact.coverage_permille / 10.0)
+ < float(coverage_join.hotspot_threshold_percent)
+ )
+ or (
+ fact.risk in {"medium", "high"}
+ and fact.coverage_status == "missing_from_report"
+ )
+ ),
+ }
+ for fact in coverage_join.units
+ ),
+ key=lambda item: (
+ 0 if bool(item.get("coverage_hotspot")) else 1,
+ 0 if bool(item.get("scope_gap_hotspot")) else 1,
+ {"high": 0, "medium": 1, "low": 2}.get(_as_str(item.get("risk")), 3),
+ _as_int(item.get("coverage_permille"), 0),
+ -_as_int(item.get("cyclomatic_complexity"), 0),
+ _as_str(item.get("filepath")),
+ _as_int(item.get("start_line")),
+ _as_str(item.get("qualname")),
+ ),
+ )
+
+
def _coverage_adoption_rows(
project_metrics: ProjectMetrics,
) -> list[dict[str, object]]:
@@ -1699,6 +1854,8 @@ def _enrich_metrics_report_payload(
*,
metrics_payload: Mapping[str, object],
metrics_diff: MetricsDiff | None,
+ coverage_adoption_diff_available: bool,
+ api_surface_diff_available: bool,
) -> dict[str, object]:
enriched = {
key: (dict(value) if isinstance(value, Mapping) else value)
@@ -1711,20 +1868,20 @@ def _enrich_metrics_report_payload(
cast("Mapping[str, object]", coverage_adoption.get("summary", {}))
)
if coverage_summary:
- coverage_summary["baseline_diff_available"] = metrics_diff is not None
+ coverage_summary["baseline_diff_available"] = coverage_adoption_diff_available
coverage_summary["param_delta"] = (
int(metrics_diff.typing_param_permille_delta)
- if metrics_diff is not None
+ if metrics_diff is not None and coverage_adoption_diff_available
else 0
)
coverage_summary["return_delta"] = (
int(metrics_diff.typing_return_permille_delta)
- if metrics_diff is not None
+ if metrics_diff is not None and coverage_adoption_diff_available
else 0
)
coverage_summary["docstring_delta"] = (
int(metrics_diff.docstring_permille_delta)
- if metrics_diff is not None
+ if metrics_diff is not None and coverage_adoption_diff_available
else 0
)
coverage_adoption["summary"] = coverage_summary
@@ -1734,17 +1891,23 @@ def _enrich_metrics_report_payload(
api_summary = dict(cast("Mapping[str, object]", api_surface.get("summary", {})))
api_items = list(cast("Sequence[object]", api_surface.get("items", ())))
if api_summary:
- api_summary["baseline_diff_available"] = metrics_diff is not None
+ api_summary["baseline_diff_available"] = api_surface_diff_available
api_summary["added"] = (
- len(metrics_diff.new_api_symbols) if metrics_diff is not None else 0
+ len(metrics_diff.new_api_symbols)
+ if metrics_diff is not None and api_surface_diff_available
+ else 0
)
api_summary["breaking"] = (
len(metrics_diff.new_api_breaking_changes)
- if metrics_diff is not None
+ if metrics_diff is not None and api_surface_diff_available
else 0
)
api_surface["summary"] = api_summary
- if metrics_diff is not None and metrics_diff.new_api_breaking_changes:
+ if (
+ metrics_diff is not None
+ and api_surface_diff_available
+ and metrics_diff.new_api_breaking_changes
+ ):
api_items.extend(
_breaking_api_surface_rows(metrics_diff.new_api_breaking_changes)
)
@@ -1758,6 +1921,7 @@ def build_metrics_report_payload(
*,
scan_root: str = "",
project_metrics: ProjectMetrics,
+ coverage_join: CoverageJoinResult | None = None,
units: Sequence[GroupItemLike],
class_metrics: Sequence[ClassMetrics],
module_deps: Sequence[ModuleDep] = (),
@@ -1820,6 +1984,8 @@ def build_metrics_report_payload(
coverage_adoption_rows = _coverage_adoption_rows(project_metrics)
api_surface_summary = _api_surface_summary(project_metrics.api_surface)
api_surface_items = _api_surface_rows(project_metrics.api_surface)
+ coverage_join_summary = _coverage_join_summary(coverage_join)
+ coverage_join_items = _coverage_join_rows(coverage_join)
def _serialize_dead_item(
item: DeadItem,
@@ -1843,7 +2009,7 @@ def _serialize_dead_item(
]
return payload
- return {
+ payload = {
CATEGORY_COMPLEXITY: {
"functions": complexity_rows,
"summary": {
@@ -1952,6 +2118,12 @@ def _serialize_dead_item(
module_deps=module_deps,
),
}
+ if coverage_join is not None:
+ payload["coverage_join"] = {
+ "summary": dict(coverage_join_summary),
+ "items": coverage_join_items,
+ }
+ return payload
def analyze(
@@ -1960,9 +2132,34 @@ def analyze(
discovery: DiscoveryResult,
processing: ProcessingResult,
) -> AnalysisResult:
- func_groups = build_groups(processing.units)
- block_groups = build_block_groups(processing.blocks)
- segment_groups_raw = build_segment_groups(processing.segments)
+ golden_fixture_paths = tuple(
+ str(pattern).strip()
+ for pattern in getattr(boot.args, "golden_fixture_paths", ())
+ if str(pattern).strip()
+ )
+
+ func_split = split_clone_groups_for_golden_fixtures(
+ groups=build_groups(processing.units),
+ kind="function",
+ golden_fixture_paths=golden_fixture_paths,
+ scan_root=str(boot.root),
+ )
+ block_split = split_clone_groups_for_golden_fixtures(
+ groups=build_block_groups(processing.blocks),
+ kind="block",
+ golden_fixture_paths=golden_fixture_paths,
+ scan_root=str(boot.root),
+ )
+ segment_split = split_clone_groups_for_golden_fixtures(
+ groups=build_segment_groups(processing.segments),
+ kind="segment",
+ golden_fixture_paths=golden_fixture_paths,
+ scan_root=str(boot.root),
+ )
+
+ func_groups = func_split.active_groups
+ block_groups = block_split.active_groups
+ segment_groups_raw = segment_split.active_groups
segment_groups_raw_digest = _segment_groups_digest(segment_groups_raw)
cached_projection = discovery.cached_segment_report_projection
if (
@@ -1992,7 +2189,38 @@ def analyze(
)
block_groups_report = prepare_block_report_groups(block_groups)
- block_group_facts = build_block_group_facts(block_groups_report)
+ suppressed_block_groups_report = prepare_block_report_groups(
+ block_split.suppressed_groups
+ )
+ if segment_split.suppressed_groups:
+ suppressed_segment_groups_report, _ = prepare_segment_report_groups(
+ segment_split.suppressed_groups
+ )
+ else:
+ suppressed_segment_groups_report = {}
+ suppressed_clone_groups = (
+ *build_suppressed_clone_groups(
+ kind="function",
+ groups=func_split.suppressed_groups,
+ matched_patterns=func_split.matched_patterns,
+ ),
+ *build_suppressed_clone_groups(
+ kind="block",
+ groups=suppressed_block_groups_report,
+ matched_patterns=block_split.matched_patterns,
+ ),
+ *build_suppressed_clone_groups(
+ kind="segment",
+ groups=suppressed_segment_groups_report,
+ matched_patterns=segment_split.matched_patterns,
+ ),
+ )
+ block_group_facts = build_block_group_facts(
+ {
+ **block_groups_report,
+ **suppressed_block_groups_report,
+ }
+ )
func_clones_count = len(func_groups)
block_clones_count = len(block_groups)
@@ -2003,6 +2231,7 @@ def analyze(
metrics_payload: dict[str, object] | None = None
suggestions: tuple[Suggestion, ...] = ()
suppressed_dead_items: tuple[DeadItem, ...] = ()
+ coverage_join: CoverageJoinResult | None = None
cohort_structural_findings: tuple[StructuralFindingGroup, ...] = ()
if _should_collect_structural_findings(boot.output_paths):
cohort_structural_findings = build_clone_cohort_structural_findings(
@@ -2048,9 +2277,33 @@ def analyze(
structural_findings=combined_structural_findings,
scan_root=str(boot.root),
)
+ coverage_xml_path = _resolve_optional_runtime_path(
+ getattr(boot.args, "coverage_xml", None),
+ root=boot.root,
+ )
+ if coverage_xml_path is not None:
+ try:
+ coverage_join = build_coverage_join(
+ coverage_xml=coverage_xml_path,
+ root_path=boot.root,
+ units=processing.units,
+ hotspot_threshold_percent=int(
+ getattr(boot.args, "coverage_min", 50)
+ ),
+ )
+ except CoverageJoinParseError as exc:
+ coverage_join = CoverageJoinResult(
+ coverage_xml=str(coverage_xml_path),
+ status="invalid",
+ hotspot_threshold_percent=int(
+ getattr(boot.args, "coverage_min", 50)
+ ),
+ invalid_reason=str(exc),
+ )
metrics_payload = build_metrics_report_payload(
scan_root=str(boot.root),
project_metrics=project_metrics,
+ coverage_join=coverage_join,
units=processing.units,
class_metrics=processing.class_metrics,
module_deps=processing.module_deps,
@@ -2063,6 +2316,7 @@ def analyze(
block_groups=block_groups,
block_groups_report=block_groups_report,
segment_groups=segment_groups,
+ suppressed_clone_groups=tuple(suppressed_clone_groups),
suppressed_segment_groups=suppressed_segment_groups,
block_group_facts=block_group_facts,
func_clones_count=func_clones_count,
@@ -2073,6 +2327,7 @@ def analyze(
metrics_payload=metrics_payload,
suggestions=suggestions,
segment_groups_raw_digest=segment_groups_raw_digest,
+ coverage_join=coverage_join,
suppressed_dead_code_items=len(suppressed_dead_items),
structural_findings=combined_structural_findings,
)
@@ -2101,6 +2356,8 @@ def report(
new_block: Collection[str],
html_builder: Callable[..., str] | None = None,
metrics_diff: object | None = None,
+ coverage_adoption_diff_available: bool = False,
+ api_surface_diff_available: bool = False,
include_report_document: bool = False,
) -> ReportArtifacts:
contents: dict[str, str | None] = {
@@ -2148,6 +2405,8 @@ def report(
_enrich_metrics_report_payload(
metrics_payload=analysis.metrics_payload,
metrics_diff=cast("MetricsDiff | None", metrics_diff),
+ coverage_adoption_diff_available=coverage_adoption_diff_available,
+ api_surface_diff_available=api_surface_diff_available,
)
if analysis.metrics_payload is not None
else None
@@ -2156,6 +2415,7 @@ def report(
func_groups=analysis.func_groups,
block_groups=analysis.block_groups_report,
segment_groups=analysis.segment_groups,
+ suppressed_clone_groups=analysis.suppressed_clone_groups,
meta=report_meta,
inventory=report_inventory,
block_facts=analysis.block_group_facts,
@@ -2172,6 +2432,8 @@ def report(
_enrich_metrics_report_payload(
metrics_payload=analysis.metrics_payload,
metrics_diff=cast("MetricsDiff | None", metrics_diff),
+ coverage_adoption_diff_available=coverage_adoption_diff_available,
+ api_surface_diff_available=api_surface_diff_available,
)
if analysis.metrics_payload is not None
else None
@@ -2251,6 +2513,7 @@ def _render_projection_artifact(
def metric_gate_reasons(
*,
project_metrics: ProjectMetrics,
+ coverage_join: CoverageJoinResult | None,
metrics_diff: MetricsDiff | None,
config: MetricGateConfig,
) -> tuple[str, ...]:
@@ -2271,6 +2534,11 @@ def metric_gate_reasons(
project_metrics=project_metrics,
config=config,
)
+ _append_coverage_join_reasons(
+ reasons=reasons,
+ coverage_join=coverage_join,
+ config=config,
+ )
return tuple(reasons)
@@ -2441,6 +2709,24 @@ def _append_adoption_metric_reasons(
)
+def _append_coverage_join_reasons(
+ *,
+ reasons: list[str],
+ coverage_join: CoverageJoinResult | None,
+ config: MetricGateConfig,
+) -> None:
+ if not config.fail_on_untested_hotspots or coverage_join is None:
+ return
+ if coverage_join.status != "ok":
+ return
+ if coverage_join.coverage_hotspots > 0:
+ reasons.append(
+ "Coverage hotspots detected: "
+ f"hotspots={coverage_join.coverage_hotspots}, "
+ f"threshold={config.coverage_min}%."
+ )
+
+
def _high_confidence_dead_code_count(items: Sequence[DeadItem]) -> int:
return sum(1 for item in items if item.confidence == "high")
@@ -2458,6 +2744,7 @@ def gate(
if analysis.project_metrics is not None:
metric_reasons = metric_gate_reasons(
project_metrics=analysis.project_metrics,
+ coverage_join=analysis.coverage_join,
metrics_diff=metrics_diff,
config=MetricGateConfig(
fail_complexity=boot.args.fail_complexity,
@@ -2474,10 +2761,14 @@ def gate(
getattr(boot.args, "fail_on_docstring_regression", False)
),
fail_on_api_break=bool(getattr(boot.args, "fail_on_api_break", False)),
+ fail_on_untested_hotspots=bool(
+ getattr(boot.args, "fail_on_untested_hotspots", False)
+ ),
min_typing_coverage=int(getattr(boot.args, "min_typing_coverage", -1)),
min_docstring_coverage=int(
getattr(boot.args, "min_docstring_coverage", -1)
),
+ coverage_min=int(getattr(boot.args, "coverage_min", 50)),
),
)
reasons.extend(f"metric:{reason}" for reason in metric_reasons)
diff --git a/codeclone/report/derived.py b/codeclone/report/derived.py
index a3de937..6873a08 100644
--- a/codeclone/report/derived.py
+++ b/codeclone/report/derived.py
@@ -25,6 +25,12 @@
SOURCE_KIND_ORDER as _SOURCE_KIND_ORDER,
)
from ..models import ReportLocation, SourceKind, StructuralFindingOccurrence
+from ..paths import (
+ classify_source_kind as _classify_source_kind,
+)
+from ..paths import (
+ relative_repo_path as _relative_repo_path,
+)
if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, Sequence
@@ -56,37 +62,19 @@
}
-def _normalize_path(value: str) -> str:
- return value.replace("\\", "/").strip()
-
-
def relative_report_path(filepath: str, *, scan_root: str = "") -> str:
- normalized_path = _normalize_path(filepath)
- normalized_root = _normalize_path(scan_root).rstrip("/")
- if not normalized_path:
- return normalized_path
- if not normalized_root:
- return normalized_path
- prefix = f"{normalized_root}/"
- if normalized_path.startswith(prefix):
- return normalized_path[len(prefix) :]
- if normalized_path == normalized_root:
- return normalized_path.rsplit("/", maxsplit=1)[-1]
- return normalized_path
+ return _relative_repo_path(filepath, scan_root=scan_root)
def classify_source_kind(filepath: str, *, scan_root: str = "") -> SourceKind:
- rel = relative_report_path(filepath, scan_root=scan_root)
- parts = [part for part in rel.lower().split("/") if part and part != "."]
- if not parts:
- return SOURCE_KIND_OTHER
- for idx, part in enumerate(parts):
- if part != SOURCE_KIND_TESTS:
- continue
- if idx + 1 < len(parts) and parts[idx + 1] == SOURCE_KIND_FIXTURES:
- return SOURCE_KIND_FIXTURES
+ normalized = _classify_source_kind(filepath, scan_root=scan_root)
+ if normalized == SOURCE_KIND_PRODUCTION:
+ return SOURCE_KIND_PRODUCTION
+ if normalized == SOURCE_KIND_TESTS:
return SOURCE_KIND_TESTS
- return SOURCE_KIND_PRODUCTION
+ if normalized == SOURCE_KIND_FIXTURES:
+ return SOURCE_KIND_FIXTURES
+ return SOURCE_KIND_OTHER
def source_kind_breakdown(
diff --git a/codeclone/report/json_contract.py b/codeclone/report/json_contract.py
index 4e09395..decfeb6 100644
--- a/codeclone/report/json_contract.py
+++ b/codeclone/report/json_contract.py
@@ -27,6 +27,7 @@
CATEGORY_COHESION,
CATEGORY_COMPLEXITY,
CATEGORY_COUPLING,
+ CATEGORY_COVERAGE,
CATEGORY_DEAD_CODE,
CATEGORY_DEPENDENCY,
CLONE_KIND_BLOCK,
@@ -39,6 +40,8 @@
FAMILY_DEAD_CODE,
FAMILY_DESIGN,
FAMILY_STRUCTURAL,
+ FINDING_KIND_COVERAGE_HOTSPOT,
+ FINDING_KIND_COVERAGE_SCOPE_GAP,
)
from ..domain.quality import (
CONFIDENCE_HIGH,
@@ -91,6 +94,7 @@
SourceKind,
StructuralFindingGroup,
Suggestion,
+ SuppressedCloneGroup,
)
__all__ = [
@@ -104,6 +108,7 @@
_OVERLOADED_MODULES_FAMILY = "overloaded_modules"
_COVERAGE_ADOPTION_FAMILY = "coverage_adoption"
_API_SURFACE_FAMILY = "api_surface"
+_COVERAGE_JOIN_FAMILY = "coverage_join"
def _optional_str(value: object) -> str | None:
@@ -390,6 +395,12 @@ def _collect_paths_from_metrics(metrics: Mapping[str, object]) -> set[str]:
filepath = _optional_str(item_map.get("filepath"))
if filepath is not None:
paths.add(filepath)
+ coverage_join = _as_mapping(metrics.get(_COVERAGE_JOIN_FAMILY))
+ for item in _as_sequence(coverage_join.get("items")):
+ item_map = _as_mapping(item)
+ filepath = _optional_str(item_map.get("filepath"))
+ if filepath is not None:
+ paths.add(filepath)
return paths
@@ -399,6 +410,7 @@ def _collect_report_file_list(
func_groups: GroupMapLike,
block_groups: GroupMapLike,
segment_groups: GroupMapLike,
+ suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None,
metrics: Mapping[str, object] | None,
structural_findings: Sequence[StructuralFindingGroup] | None,
) -> list[str]:
@@ -414,11 +426,16 @@ def _collect_report_file_list(
filepath = _optional_str(item.get("filepath"))
if filepath is not None:
files.add(filepath)
+ for suppressed_group in suppressed_clone_groups or ():
+ for item in suppressed_group.items:
+ filepath = _optional_str(item.get("filepath"))
+ if filepath is not None:
+ files.add(filepath)
if metrics is not None:
files.update(_collect_paths_from_metrics(metrics))
if structural_findings:
- for group in normalize_structural_findings(structural_findings):
- for occurrence in group.items:
+ for structural_group in normalize_structural_findings(structural_findings):
+ for occurrence in structural_group.items:
filepath = _optional_str(occurrence.file_path)
if filepath is not None:
files.add(filepath)
@@ -813,6 +830,45 @@ def _normalize_suppressed_by(
item["record_kind"],
),
)
+ coverage_join = _as_mapping(metrics_map.get(_COVERAGE_JOIN_FAMILY))
+ coverage_join_summary = _as_mapping(coverage_join.get("summary"))
+ coverage_join_items = sorted(
+ (
+ {
+ "relative_path": _contract_path(
+ item_map.get("filepath", ""),
+ scan_root=scan_root,
+ )[0]
+ or "",
+ "qualname": str(item_map.get("qualname", "")).strip(),
+ "start_line": _as_int(item_map.get("start_line")),
+ "end_line": _as_int(item_map.get("end_line")),
+ "cyclomatic_complexity": _as_int(
+ item_map.get("cyclomatic_complexity"),
+ 1,
+ ),
+ "risk": str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW,
+ "executable_lines": _as_int(item_map.get("executable_lines")),
+ "covered_lines": _as_int(item_map.get("covered_lines")),
+ "coverage_permille": _as_int(item_map.get("coverage_permille")),
+ "coverage_status": str(item_map.get("coverage_status", "")).strip(),
+ "coverage_hotspot": bool(item_map.get("coverage_hotspot")),
+ "scope_gap_hotspot": bool(item_map.get("scope_gap_hotspot")),
+ }
+ for item in _as_sequence(coverage_join.get("items"))
+ for item_map in (_as_mapping(item),)
+ ),
+ key=lambda item: (
+ 0 if bool(item["coverage_hotspot"]) else 1,
+ 0 if bool(item["scope_gap_hotspot"]) else 1,
+ {"high": 0, "medium": 1, "low": 2}.get(str(item["risk"]), 3),
+ _as_int(item["coverage_permille"]),
+ -_as_int(item["cyclomatic_complexity"]),
+ item["relative_path"],
+ _as_int(item["start_line"]),
+ item["qualname"],
+ ),
+ )
dead_high_confidence = sum(
1
for item in dead_items
@@ -1004,6 +1060,45 @@ def _normalize_suppressed_by(
"items_truncated": False,
},
}
+ if coverage_join_summary or coverage_join_items or coverage_join:
+ normalized[_COVERAGE_JOIN_FAMILY] = {
+ "summary": {
+ "status": str(coverage_join_summary.get("status", "")),
+ "source": _contract_path(
+ coverage_join_summary.get("source", ""),
+ scan_root=scan_root,
+ )[0],
+ "files": _as_int(coverage_join_summary.get("files")),
+ "units": _as_int(coverage_join_summary.get("units")),
+ "measured_units": _as_int(coverage_join_summary.get("measured_units")),
+ "overall_executable_lines": _as_int(
+ coverage_join_summary.get("overall_executable_lines")
+ ),
+ "overall_covered_lines": _as_int(
+ coverage_join_summary.get("overall_covered_lines")
+ ),
+ "overall_permille": _as_int(
+ coverage_join_summary.get("overall_permille")
+ ),
+ "missing_from_report_units": _as_int(
+ coverage_join_summary.get("missing_from_report_units")
+ ),
+ "coverage_hotspots": _as_int(
+ coverage_join_summary.get("coverage_hotspots")
+ ),
+ "scope_gap_hotspots": _as_int(
+ coverage_join_summary.get("scope_gap_hotspots")
+ ),
+ "hotspot_threshold_percent": _as_int(
+ coverage_join_summary.get("hotspot_threshold_percent")
+ ),
+ "invalid_reason": _optional_str(
+ coverage_join_summary.get("invalid_reason")
+ ),
+ },
+ "items": coverage_join_items,
+ "items_truncated": False,
+ }
return normalized
@@ -1392,6 +1487,83 @@ def _build_clone_groups(
return encoded_groups
+def _build_suppressed_clone_groups(
+ *,
+ groups: Sequence[SuppressedCloneGroup] | None,
+ block_facts: Mapping[str, Mapping[str, str]],
+ scan_root: str,
+) -> dict[str, list[dict[str, object]]]:
+ buckets: dict[str, list[dict[str, object]]] = {
+ CLONE_KIND_FUNCTION: [],
+ CLONE_KIND_BLOCK: [],
+ CLONE_KIND_SEGMENT: [],
+ }
+ for group in groups or ():
+ items = group.items
+ clone_type = classify_clone_type(items=items, kind=group.kind)
+ severity, priority = _clone_group_assessment(
+ count=len(items),
+ clone_type=clone_type,
+ )
+ locations = tuple(
+ report_location_from_group_item(item, scan_root=scan_root) for item in items
+ )
+ source_scope = _source_scope_from_locations(
+ [
+ {
+ "source_kind": location.source_kind,
+ }
+ for location in locations
+ ]
+ )
+ spread_files, spread_functions = group_spread(locations)
+ rows = sorted(
+ [
+ _clone_item_payload(
+ item,
+ kind=group.kind,
+ scan_root=scan_root,
+ )
+ for item in items
+ ],
+ key=_item_sort_key,
+ )
+ facts, display_facts = _build_clone_group_facts(
+ group_key=group.group_key,
+ kind=group.kind,
+ items=items,
+ block_facts=block_facts,
+ )
+ encoded: dict[str, object] = {
+ "id": clone_group_id(group.kind, group.group_key),
+ "family": FAMILY_CLONE,
+ "category": group.kind,
+ "kind": "clone_group",
+ "severity": severity,
+ "confidence": CONFIDENCE_HIGH,
+ "priority": priority,
+ "clone_kind": group.kind,
+ "clone_type": clone_type,
+ "count": len(items),
+ "source_scope": source_scope,
+ "spread": {
+ "files": spread_files,
+ "functions": spread_functions,
+ },
+ "items": rows,
+ "facts": facts,
+ "suppression_rule": group.suppression_rule,
+ "suppression_source": group.suppression_source,
+ "matched_patterns": list(group.matched_patterns),
+ }
+ if display_facts:
+ encoded["display_facts"] = display_facts
+ buckets[group.kind].append(encoded)
+ for bucket in buckets.values():
+ bucket.sort(key=lambda group: (-_as_int(group.get("count")), str(group["id"])))
+ return buckets
+
+
def _structural_group_assessment(
*,
finding_kind: str,
@@ -1878,6 +2050,80 @@ def _dependency_design_group(
}
+def _coverage_design_group(
+ item_map: Mapping[str, object],
+ *,
+ threshold_percent: int,
+ scan_root: str,
+) -> dict[str, object] | None:
+ coverage_hotspot = bool(item_map.get("coverage_hotspot"))
+ scope_gap_hotspot = bool(item_map.get("scope_gap_hotspot"))
+ if not coverage_hotspot and not scope_gap_hotspot:
+ return None
+ qualname = str(item_map.get("qualname", "")).strip()
+ filepath = str(item_map.get("relative_path", "")).strip()
+ if not filepath:
+ return None
+ start_line = _as_int(item_map.get("start_line"))
+ end_line = _as_int(item_map.get("end_line"))
+ subject_key = qualname or f"{filepath}:{start_line}:{end_line}"
+ risk = str(item_map.get("risk", RISK_LOW)).strip() or RISK_LOW
+ coverage_status = str(item_map.get("coverage_status", "")).strip()
+ coverage_permille = _as_int(item_map.get("coverage_permille"))
+ covered_lines = _as_int(item_map.get("covered_lines"))
+ executable_lines = _as_int(item_map.get("executable_lines"))
+ complexity = _as_int(item_map.get("cyclomatic_complexity"), 1)
+ severity = SEVERITY_CRITICAL if risk == "high" else SEVERITY_WARNING
+ if scope_gap_hotspot:
+ kind = FINDING_KIND_COVERAGE_SCOPE_GAP
+ detail = "The supplied coverage.xml did not map to this function's file."
+ else:
+ kind = FINDING_KIND_COVERAGE_HOTSPOT
+ detail = "Joined line coverage is below the configured hotspot threshold."
+ return {
+ "id": design_group_id(CATEGORY_COVERAGE, subject_key),
+ "family": FAMILY_DESIGN,
+ "category": CATEGORY_COVERAGE,
+ "kind": kind,
+ "severity": severity,
+ "confidence": CONFIDENCE_HIGH,
+ "priority": _priority(severity, EFFORT_MODERATE),
+ "count": 1,
+ "source_scope": _single_location_source_scope(
+ filepath,
+ scan_root=scan_root,
+ ),
+ "spread": {"files": 1, "functions": 1},
+ "items": [
+ {
+ "relative_path": filepath,
+ "qualname": qualname,
+ "start_line": start_line,
+ "end_line": end_line,
+ "risk": risk,
+ "cyclomatic_complexity": complexity,
+ "coverage_permille": coverage_permille,
+ "coverage_status": coverage_status,
+ "covered_lines": covered_lines,
+ "executable_lines": executable_lines,
+ "coverage_hotspot": coverage_hotspot,
+ "scope_gap_hotspot": scope_gap_hotspot,
+ }
+ ],
+ "facts": {
+ "coverage_permille": coverage_permille,
+ "hotspot_threshold_percent": threshold_percent,
+ "coverage_status": coverage_status,
+ "covered_lines": covered_lines,
+ "executable_lines": executable_lines,
+ "cyclomatic_complexity": complexity,
+ "coverage_hotspot": coverage_hotspot,
+ "scope_gap_hotspot": scope_gap_hotspot,
+ "detail": detail,
+ },
+ }
+
+
def _build_design_groups(
metrics_payload: Mapping[str, object],
*,
@@ -1898,6 +2144,11 @@ def _build_design_groups(
_as_mapping(thresholds.get(CATEGORY_COHESION)).get("value"),
default=DEFAULT_REPORT_DESIGN_COHESION_THRESHOLD,
)
+ coverage_join = _as_mapping(families.get(_COVERAGE_JOIN_FAMILY))
+ coverage_threshold = _as_int(
+ _as_mapping(coverage_join.get("summary")).get("hotspot_threshold_percent"),
+ 50,
+ )
groups: list[dict[str, object]] = []
complexity = _as_mapping(families.get(CATEGORY_COMPLEXITY))
@@ -1936,6 +2187,15 @@ def _build_design_groups(
if group is not None:
groups.append(group)
+ for item in _as_sequence(coverage_join.get("items")):
+ group = _coverage_design_group(
+ _as_mapping(item),
+ threshold_percent=coverage_threshold,
+ scan_root=scan_root,
+ )
+ if group is not None:
+ groups.append(group)
+
groups.sort(key=lambda group: (-_as_float(group["priority"]), str(group["id"])))
return groups
@@ -1948,6 +2208,7 @@ def _findings_summary(
structural_groups: Sequence[Mapping[str, object]],
dead_code_groups: Sequence[Mapping[str, object]],
design_groups: Sequence[Mapping[str, object]],
+ suppressed_clone_groups: Mapping[str, Sequence[Mapping[str, object]]] | None = None,
dead_code_suppressed: int = 0,
) -> dict[str, object]:
flat_groups = [
@@ -1979,6 +2240,42 @@ def _findings_summary(
if impact_scope in source_scope_counts:
source_scope_counts[impact_scope] += 1
clone_groups = [*clone_functions, *clone_blocks, *clone_segments]
+ clone_suppressed_map = _as_mapping(suppressed_clone_groups)
+ suppressed_functions = len(_as_sequence(clone_suppressed_map.get("function")))
+ suppressed_blocks = len(_as_sequence(clone_suppressed_map.get("block")))
+ suppressed_segments = len(_as_sequence(clone_suppressed_map.get("segment")))
+ suppressed_clone_total = (
+ suppressed_functions + suppressed_blocks + suppressed_segments
+ )
+ clones_summary: dict[str, object] = {
+ "functions": len(clone_functions),
+ "blocks": len(clone_blocks),
+ "segments": len(clone_segments),
+ CLONE_NOVELTY_NEW: sum(
+ 1
+ for group in clone_groups
+ if str(group.get("novelty", "")) == CLONE_NOVELTY_NEW
+ ),
+ CLONE_NOVELTY_KNOWN: sum(
+ 1
+ for group in clone_groups
+ if str(group.get("novelty", "")) == CLONE_NOVELTY_KNOWN
+ ),
+ }
+ if suppressed_clone_total > 0:
+ clones_summary.update(
+ {
+ "suppressed": suppressed_clone_total,
+ "suppressed_functions": suppressed_functions,
+ "suppressed_blocks": suppressed_blocks,
+ "suppressed_segments": suppressed_segments,
+ }
+ )
+ suppressed_summary = {
+ FAMILY_DEAD_CODE: max(0, dead_code_suppressed),
+ }
+ if suppressed_clone_total > 0:
+ suppressed_summary[FAMILY_CLONES] = suppressed_clone_total
return {
"total": len(flat_groups),
"families": {
@@ -1989,24 +2286,8 @@ def _findings_summary(
},
"severity": severity_counts,
"impact_scope": source_scope_counts,
- "clones": {
- "functions": len(clone_functions),
- "blocks": len(clone_blocks),
- "segments": len(clone_segments),
- CLONE_NOVELTY_NEW: sum(
- 1
- for group in clone_groups
- if str(group.get("novelty", "")) == CLONE_NOVELTY_NEW
- ),
- CLONE_NOVELTY_KNOWN: sum(
- 1
- for group in clone_groups
- if str(group.get("novelty", "")) == CLONE_NOVELTY_KNOWN
- ),
- },
- "suppressed": {
- FAMILY_DEAD_CODE: max(0, dead_code_suppressed),
- },
+ "clones": clones_summary,
+ "suppressed": suppressed_summary,
}
@@ -2389,6 +2670,7 @@ def _build_findings_payload(
new_function_group_keys: Collection[str] | None,
new_block_group_keys: Collection[str] | None,
new_segment_group_keys: Collection[str] | None,
+ suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None,
design_thresholds: Mapping[str, object] | None,
scan_root: str,
) -> dict[str, object]:
@@ -2439,6 +2721,22 @@ def _build_findings_payload(
design_thresholds=design_thresholds,
scan_root=scan_root,
)
+ suppressed_clone_payload = _build_suppressed_clone_groups(
+ groups=suppressed_clone_groups,
+ block_facts=block_facts,
+ scan_root=scan_root,
+ )
+ clone_groups_payload: dict[str, object] = {
+ "functions": clone_functions,
+ "blocks": clone_blocks,
+ "segments": clone_segments,
+ }
+ if any(suppressed_clone_payload.values()):
+ clone_groups_payload["suppressed"] = {
+ "functions": suppressed_clone_payload[CLONE_KIND_FUNCTION],
+ "blocks": suppressed_clone_payload[CLONE_KIND_BLOCK],
+ "segments": suppressed_clone_payload[CLONE_KIND_SEGMENT],
+ }
return {
"summary": _findings_summary(
clone_functions=clone_functions,
@@ -2447,14 +2745,11 @@ def _build_findings_payload(
structural_groups=structural_groups,
dead_code_groups=dead_code_groups,
design_groups=design_groups,
+ suppressed_clone_groups=suppressed_clone_payload,
dead_code_suppressed=dead_code_suppressed,
),
"groups": {
- FAMILY_CLONES: {
- "functions": clone_functions,
- "blocks": clone_blocks,
- "segments": clone_segments,
- },
+ FAMILY_CLONES: clone_groups_payload,
FAMILY_STRUCTURAL: {
"groups": structural_groups,
},
@@ -2554,6 +2849,7 @@ def build_report_document(
new_function_group_keys: Collection[str] | None = None,
new_block_group_keys: Collection[str] | None = None,
new_segment_group_keys: Collection[str] | None = None,
+ suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None,
metrics: Mapping[str, object] | None = None,
suggestions: Sequence[Suggestion] | None = None,
structural_findings: Sequence[StructuralFindingGroup] | None = None,
@@ -2570,6 +2866,7 @@ def build_report_document(
func_groups=func_groups,
block_groups=block_groups,
segment_groups=segment_groups,
+ suppressed_clone_groups=suppressed_clone_groups,
metrics=metrics,
structural_findings=structural_findings,
)
@@ -2590,6 +2887,7 @@ def build_report_document(
new_function_group_keys=new_function_group_keys,
new_block_group_keys=new_block_group_keys,
new_segment_group_keys=new_segment_group_keys,
+ suppressed_clone_groups=suppressed_clone_groups,
design_thresholds=design_thresholds,
scan_root=scan_root,
)
diff --git a/codeclone/report/markdown.py b/codeclone/report/markdown.py
index 9c5fc0a..6ad5c2e 100644
--- a/codeclone/report/markdown.py
+++ b/codeclone/report/markdown.py
@@ -15,7 +15,7 @@
from .json_contract import build_report_document
if TYPE_CHECKING:
- from ..models import StructuralFindingGroup, Suggestion
+ from ..models import StructuralFindingGroup, Suggestion, SuppressedCloneGroup
from .types import GroupMapLike
MARKDOWN_SCHEMA_VERSION = "1.0"
@@ -43,6 +43,7 @@
("complexity", "Complexity", 3),
("coupling", "Coupling", 3),
("cohesion", "Cohesion", 3),
+ ("coverage-join", "Coverage Join", 3),
("overloaded-modules", "Overloaded Modules", 3),
("dependencies", "Dependencies", 3),
("dead-code-metrics", "Dead Code", 3),
@@ -183,6 +184,59 @@ def _append_findings_section(
lines.append("")
+def _append_suppressed_clone_findings(
+ lines: list[str],
+ *,
+ groups: Sequence[object],
+) -> None:
+ finding_rows = [_as_mapping(group) for group in groups]
+ if not finding_rows:
+ lines.append("_None._")
+ lines.append("")
+ return
+ for group in finding_rows:
+ lines.append("#### Suppressed clone group")
+ lines.append("")
+ _append_kv_bullets(
+ lines,
+ (
+ ("Finding ID", f"`{_text(group.get('id'))}`"),
+ ("Category", group.get("category")),
+ ("Clone Type", group.get("clone_type")),
+ ("Severity", group.get("severity")),
+ ("Scope", _source_scope_text(_as_mapping(group.get("source_scope")))),
+ ("Spread", _spread_text(_as_mapping(group.get("spread")))),
+ ("Occurrences", group.get("count")),
+ ("Suppression Rule", group.get("suppression_rule")),
+ ("Suppression Source", group.get("suppression_source")),
+ (
+ "Matched Patterns",
+ ", ".join(
+ str(item).strip()
+ for item in _as_sequence(group.get("matched_patterns"))
+ if str(item).strip()
+ )
+ or "(none)",
+ ),
+ ),
+ )
+ facts = _as_mapping(group.get("facts"))
+ display_facts = _as_mapping(group.get("display_facts"))
+ if facts or display_facts:
+ _append_facts_block(lines, title="Facts", facts=facts)
+ _append_facts_block(lines, title="Presentation facts", facts=display_facts)
+ lines.append("")
+ items = list(map(_as_mapping, _as_sequence(group.get("items"))))
+ lines.append("- Locations:")
+ visible_items = items[:_MAX_FINDING_LOCATIONS]
+ lines.extend(f" - {_location_text(item)}" for item in visible_items)
+ if len(items) > len(visible_items):
+ lines.append(
+ f" - ... and {len(items) - len(visible_items)} more occurrence(s)"
+ )
+ lines.append("")
+
+
def _append_metric_items(
lines: list[str],
*,
@@ -215,6 +269,7 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str:
findings_summary = _as_mapping(findings.get("summary"))
findings_groups = _as_mapping(findings.get("groups"))
clone_groups = _as_mapping(findings_groups.get("clones"))
+ suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed"))
overview = _as_mapping(derived.get("overview"))
hotlists = _as_mapping(derived.get("hotlists"))
suggestions = _as_sequence(derived.get("suggestions"))
@@ -390,6 +445,17 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str:
*_as_sequence(clone_groups.get("segments")),
],
)
+ if suppressed_clone_groups:
+ lines.append("#### Suppressed Golden Fixture Clone Groups")
+ lines.append("")
+ _append_suppressed_clone_findings(
+ lines,
+ groups=[
+ *_as_sequence(suppressed_clone_groups.get("functions")),
+ *_as_sequence(suppressed_clone_groups.get("blocks")),
+ *_as_sequence(suppressed_clone_groups.get("segments")),
+ ],
+ )
_append_anchor(lines, *_anchor("structural-findings"))
_append_findings_section(
@@ -434,6 +500,30 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str:
("total", "average", "max", "low_cohesion"),
("lcom4", "method_count", "instance_var_count", "risk"),
),
+ (
+ "coverage-join",
+ "Coverage Join",
+ (
+ "status",
+ "source",
+ "units",
+ "measured_units",
+ "overall_permille",
+ "coverage_hotspots",
+ "scope_gap_hotspots",
+ "hotspot_threshold_percent",
+ ),
+ (
+ "coverage_status",
+ "risk",
+ "coverage_permille",
+ "cyclomatic_complexity",
+ "covered_lines",
+ "executable_lines",
+ "coverage_hotspot",
+ "scope_gap_hotspot",
+ ),
+ ),
(
"overloaded-modules",
"Overloaded Modules",
@@ -474,9 +564,13 @@ def render_markdown_report_document(payload: Mapping[str, object]) -> str:
"overloaded_modules" if anchor_id == "overloaded-modules" else anchor_id
)
)
+ if family_key == "coverage-join":
+ family_key = "coverage_join"
family_payload = _as_mapping(metrics_families.get(family_key))
if not family_payload and family_key == "overloaded_modules":
family_payload = _as_mapping(metrics_families.get("god_modules"))
+ if not family_payload and family_key == "coverage_join":
+ continue
family_summary_map = _as_mapping(family_payload.get("summary"))
_append_anchor(lines, anchor_id, title, 3)
_append_kv_bullets(
@@ -542,6 +636,7 @@ def to_markdown_report(
new_function_group_keys: Collection[str] | None = None,
new_block_group_keys: Collection[str] | None = None,
new_segment_group_keys: Collection[str] | None = None,
+ suppressed_clone_groups: Sequence[SuppressedCloneGroup] | None = None,
metrics: Mapping[str, object] | None = None,
suggestions: Collection[Suggestion] | None = None,
structural_findings: Sequence[StructuralFindingGroup] | None = None,
@@ -556,6 +651,7 @@ def to_markdown_report(
new_function_group_keys=new_function_group_keys,
new_block_group_keys=new_block_group_keys,
new_segment_group_keys=new_segment_group_keys,
+ suppressed_clone_groups=suppressed_clone_groups,
metrics=metrics,
suggestions=tuple(suggestions or ()),
structural_findings=tuple(structural_findings or ()),
diff --git a/codeclone/report/overview.py b/codeclone/report/overview.py
index cc0efda..c8c4a3a 100644
--- a/codeclone/report/overview.py
+++ b/codeclone/report/overview.py
@@ -18,6 +18,7 @@
CATEGORY_COHESION,
CATEGORY_COMPLEXITY,
CATEGORY_COUPLING,
+ CATEGORY_COVERAGE,
CATEGORY_DEAD_CODE,
CATEGORY_DEPENDENCY,
CLONE_KIND_BLOCK,
@@ -119,6 +120,7 @@ def _flatten_findings(findings: Mapping[str, object]) -> list[Mapping[str, objec
CATEGORY_COMPLEXITY,
CATEGORY_COUPLING,
CATEGORY_COHESION,
+ CATEGORY_COVERAGE,
CATEGORY_DEPENDENCY,
)
@@ -155,6 +157,7 @@ def _directory_kind_breakdown_key(group: Mapping[str, object]) -> str | None:
CATEGORY_COMPLEXITY,
CATEGORY_COUPLING,
CATEGORY_COHESION,
+ CATEGORY_COVERAGE,
CATEGORY_DEPENDENCY,
}:
return category
@@ -504,6 +507,19 @@ def serialize_finding_group_card(group: Mapping[str, object]) -> dict[str, objec
_as_int(group.get("count")),
)
summary = f"{cycle_length} modules participate in this cycle"
+ elif category == CATEGORY_COVERAGE:
+ kind = str(group.get("kind", "")).strip()
+ coverage_status = str(facts.get("coverage_status", "")).strip()
+ threshold = _as_int(facts.get("hotspot_threshold_percent"))
+ if kind == "coverage_scope_gap" or coverage_status == "missing_from_report":
+ title = "Include risky function in coverage input"
+ summary = "coverage.xml did not include this function's file"
+ else:
+ title = "Increase coverage for risky function"
+ summary = (
+ f"coverage={_as_int(facts.get('coverage_permille')) / 10.0:.1f}%, "
+ f"threshold={threshold}%"
+ )
return {
"title": title,
diff --git a/codeclone/report/sarif.py b/codeclone/report/sarif.py
index e316553..ec2177d 100644
--- a/codeclone/report/sarif.py
+++ b/codeclone/report/sarif.py
@@ -23,6 +23,7 @@
CATEGORY_COHESION,
CATEGORY_COMPLEXITY,
CATEGORY_COUPLING,
+ CATEGORY_COVERAGE,
CATEGORY_DEPENDENCY,
CLONE_KIND_BLOCK,
CLONE_KIND_FUNCTION,
@@ -33,6 +34,8 @@
FAMILY_STRUCTURAL,
FINDING_KIND_CLASS_HOTSPOT,
FINDING_KIND_CLONE_GROUP,
+ FINDING_KIND_COVERAGE_HOTSPOT,
+ FINDING_KIND_COVERAGE_SCOPE_GAP,
FINDING_KIND_CYCLE,
FINDING_KIND_FUNCTION_HOTSPOT,
FINDING_KIND_UNUSED_SYMBOL,
@@ -346,6 +349,28 @@ def _design_rule_spec(category: str, kind: str) -> _RuleSpec:
kind or FINDING_KIND_CLASS_HOTSPOT,
CONFIDENCE_HIGH,
)
+ if category == CATEGORY_COVERAGE:
+ if kind == FINDING_KIND_COVERAGE_SCOPE_GAP:
+ return _RuleSpec(
+ "CDESIGN006",
+ "Coverage scope gap",
+ "A medium/high-risk function is outside the supplied joined "
+ "coverage scope.",
+ SEVERITY_WARNING,
+ FAMILY_DESIGN,
+ kind,
+ CONFIDENCE_HIGH,
+ )
+ return _RuleSpec(
+ "CDESIGN005",
+ "Coverage hotspot",
+ "A medium/high-risk function falls below the configured joined "
+ "coverage threshold.",
+ SEVERITY_WARNING,
+ FAMILY_DESIGN,
+ kind or FINDING_KIND_COVERAGE_HOTSPOT,
+ CONFIDENCE_HIGH,
+ )
return _RuleSpec(
"CDESIGN004",
"Dependency cycle",
@@ -452,6 +477,13 @@ def _design_result_message(
fact_key, label, metric_label = spec
value = _as_int(facts.get(fact_key))
return f"{label} ({metric_label}={value}): {qualname}."
+ if category == CATEGORY_COVERAGE:
+ coverage_status = _text(facts.get("coverage_status"))
+ threshold = _as_int(facts.get("hotspot_threshold_percent"))
+ if coverage_status == "missing_from_report":
+ return f"Coverage scope gap (not in coverage.xml): {qualname}."
+ coverage_pct = _as_int(facts.get("coverage_permille")) / 10.0
+ return f"Coverage hotspot ({coverage_pct:.1f}% < {threshold}%): {qualname}."
modules = [_text(item.get("module")) for item in items if _text(item.get("module"))]
return f"Dependency cycle ({len(modules)} modules): {' -> '.join(modules)}."
@@ -658,6 +690,11 @@ def _design_result_properties(
"cyclomatic_complexity",
"nesting_depth",
"cycle_length",
+ "coverage_permille",
+ "covered_lines",
+ "executable_lines",
+ "hotspot_threshold_percent",
+ "coverage_status",
):
if key in facts:
props[key] = facts[key]
diff --git a/codeclone/report/serialize.py b/codeclone/report/serialize.py
index 3cd4a9e..80c37d4 100644
--- a/codeclone/report/serialize.py
+++ b/codeclone/report/serialize.py
@@ -160,6 +160,54 @@ def _append_clone_section(
lines.pop()
+def _append_suppressed_clone_section(
+ lines: list[str],
+ *,
+ title: str,
+ groups: Sequence[object],
+ metric_name: str,
+) -> None:
+ section_groups = [_as_mapping(group) for group in groups]
+ lines.append(f"{title} (groups={len(section_groups)})")
+ if not section_groups:
+ lines.append("(none)")
+ return
+ for idx, group in enumerate(section_groups, start=1):
+ lines.append(f"=== Suppressed clone group #{idx} ===")
+ lines.append(
+ "id="
+ f"{format_meta_text_value(group.get('id'))} "
+ f"clone_type={format_meta_text_value(group.get('clone_type'))} "
+ f"severity={format_meta_text_value(group.get('severity'))} "
+ f"count={format_meta_text_value(group.get('count'))} "
+ f"spread={_spread_text(_as_mapping(group.get('spread')))} "
+ f"scope={_scope_text(_as_mapping(group.get('source_scope')))} "
+ "suppressed_by="
+ f"{format_meta_text_value(group.get('suppression_rule'))}"
+ "@"
+ f"{format_meta_text_value(group.get('suppression_source'))} "
+ "matched_patterns="
+ f"{format_meta_text_value(group.get('matched_patterns'))}"
+ )
+ facts = _as_mapping(group.get("facts"))
+ if facts:
+ lines.append(
+ "facts: "
+ + _format_key_values(
+ facts,
+ tuple(sorted(str(key) for key in facts)),
+ skip_empty=True,
+ )
+ )
+ lines.extend(
+ _location_line(item, metric_name=metric_name)
+ for item in map(_as_mapping, _as_sequence(group.get("items")))
+ )
+ lines.append("")
+ if lines[-1] == "":
+ lines.pop()
+
+
def _append_structural_findings(lines: list[str], groups: Sequence[object]) -> None:
structural_groups = [_as_mapping(group) for group in groups]
lines.append(f"STRUCTURAL FINDINGS (groups={len(structural_groups)})")
@@ -479,7 +527,14 @@ def render_text_report_document(payload: Mapping[str, object]) -> str:
digest = _as_mapping(integrity.get("digest"))
findings_groups = _as_mapping(findings.get("groups"))
clone_groups = _as_mapping(findings_groups.get("clones"))
+ suppressed_clone_groups = _as_mapping(clone_groups.get("suppressed"))
runtime_meta = _as_mapping(meta_payload.get("runtime"))
+ clone_summary_keys: list[str] = ["functions", "blocks", "segments", "new", "known"]
+ if "suppressed" in findings_clones:
+ clone_summary_keys.append("suppressed")
+ suppressed_summary_keys: list[str] = ["dead_code"]
+ if "clones" in findings_suppressed:
+ suppressed_summary_keys.append("clones")
lines = [
"REPORT METADATA",
@@ -581,12 +636,12 @@ def render_text_report_document(payload: Mapping[str, object]) -> str:
"Clones: "
+ _format_key_values(
findings_clones,
- ("functions", "blocks", "segments", "new", "known"),
+ tuple(clone_summary_keys),
),
"Suppressed: "
+ _format_key_values(
findings_suppressed,
- ("dead_code",),
+ tuple(suppressed_summary_keys),
),
"",
"METRICS SUMMARY",
@@ -596,18 +651,32 @@ def render_text_report_document(payload: Mapping[str, object]) -> str:
"complexity",
"coupling",
"cohesion",
+ "coverage_join",
"overloaded_modules",
"dependencies",
"dead_code",
"health",
):
family_summary = _as_mapping(metrics_summary.get(family_name))
+ if family_name == "coverage_join" and not family_summary:
+ continue
keys: Sequence[str]
match family_name:
case "complexity" | "coupling":
keys = ("total", "average", "max", "high_risk")
case "cohesion":
keys = ("total", "average", "max", "low_cohesion")
+ case "coverage_join":
+ keys = (
+ "status",
+ "source",
+ "units",
+ "measured_units",
+ "overall_permille",
+ "coverage_hotspots",
+ "scope_gap_hotspots",
+ "hotspot_threshold_percent",
+ )
case "dependencies":
keys = ("modules", "edges", "cycles", "max_depth")
case "overloaded_modules":
@@ -624,6 +693,36 @@ def render_text_report_document(payload: Mapping[str, object]) -> str:
keys = ("score", "grade")
lines.append(f"{family_name}: {_format_key_values(family_summary, keys)}")
+ coverage_join_family = _as_mapping(metrics_families.get("coverage_join"))
+ coverage_join_items = _as_sequence(coverage_join_family.get("items"))
+ if coverage_join_family:
+ lines.extend(
+ [
+ "",
+ "COVERAGE JOIN (top 10)",
+ ]
+ )
+ if not coverage_join_items:
+ lines.append("(none)")
+ else:
+ lines.extend(
+ "- "
+ + _format_key_values(
+ item,
+ (
+ "relative_path",
+ "qualname",
+ "coverage_status",
+ "risk",
+ "coverage_permille",
+ "cyclomatic_complexity",
+ "coverage_hotspot",
+ "scope_gap_hotspot",
+ ),
+ )
+ for item in map(_as_mapping, coverage_join_items[:10])
+ )
+
overloaded_modules_family = _as_mapping(metrics_families.get("overloaded_modules"))
if not overloaded_modules_family:
overloaded_modules_family = _as_mapping(metrics_families.get("god_modules"))
@@ -710,6 +809,28 @@ def render_text_report_document(payload: Mapping[str, object]) -> str:
novelty="known",
metric_name="size",
)
+ if suppressed_clone_groups:
+ lines.append("")
+ _append_suppressed_clone_section(
+ lines,
+ title="SUPPRESSED FUNCTION CLONES",
+ groups=_as_sequence(suppressed_clone_groups.get("functions")),
+ metric_name="loc",
+ )
+ lines.append("")
+ _append_suppressed_clone_section(
+ lines,
+ title="SUPPRESSED BLOCK CLONES",
+ groups=_as_sequence(suppressed_clone_groups.get("blocks")),
+ metric_name="size",
+ )
+ lines.append("")
+ _append_suppressed_clone_section(
+ lines,
+ title="SUPPRESSED SEGMENT CLONES",
+ groups=_as_sequence(suppressed_clone_groups.get("segments")),
+ metric_name="size",
+ )
lines.append("")
_append_structural_findings(
lines,
diff --git a/codeclone/ui_messages.py b/codeclone/ui_messages.py
index 63d013a..d811b31 100644
--- a/codeclone/ui_messages.py
+++ b/codeclone/ui_messages.py
@@ -104,6 +104,10 @@
"Collect public API surface facts for baseline-aware compatibility review.\n"
"Disabled by default."
)
+HELP_COVERAGE = (
+ "Join external Cobertura XML line coverage to function spans.\n"
+ "Pass a `coverage xml` report path."
+)
HELP_FAIL_ON_TYPING_REGRESSION = (
"Exit with code 3 if typing adoption coverage regresses relative to the\n"
"metrics baseline."
@@ -116,6 +120,10 @@
"Exit with code 3 if public API removals or signature breaks are detected\n"
"relative to the metrics baseline."
)
+HELP_FAIL_ON_UNTESTED_HOTSPOTS = (
+ "Exit with code 3 if medium/high-risk functions measured by Coverage Join\n"
+ "fall below the joined coverage threshold.\nRequires --coverage."
+)
HELP_MIN_TYPING_COVERAGE = (
"Exit with code 3 if parameter typing coverage falls below the threshold.\n"
"Threshold is a whole percent from 0 to 100."
@@ -124,6 +132,10 @@
"Exit with code 3 if public docstring coverage falls below the threshold.\n"
"Threshold is a whole percent from 0 to 100."
)
+HELP_COVERAGE_MIN = (
+ "Coverage threshold for untested hotspot detection.\n"
+ "Threshold is a whole percent from 0 to 100.\nDefault: 50."
+)
HELP_CI = (
"Enable CI preset.\n"
"Equivalent to: --fail-on-new --no-color --quiet.\n"
@@ -236,6 +248,7 @@
WARN_HTML_REPORT_OPEN_FAILED = (
"[warning]Failed to open HTML report in browser: {path} ({error}).[/warning]"
)
+WARN_COVERAGE_JOIN_IGNORED = "[warning]Coverage join ignored: {error}[/warning]"
ERR_INVALID_OUTPUT_EXT = (
"[error]Invalid {label} output extension: {path} "
@@ -351,6 +364,10 @@ def fmt_html_report_open_failed(*, path: Path, error: object) -> str:
return WARN_HTML_REPORT_OPEN_FAILED.format(path=path, error=error)
+def fmt_coverage_join_ignored(error: object) -> str:
+ return WARN_COVERAGE_JOIN_IGNORED.format(error=error)
+
+
def fmt_unreadable_source_in_gating(*, count: int) -> str:
return ERR_UNREADABLE_SOURCE_IN_GATING.format(count=count)
@@ -405,15 +422,19 @@ def fmt_summary_compact_clones(
block: int,
segment: int,
suppressed: int,
+ fixture_excluded: int,
new: int,
) -> str:
- return SUMMARY_COMPACT_CLONES.format(
- function=function,
- block=block,
- segment=segment,
- suppressed=suppressed,
- new=new,
- )
+ parts = [
+ f"Clones func={function}",
+ f"block={block}",
+ f"seg={segment}",
+ f"suppressed={suppressed}",
+ ]
+ if fixture_excluded > 0:
+ parts.append(f"fixtures={fixture_excluded}")
+ parts.append(f"new={new}")
+ return " ".join(parts)
def fmt_summary_compact_metrics(
@@ -445,6 +466,63 @@ def fmt_summary_compact_metrics(
)
+def fmt_summary_compact_adoption(
+ *,
+ param_permille: int,
+ return_permille: int,
+ docstring_permille: int,
+ any_annotation_count: int,
+) -> str:
+ return (
+ "Adoption"
+ f" params={_format_permille_pct(param_permille)}"
+ f" returns={_format_permille_pct(return_permille)}"
+ f" docstrings={_format_permille_pct(docstring_permille)}"
+ f" any={any_annotation_count}"
+ )
+
+
+def fmt_summary_compact_api_surface(
+ *,
+ public_symbols: int,
+ modules: int,
+ added: int,
+ breaking: int,
+) -> str:
+ return (
+ "Public API"
+ f" symbols={public_symbols}"
+ f" modules={modules}"
+ f" breaking={breaking}"
+ f" added={added}"
+ )
+
+
+def fmt_summary_compact_coverage_join(
+ *,
+ status: str,
+ overall_permille: int,
+ coverage_hotspots: int,
+ scope_gap_hotspots: int,
+ threshold_percent: int,
+ source_label: str,
+) -> str:
+ parts = [f"Coverage status={status or 'unknown'}"]
+ if status == "ok":
+ parts.extend(
+ [
+ f"overall={_format_permille_pct(overall_permille)}",
+ f"coverage_hotspots={coverage_hotspots}",
+ f"threshold={threshold_percent}",
+ ]
+ )
+ if scope_gap_hotspots > 0:
+ parts.append(f"scope_gaps={scope_gap_hotspots}")
+ if source_label:
+ parts.append(f"source={source_label}")
+ return " ".join(parts)
+
+
_HEALTH_GRADE_STYLE: dict[str, str] = {
HEALTH_GRADE_A: "bold green",
HEALTH_GRADE_B: "green",
@@ -505,7 +583,13 @@ def fmt_summary_parsed(
def fmt_summary_clones(
- *, func: int, block: int, segment: int, suppressed: int, new: int
+ *,
+ func: int,
+ block: int,
+ segment: int,
+ suppressed: int,
+ fixture_excluded: int,
+ new: int,
) -> str:
clone_parts = [
f"{_v(func, 'bold yellow')} func",
@@ -516,8 +600,10 @@ def fmt_summary_clones(
main = " \u00b7 ".join(clone_parts)
quals = [
f"{_v(suppressed, 'yellow')} suppressed",
- f"{_v(new, 'bold red')} new",
]
+ if fixture_excluded > 0:
+ quals.append(f"{_v(fixture_excluded, 'yellow')} fixtures")
+ quals.append(f"{_v(new, 'bold red')} new")
return f" {'Clones':<{_L}}{main} ({', '.join(quals)})"
@@ -610,6 +696,31 @@ def fmt_metrics_api_surface(
return f" {'Public API':<{_L}}{' · '.join(parts)}"
+def fmt_metrics_coverage_join(
+ *,
+ status: str,
+ overall_permille: int,
+ coverage_hotspots: int,
+ scope_gap_hotspots: int,
+ threshold_percent: int,
+ source_label: str,
+) -> str:
+ if status != "ok":
+ parts = ["join unavailable"]
+ if source_label:
+ parts.append(source_label)
+ return f" {'Coverage':<{_L}}[yellow]{' · '.join(parts)}[/yellow]"
+ parts = [
+ f"{_format_permille_pct(overall_permille)} overall",
+ f"{_v(coverage_hotspots, 'bold red')} hotspots < {threshold_percent}%",
+ ]
+ if scope_gap_hotspots > 0:
+ parts.append(f"{_v(scope_gap_hotspots, 'bold yellow')} scope gaps")
+ if source_label:
+ parts.append(source_label)
+ return f" {'Coverage':<{_L}}{' · '.join(parts)}"
+
+
def fmt_metrics_overloaded_modules(
*,
candidates: int,
diff --git a/docs/README.md b/docs/README.md
index a5df1f1..843577b 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -37,9 +37,9 @@ repository build:
- [Exit codes and failure policy](book/03-contracts-exit-codes.md)
- [Config and defaults](book/04-config-and-defaults.md)
- [Core pipeline and invariants](book/05-core-pipeline.md)
-- [Baseline contract (schema v2.0)](book/06-baseline.md)
-- [Cache contract (schema v2.3)](book/07-cache.md)
-- [Report contract (schema v2.5)](book/08-report.md)
+- [Baseline contract (schema v2.1)](book/06-baseline.md)
+- [Cache contract (schema v2.4)](book/07-cache.md)
+- [Report contract (schema v2.8)](book/08-report.md)
## Interfaces
diff --git a/docs/architecture.md b/docs/architecture.md
index 5c96b06..43fab28 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -144,7 +144,7 @@ gating decisions.
Detected findings can be rendered as:
- interactive HTML (`--html`),
-- canonical JSON (`--json`, schema `2.5`),
+- canonical JSON (`--json`, schema `2.8`),
- deterministic text projection (`--text`),
- deterministic Markdown projection (`--md`),
- deterministic SARIF projection (`--sarif`).
diff --git a/docs/book/04-config-and-defaults.md b/docs/book/04-config-and-defaults.md
index a83593c..934d7f5 100644
--- a/docs/book/04-config-and-defaults.md
+++ b/docs/book/04-config-and-defaults.md
@@ -32,6 +32,7 @@ Key defaults:
- `--baseline=codeclone.baseline.json`
- `--max-baseline-size-mb=5`
- `--max-cache-size-mb=50`
+- `--coverage-min=50`
+- default cache path (when no cache flag is given):
+  `/.cache/codeclone/cache.json`
- `--metrics-baseline=codeclone.baseline.json` (same default path as `--baseline`)
- bare reporting flags use default report paths:
@@ -59,12 +60,109 @@ skip_metrics = true
quiet = true
```
+Supported `[tool.codeclone]` keys in the current line:
+
+Analysis:
+
+| Key | Type | Default | Meaning |
+|------------------------|---------------|--------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|
+| `min_loc` | `int` | `10` | Minimum function LOC for clone admission |
+| `min_stmt` | `int` | `6` | Minimum function statement count for clone admission |
+| `block_min_loc` | `int` | `20` | Minimum function LOC for block-window analysis |
+| `block_min_stmt` | `int` | `8` | Minimum function statements for block-window analysis |
+| `segment_min_loc` | `int` | `20` | Minimum function LOC for segment analysis |
+| `segment_min_stmt` | `int` | `10` | Minimum function statements for segment analysis |
+| `processes` | `int` | `4` | Worker process count |
+| `cache_path` | `str \| null` | `/.cache/codeclone/cache.json` | Cache file path |
+| `max_cache_size_mb` | `int` | `50` | Maximum accepted cache size before fail-open ignore |
+| `skip_metrics` | `bool` | `false*` | Skip full metrics mode when allowed |
+| `skip_dead_code` | `bool` | `false` | Skip dead-code analysis |
+| `skip_dependencies` | `bool` | `false` | Skip dependency analysis |
+| `golden_fixture_paths` | `list[str]` | `[]` | Exclude clone groups fully contained in matching golden test fixtures from health/gates/active findings; keep them as suppressed report facts |
+
+Baseline and CI:
+
+| Key | Type | Default | Meaning |
+|---------------------------|--------|---------------------------|-------------------------------------------|
+| `baseline` | `str` | `codeclone.baseline.json` | Clone baseline path |
+| `max_baseline_size_mb` | `int` | `5` | Maximum accepted baseline size |
+| `update_baseline` | `bool` | `false` | Rewrite unified baseline from current run |
+| `metrics_baseline` | `str` | `codeclone.baseline.json` | Dedicated metrics-baseline path override |
+| `update_metrics_baseline` | `bool` | `false` | Rewrite metrics baseline from current run |
+| `ci` | `bool` | `false` | Enable CI preset behavior |
+
+Quality gates and metric collection:
+
+| Key | Type | Default | Meaning |
+|--------------------------------|---------------|---------|-------------------------------------------------------------------------------------|
+| `fail_on_new` | `bool` | `false` | Fail when new clone groups appear |
+| `fail_threshold` | `int` | `-1` | Fail when clone count exceeds threshold |
+| `fail_complexity` | `int` | `-1` | Fail when max cyclomatic complexity exceeds threshold |
+| `fail_coupling` | `int` | `-1` | Fail when max CBO exceeds threshold |
+| `fail_cohesion` | `int` | `-1` | Fail when max LCOM4 exceeds threshold |
+| `fail_cycles` | `bool` | `false` | Fail when dependency cycles are present |
+| `fail_dead_code` | `bool` | `false` | Fail when high-confidence dead code is present |
+| `fail_health` | `int` | `-1` | Fail when health score drops below threshold |
+| `fail_on_new_metrics` | `bool` | `false` | Fail on new metric hotspots vs trusted metrics baseline |
+| `typing_coverage` | `bool` | `true` | Collect typing adoption facts |
+| `docstring_coverage` | `bool` | `true` | Collect public docstring adoption facts |
+| `api_surface` | `bool` | `false` | Collect public API inventory/diff facts |
+| `coverage_xml` | `str \| null` | `null` | Join external Cobertura XML to current-run function spans |
+| `coverage_min` | `int` | `50` | Coverage threshold for joined measured coverage hotspots |
+| `min_typing_coverage` | `int` | `-1` | Minimum allowed typing coverage percent |
+| `min_docstring_coverage` | `int` | `-1` | Minimum allowed docstring coverage percent |
+| `fail_on_typing_regression` | `bool` | `false` | Fail on typing coverage regression vs metrics baseline |
+| `fail_on_docstring_regression` | `bool` | `false` | Fail on docstring coverage regression vs metrics baseline |
+| `fail_on_api_break` | `bool` | `false` | Fail on public API breaking changes vs metrics baseline |
+| `fail_on_untested_hotspots` | `bool` | `false` | Fail when medium/high-risk functions measured by Coverage Join fall below threshold |
+
+Report outputs and local UX:
+
+| Key | Type | Default | Meaning |
+|---------------|---------------|---------|--------------------------------|
+| `html_out` | `str \| null` | `null` | HTML report output path |
+| `json_out` | `str \| null` | `null` | JSON report output path |
+| `md_out` | `str \| null` | `null` | Markdown report output path |
+| `sarif_out` | `str \| null` | `null` | SARIF report output path |
+| `text_out` | `str \| null` | `null` | Plain-text report output path |
+| `no_progress` | `bool` | `false` | Disable progress UI |
+| `no_color` | `bool` | `false` | Disable colored CLI output |
+| `quiet` | `bool` | `false` | Use compact CLI output |
+| `verbose` | `bool` | `false` | Enable more verbose CLI output |
+| `debug` | `bool` | `false` | Enable debug diagnostics |
+
+This is the exact accepted key set from `codeclone/_cli_config.py`; unknown
+keys are contract errors.
+
+Notes:
+
+- `skip_metrics=false*`: parser default is `false`, but runtime may auto-enable
+ it when no metrics work is requested and no metrics baseline exists.
+- Report output keys default to `null`; bare CLI flags still write to the
+ deterministic `.cache/codeclone/report.*` paths listed above.
+
CLI always has precedence when option is explicitly provided, including boolean
overrides via `--foo/--no-foo` (e.g. `--no-skip-metrics`).
Path values loaded from `pyproject.toml` are normalized relative to resolved
scan root when provided as relative paths.
+`golden_fixture_paths` is different:
+
+- entries are repo-relative glob patterns, not filesystem paths
+- they are not normalized to absolute paths
+- they must target `tests/` or `tests/fixtures/`
+- a clone group is excluded only when every occurrence matches the configured
+ golden-fixture scope
+
+Current-run coverage join config:
+
+- `coverage_xml` may be set in `pyproject.toml`; relative paths resolve from
+ the scan root like other configured paths.
+- `coverage_min` and `fail_on_untested_hotspots` follow the same precedence
+ rules as CLI flags.
+- Coverage join remains current-run only and does not persist to baseline.
+
Metrics baseline path selection contract:
- If `--metrics-baseline` is explicitly set, that path is used.
diff --git a/docs/book/05-core-pipeline.md b/docs/book/05-core-pipeline.md
index 1640dd1..57c3a3c 100644
--- a/docs/book/05-core-pipeline.md
+++ b/docs/book/05-core-pipeline.md
@@ -30,6 +30,8 @@ Stages:
5. Report-layer post-processing:
- merge block windows to maximal regions
- merge/suppress segment report groups
+ - optionally split out clone groups fully contained in configured
+ `golden_fixture_paths`
6. Structural report findings:
- duplicated branch families from per-function AST structure facts
- clone cohort drift families built from existing function groups (no rescan)
@@ -42,20 +44,28 @@ Stages:
- seven dimension scores: clones, complexity, coupling, cohesion,
dead code, dependencies, coverage
- weighted blend → composite score (0–100) and grade (A–F)
-9. Design finding extraction:
- - threshold-aware findings for complexity, coupling, cohesion
- - thresholds recorded in `meta.analysis_thresholds.design_findings`
-10. Suggestion generation:
+9. Suggestion generation:
- advisory cards from clone groups, structural findings, metric violations
- deterministic priority sort, never gates CI
-11. Derived overview and hotlists:
+10. Current-run coverage join (optional):
+ - when `--coverage` is present, join external Cobertura XML to discovered
+ function spans
+ - invalid XML becomes `coverage_join.status="invalid"` for that run rather
+ than mutating baseline state
+11. Design finding extraction:
+ - threshold-aware findings for complexity, coupling, cohesion
+ - coverage `coverage_hotspot` / `coverage_scope_gap` findings from valid
+ coverage-join rows only
+ - thresholds recorded in `meta.analysis_thresholds.design_findings`
+12. Derived overview and hotlists:
- overview families, top risks, source breakdown, health snapshot
- directory hotspots by category (`derived.overview.directory_hotspots`)
- hotlists: most actionable, highest spread, production/test-fixture hotspots
-12. Gate evaluation:
+13. Gate evaluation:
- clone-baseline diff (NEW vs KNOWN)
- metric threshold gates (`--fail-complexity`, `--fail-coupling`, etc.)
- metric regression gates (`--fail-on-new-metrics`)
+ - coverage hotspot gate (`--fail-on-untested-hotspots`)
- gate reasons emitted in deterministic order
Refs:
@@ -65,6 +75,7 @@ Refs:
- `codeclone/report/blocks.py:prepare_block_report_groups`
- `codeclone/report/segments.py:prepare_segment_report_groups`
- `codeclone/metrics/health.py:compute_health`
+- `codeclone/metrics/coverage_join.py:build_coverage_join`
- `codeclone/report/json_contract.py:_build_design_groups`
- `codeclone/report/suggestions.py:generate_suggestions`
- `codeclone/report/overview.py:build_directory_hotspots`
@@ -76,6 +87,12 @@ Refs:
- Report-layer transformations do not change function/block grouping keys used for baseline diff.
- Segment groups are report-only and do not participate in baseline diff/gating.
- Structural findings are report-only and do not participate in baseline diff/gating.
+- `golden_fixture_paths` is a project-level clone exclusion policy, not a
+ fingerprint/baseline rule:
+ - it applies only to clone groups fully contained in matching
+ `tests/` / `tests/fixtures/` paths
+ - excluded groups do not affect health, clone gates, or suggestions
+ - excluded groups remain observable as suppressed canonical report facts
- Dead-code liveness references from test paths are excluded at extraction/cache-load boundaries for both
local-name references and canonical qualname references.
diff --git a/docs/book/06-baseline.md b/docs/book/06-baseline.md
index 79d37e5..b21c51e 100644
--- a/docs/book/06-baseline.md
+++ b/docs/book/06-baseline.md
@@ -61,6 +61,8 @@ Embedded metrics contract:
- The default runtime flow is unified: clone baseline and metrics baseline
usually share the same `codeclone.baseline.json` file unless the metrics path
is explicitly overridden.
+- In unified rewrite mode, disabled optional metric surfaces are omitted from
+  the rewritten embedded payload instead of being carried forward as stale data.
Integrity payload includes only:
diff --git a/docs/book/07-cache.md b/docs/book/07-cache.md
index 1e2fe51..f61b815 100644
--- a/docs/book/07-cache.md
+++ b/docs/book/07-cache.md
@@ -2,7 +2,7 @@
## Purpose
-Define cache schema v2.3, integrity verification, and fail-open behavior.
+Define cache schema v2.4, integrity verification, and fail-open behavior.
## Public surface
@@ -13,7 +13,7 @@ Define cache schema v2.3, integrity verification, and fail-open behavior.
## Data model
-On-disk schema (`v == "2.3"`):
+On-disk schema (`v == "2.4"`):
- Top-level: `v`, `payload`, `sig`
- `payload` keys: `py`, `fp`, `ap`, `files`, optional `sr`
@@ -73,6 +73,8 @@ Refs:
- Cache save writes canonical JSON and atomically replaces target file.
- Empty sections (`u`, `b`, `s`) are omitted from written wire entries.
- `rn`/`rq` are serialized as sorted unique arrays and omitted when empty.
+- Cached public-API symbol payloads preserve declared parameter order; cache
+ canonicalization must not reorder callable signatures.
- `ss` is written when source stats are available and is required for full cache-hit
accounting in discovery stage.
- Legacy secret file `.cache_secret` is never used for trust; warning only.
diff --git a/docs/book/08-report.md b/docs/book/08-report.md
index 60eb5b1..2dbc6a0 100644
--- a/docs/book/08-report.md
+++ b/docs/book/08-report.md
@@ -2,7 +2,7 @@
## Purpose
-Define report contracts in `2.0.0b5`: canonical JSON (`report_schema_version=2.5`)
+Define report contracts in `2.0.0b5`: canonical JSON (`report_schema_version=2.8`)
plus deterministic TXT/Markdown/SARIF projections.
## Public surface
@@ -16,7 +16,7 @@ plus deterministic TXT/Markdown/SARIF projections.
## Data model
-JSON report top-level (v2.5):
+JSON report top-level (v2.8):
- `report_schema_version`
- `meta`
@@ -45,9 +45,19 @@ Canonical report-only metrics additions:
- `metrics.families.api_surface` records the current public symbol inventory
and compact baseline diff facts (`added`, `breaking`) when
`--api-surface` is enabled
-- the family is canonical report truth, but it does **not** participate in
- findings totals, health, gates, baseline NEW/KNOWN semantics, or SARIF in
- `b4`
+- `metrics.families.coverage_join` records an optional current-run join between
+ external Cobertura line coverage and CodeClone function spans. Its summary
+ carries `status`, `source`, unit/line counts, `overall_permille`,
+ `missing_from_report_units`, `coverage_hotspots`, `scope_gap_hotspots`,
+ `hotspot_threshold_percent`, and optional `invalid_reason`; the same compact
+ summary is mirrored in `metrics.summary.coverage_join`; its items carry
+ per-function joined coverage facts, including `coverage_status`,
+ `coverage_hotspot`, and `scope_gap_hotspot`.
+- coverage join facts are canonical report truth for that run, but they are
+ **not** baseline truth and do not update `codeclone.baseline.json`
+- adoption/API/coverage-join metrics do **not** participate in clone baseline
+  NEW/KNOWN semantics; coverage join also does not participate in health
+  scoring, and it affects gates only when explicitly requested
- `Overloaded Modules` is a report-only experimental layer rather than a second
complexity metric:
- complexity reports local control-flow hotspots in functions and methods
@@ -60,6 +70,12 @@ Coverage/API role split:
- `coverage_adoption` is a canonical metrics family, not a style linter. It
reports observable adoption facts only.
+- `coverage_join` is a canonical current-run signal over an external Cobertura
+ XML file. It reports joined line facts and may materialize
+ `design` findings with `category="coverage"` and kinds
+ `coverage_hotspot` (measured below threshold) or `coverage_scope_gap`
+ (outside the supplied coverage scope); it does not infer branch coverage or
+ execute tests.
- `api_surface` is a canonical metrics/gating family, not a second finding
engine. It reports public API inventory plus baseline-diff facts when the
run opted into API collection.
@@ -89,10 +105,14 @@ Derived projection layer:
Finding families:
- `findings.groups.clones.{functions,blocks,segments}`
+- optional `findings.groups.clones.suppressed.{functions,blocks,segments}` for
+ clone groups excluded by project policy such as `golden_fixture_paths`
- `findings.groups.structural.groups`
- `findings.groups.dead_code.groups`
- `findings.groups.design.groups`
- `findings.summary.suppressed.dead_code` (suppressed counter, non-active findings)
+- optional `findings.summary.suppressed.clones` plus clone-summary suppressed
+ counters when clone groups were excluded from active findings
Important role split:
@@ -132,6 +152,10 @@ Per-group common axes (family-specific fields may extend):
- Design findings are built once in the canonical report using the effective
threshold policy recorded in `meta.analysis_thresholds.design_findings`; MCP
and HTML must not re-synthesize them post-hoc from raw metric rows.
+- Coverage design findings are built from canonical `coverage_join` rows only
+ when a valid join is present. Invalid coverage input is represented as
+ `metrics.families.coverage_join.summary.status="invalid"` with no hotspot
+ item rows.
- HTML overview cards are materialized from canonical findings plus
`derived.overview` + `derived.hotlists`; pre-expanded overview card payloads are
not part of the report contract.
@@ -160,6 +184,9 @@ Per-group common axes (family-specific fields may extend):
- Dead-code suppressed candidates are carried only under metrics
(`metrics.families.dead_code.suppressed_items`) and never promoted to
active `findings.groups.dead_code`.
+- Clone groups excluded by `golden_fixture_paths` are carried only under
+ `findings.groups.clones.suppressed.*`; they do not contribute to active
+ findings totals, health scoring, clone gating, or suggestion generation.
- A lower score after upgrade may reflect a broader health model, not only
worse code. Report renderers may surface the score, but health-model
expansion is documented separately in [15-health-score.md](15-health-score.md)
diff --git a/docs/book/09-cli.md b/docs/book/09-cli.md
index fb0f467..e79df83 100644
--- a/docs/book/09-cli.md
+++ b/docs/book/09-cli.md
@@ -25,6 +25,7 @@ Summary metrics:
- files found/analyzed/cache hits/skipped
- structural counters: analyzed lines/functions/methods/classes
- function/block/segment groups
+- excluded golden-fixture clone groups (when configured)
- suppressed segment groups
- dead-code active/suppressed status in metrics line
- adoption coverage in the normal `Metrics` block:
@@ -32,6 +33,9 @@ Summary metrics:
- public API surface in the normal `Metrics` block when `api_surface` was
collected: symbol/module counts plus added/breaking deltas when a trusted
metrics baseline is available
+- coverage join in the normal `Metrics` block when `--coverage FILE` was
+ provided: joined Cobertura overall line coverage, untested hotspot count, and
+ threshold/source context
- new vs baseline
Metrics-related CLI gates:
@@ -45,10 +49,16 @@ Metrics-related CLI gates:
`--fail-on-typing-regression`,
`--fail-on-docstring-regression`,
`--fail-on-api-break`
+- external coverage join gate:
+ `--coverage FILE`, `--coverage-min PERCENT`,
+ `--fail-on-untested-hotspots`
- update mode:
`--update-metrics-baseline`
- opt-in metrics family:
`--api-surface`
+- In unified baseline mode, `--update-baseline` rewrites embedded metric
+ surfaces from the current enabled config; disabled optional surfaces are
+ dropped.
Refs:
@@ -82,8 +92,12 @@ Refs:
- The normal rich `Metrics` block includes:
- `Adoption` when adoption coverage facts were computed
- `Public API` when `api_surface` facts were computed
+ - `Coverage` when Cobertura coverage was joined with `--coverage`
- Quiet compact metrics output stays on the existing fixed one-line summary and
- does not expand adoption/API detail.
+ does not expand adoption/API/coverage-join detail.
+- When `golden_fixture_paths` excludes clone groups from active review, CLI
+ keeps that count inside the `Clones` summary line (`fixtures=N`) instead of
+ adding a separate summary row.
- Typing/docstring adoption metrics are computed by default in full mode.
- `--api-surface` is opt-in in normal runs, but runtime auto-enables it when
`--fail-on-api-break` or `--update-metrics-baseline` needs a public API
@@ -92,6 +106,17 @@ Refs:
metrics baseline that already contains adoption coverage data.
- `--fail-on-api-break` requires a metrics baseline that already contains
`api_surface` data.
+- `--coverage` is a current-run external Cobertura input. It does not update or
+ compare against `codeclone.baseline.json`.
+- Invalid Cobertura XML is warning-only in normal runs: CLI prints
+ `Coverage join ignored`, keeps exit `0`, and shows `Coverage` as unavailable
+ in the normal `Metrics` block. It becomes a contract error only when
+ `--fail-on-untested-hotspots` requires a valid join.
+- `--fail-on-untested-hotspots` requires `--coverage` and a valid Cobertura XML
+  input. It exits `3` when any medium/high-risk function measured by Coverage
+  Join has line coverage below `--coverage-min` (default `50`). Functions
+  outside the supplied `coverage.xml` scope are surfaced separately and do not
+  trigger this gate. The flag name is retained for CLI compatibility.
Refs:
@@ -136,9 +161,13 @@ Refs:
| `--diff-against` + `--paths-from-git-diff` | contract | 2 |
| Baseline untrusted in CI/gating | contract | 2 |
| Coverage/API regression gate without required metrics-baseline capability | contract | 2 |
+| `--fail-on-untested-hotspots` without `--coverage` | contract | 2 |
+| Invalid Cobertura XML without hotspot gating | warning only | 0 |
+| Invalid Cobertura XML for coverage hotspot gating | contract | 2 |
| Unreadable source in CI/gating | contract | 2 |
| New clones with `--fail-on-new` | gating | 3 |
| Threshold exceeded | gating | 3 |
+| Coverage hotspots with `--fail-on-untested-hotspots` | gating | 3 |
| Unexpected exception | internal | 5 |
## Determinism / canonicalization
diff --git a/docs/book/10-html-render.md b/docs/book/10-html-render.md
index d11bc02..42f7530 100644
--- a/docs/book/10-html-render.md
+++ b/docs/book/10-html-render.md
@@ -47,8 +47,19 @@ Refs:
- Hotspots by Directory: render-only view over `derived.overview.directory_hotspots`
- Health Profile: full-width radar chart of dimension scores
- Get Badge modal: grade-only / score+grade variants with shields.io embed
+- Quality UI is also a report projection:
+ - deterministic subtabs for complexity, coupling, cohesion, overloaded
+ modules, and `Coverage Join` when canonical join facts exist
+ - `Coverage Join` uses the same stat-card and table patterns as other
+ quality surfaces; it separates measured coverage hotspots from coverage
+ scope gaps, and invalid joins render a factual unavailable state instead
+ of a success-style empty message
- Dead-code UI is a single top-level `Dead Code` tab with deterministic split
sub-tabs: `Active` and `Suppressed`.
+- Clones UI may append a `Suppressed` sub-tab when canonical report data
+ includes `findings.groups.clones.suppressed.*`; those rows are factual
+ projections of policy-excluded clone groups such as `golden_fixture_paths`
+ and do not become active clone findings.
- IDE deep links:
- An IDE picker in the topbar lets users choose their IDE. The selection is
persisted in `localStorage` (key `codeclone-ide`).
diff --git a/docs/book/13-testing-as-spec.md b/docs/book/13-testing-as-spec.md
index a5fbcd2..482c942 100644
--- a/docs/book/13-testing-as-spec.md
+++ b/docs/book/13-testing-as-spec.md
@@ -11,9 +11,13 @@ Contract tests are concentrated in:
- `tests/test_baseline.py`
- `tests/test_cache.py`
- `tests/test_report.py`
+- `tests/test_report_contract_coverage.py`
- `tests/test_cli_inprocess.py`
- `tests/test_cli_unit.py`
+- `tests/test_coverage_join.py`
+- `tests/test_golden_fixtures.py`
- `tests/test_html_report.py`
+- `tests/test_mcp_service.py`
- `tests/test_detector_golden.py`
- `tests/test_golden_v2.py`
@@ -29,14 +33,16 @@ Test classes by role:
The following matrix is treated as executable contract:
-| Contract | Tests |
-|--------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
-| Baseline schema/integrity/compat gates | `tests/test_baseline.py` |
-| Cache v2.3 fail-open + status mapping | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag` |
-| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` |
-| Report schema v2.5 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` |
-| HTML render-only explainability + escaping | `tests/test_html_report.py` |
-| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` |
+| Contract | Tests |
+|--------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|
+| Baseline schema/integrity/compat gates | `tests/test_baseline.py` |
+| Cache v2.4 fail-open + status mapping + API signature order preservation | `tests/test_cache.py`, `tests/test_cli_inprocess.py::test_cli_reports_cache_too_large_respects_max_size_flag`, `tests/test_cli_inprocess.py::test_cli_public_api_breaking_count_stable_across_warm_cache` |
+| Exit code categories and markers | `tests/test_cli_unit.py`, `tests/test_cli_inprocess.py` |
+| Report schema v2.8 canonical/derived/integrity + JSON/TXT/MD/SARIF projections | `tests/test_report.py`, `tests/test_report_contract_coverage.py`, `tests/test_report_branch_invariants.py` |
+| HTML render-only explainability + escaping | `tests/test_html_report.py` |
+| Current-run Cobertura coverage join parsing, gating, and projections | `tests/test_coverage_join.py`, `tests/test_pipeline_metrics.py`, `tests/test_cli_unit.py`, `tests/test_mcp_service.py`, `tests/test_html_report.py` |
+| Golden fixture clone exclusion policy | `tests/test_golden_fixtures.py`, `tests/test_cli_inprocess.py::test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups`, `tests/test_report.py::test_report_json_clone_groups_can_include_suppressed_golden_fixture_bucket` |
+| Scanner traversal safety | `tests/test_scanner_extra.py`, `tests/test_security.py` |
## Invariants (MUST)
@@ -73,6 +79,7 @@ Refs:
- `tests/test_baseline.py::test_baseline_payload_fields_contract_invariant`
- `tests/test_cache.py::test_cache_v13_missing_optional_sections_default_empty`
- `tests/test_report.py::test_report_json_compact_v21_contract`
+- `tests/test_coverage_join.py::test_build_coverage_join_maps_cobertura_lines_to_function_spans`
- `tests/test_cli_inprocess.py::test_cli_contract_error_priority_over_gating_failure_for_unreadable_source`
- `tests/test_html_report.py::test_html_and_json_group_order_consistent`
- `tests/test_detector_golden.py::test_detector_output_matches_golden_fixture`
diff --git a/docs/book/14-compatibility-and-versioning.md b/docs/book/14-compatibility-and-versioning.md
index 55ba2db..5e85b84 100644
--- a/docs/book/14-compatibility-and-versioning.md
+++ b/docs/book/14-compatibility-and-versioning.md
@@ -20,8 +20,8 @@ Current contract versions:
- `BASELINE_SCHEMA_VERSION = "2.1"`
- `BASELINE_FINGERPRINT_VERSION = "1"`
-- `CACHE_VERSION = "2.3"`
-- `REPORT_SCHEMA_VERSION = "2.5"`
+- `CACHE_VERSION = "2.4"`
+- `REPORT_SCHEMA_VERSION = "2.8"`
- `METRICS_BASELINE_SCHEMA_VERSION = "1.2"` (used only when metrics are stored
in a dedicated metrics-baseline file instead of the default unified baseline)
@@ -69,9 +69,13 @@ Version bump rules:
`report_schema_version` because they alter canonical report semantics and
integrity payload.
- The same is true for additive canonical metrics families such as
- `metrics.families.overloaded_modules`: even though the layer is report-only and does
- not affect health/gates/findings, it still changes canonical report schema
- and integrity payload, so it requires a report-schema bump.
+ `metrics.families.overloaded_modules`, `coverage_adoption`, `api_surface`,
+ or `coverage_join`: even when the layer is report-only or current-run only,
+ it still changes canonical report schema and integrity payload, so it
+ requires a report-schema bump.
+- The same rule applies to new canonical suppressed-finding buckets such as
+ `findings.groups.clones.suppressed.*`: even though they are non-active
+ review facts, they still change canonical report shape and integrity payload.
- CodeClone does not currently define a separate health-model version constant.
Health-score semantics are package-versioned and must be documented in the
Health Score chapter and release notes when they change.
diff --git a/docs/book/15-metrics-and-quality-gates.md b/docs/book/15-metrics-and-quality-gates.md
index 521e61a..61b830c 100644
--- a/docs/book/15-metrics-and-quality-gates.md
+++ b/docs/book/15-metrics-and-quality-gates.md
@@ -20,6 +20,9 @@ Metrics gate inputs:
`--fail-complexity`, `--fail-coupling`, `--fail-cohesion`, `--fail-health`
- adoption threshold gates:
`--min-typing-coverage`, `--min-docstring-coverage`
+- external Cobertura coverage join:
+ `--coverage FILE`, `--coverage-min PERCENT`,
+ `--fail-on-untested-hotspots`
- boolean structural gates:
`--fail-cycles`, `--fail-dead-code`
- baseline-aware delta gates:
@@ -54,6 +57,10 @@ Refs:
- `--skip-metrics` is incompatible with metrics gating/update flags and is a
contract error.
+- `golden_fixture_paths` is a separate project-level clone policy:
+ clone groups fully contained in matching `tests/` / `tests/fixtures/` paths
+ are excluded before health/gate/suggestion evaluation, but remain visible as
+ suppressed report facts.
- If metrics are not explicitly requested and no metrics baseline exists,
runtime auto-enables clone-only mode (`skip_metrics=true`).
- In clone-only mode:
@@ -61,12 +68,21 @@ Refs:
- `--fail-dead-code` forces dead-code analysis on (even if metrics are skipped).
- `--fail-cycles` forces dependency analysis on (even if metrics are skipped).
- Type/docstring adoption metrics are computed by default in full mode.
+- `--coverage` joins an external Cobertura XML file to current-run function
+ spans with stdlib XML parsing only. This signal is not metrics-baseline truth,
+ is not written to `codeclone.baseline.json`, and does not affect fingerprint
+ or clone identity semantics.
+- Invalid Cobertura XML downgrades to a current-run
+ `coverage_join.status="invalid"` signal in normal analysis. It does not fail
+ the run or update any baseline; only `--fail-on-untested-hotspots` upgrades
+ invalid input into a contract error.
- `--api-surface` is opt-in in normal runs, but runtime auto-enables it when
`--fail-on-api-break` or `--update-metrics-baseline` needs a public API
snapshot.
- In the normal CLI `Metrics` block, adoption coverage is shown whenever metrics
are computed, and the public API surface line appears when `api_surface`
- facts were collected.
+ facts were collected. A coverage line appears when `--coverage` produced a
+ joined coverage summary.
- `--update-baseline` in full mode implies metrics-baseline update in the same
run.
- If metrics baseline path equals clone baseline path and clone baseline file is
@@ -78,6 +94,11 @@ Refs:
metrics baseline that already contains adoption coverage data.
- `--fail-on-api-break` requires a metrics baseline that already contains
`api_surface` data.
+- `--fail-on-untested-hotspots` requires `--coverage` and a valid Cobertura XML
+ input. It evaluates current-run `coverage_join` facts only for measured
+ medium/high-risk functions below the configured threshold; scope gaps are
+ surfaced separately and do not require or update a metrics baseline. The
+ flag name is retained for CLI compatibility.
- In CI mode, if metrics baseline was loaded and trusted, runtime enables
`fail_on_new_metrics=true`.
@@ -93,7 +114,7 @@ Refs:
metrics were computed and metrics baseline is trusted.
- Metric gate reasons are emitted in deterministic order:
threshold checks -> cycles/dead/health -> NEW-vs-baseline diffs ->
- adoption/API baseline diffs.
+ adoption/API baseline diffs -> coverage-join hotspot gate.
- Metric gate reasons are namespaced as `metric:*` in gate output.
Refs:
@@ -103,13 +124,16 @@ Refs:
## Failure modes
-| Condition | Behavior |
-|-------------------------------------------------------------|--------------------------|
-| `--skip-metrics` with metrics flags | Contract error, exit `2` |
-| `--fail-on-new-metrics` without trusted baseline | Contract error, exit `2` |
-| Coverage/API regression gate without required baseline data | Contract error, exit `2` |
-| `--update-metrics-baseline` when metrics were not computed | Contract error, exit `2` |
-| Threshold breach or NEW-vs-baseline metric regressions | Gating failure, exit `3` |
+| Condition | Behavior |
+|-------------------------------------------------------------|--------------------------------------|
+| `--skip-metrics` with metrics flags | Contract error, exit `2` |
+| `--fail-on-new-metrics` without trusted baseline | Contract error, exit `2` |
+| Coverage/API regression gate without required baseline data | Contract error, exit `2` |
+| Invalid Cobertura XML without hotspot gate | Current-run invalid signal, exit `0` |
+| Coverage hotspot gate without valid `--coverage` input | Contract error, exit `2` |
+| `--update-metrics-baseline` when metrics were not computed | Contract error, exit `2` |
+| Threshold breach or NEW-vs-baseline metric regressions | Gating failure, exit `3` |
+| Coverage hotspots from current-run coverage join | Gating failure, exit `3` |
## Determinism / canonicalization
diff --git a/docs/book/20-mcp-interface.md b/docs/book/20-mcp-interface.md
index 4427caf..2c750df 100644
--- a/docs/book/20-mcp-interface.md
+++ b/docs/book/20-mcp-interface.md
@@ -70,6 +70,9 @@ Current server characteristics:
- flattened `diff` (`new_clones`, `health_delta`,
`typing_param_permille_delta`, `typing_return_permille_delta`,
`docstring_permille_delta`, `api_breaking_changes`, `new_api_symbols`)
+ - optional `coverage_join` when an analysis request included
+ `coverage_xml` (`status`, `overall_permille`, `coverage_hotspots`,
+ `scope_gap_hotspots`, `hotspot_threshold_percent`)
- `warnings`, `failures`
- `analyze_changed_paths` is intentionally more compact than `get_run_summary`:
it returns `changed_files`, compact `baseline`, `focus`, `health_scope`,
@@ -102,29 +105,29 @@ produced by the report contract.
Current tool set (`21` tools):
-| Tool | Key parameters | Purpose |
-|--------------------------|-----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------|
-| `analyze_repository` | absolute `root`, `analysis_mode`, thresholds, `api_surface`, cache/baseline paths | Full analysis → compact summary; then `get_run_summary` or `get_production_triage` |
-| `analyze_changed_paths` | absolute `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, `api_surface` | Diff-aware analysis → compact changed-files snapshot |
-| `get_run_summary` | `run_id` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds |
-| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first view: health, hotspots, suggestions, active thresholds |
-| `help` | `topic`, `detail` | Semantic guide for workflow, analysis profile, baseline, suppressions, review state, changed-scope |
-| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta: regressions, improvements, health change |
-| `evaluate_gates` | `run_id`, gate thresholds | Preview CI gating decisions |
-| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read report sections; `metrics_detail` is paginated with family/path filters |
-| `list_findings` | `family`, `severity`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, pagination | Filtered, paginated findings; use after hotspots or `check_*` |
-| `get_finding` | `finding_id`, `run_id`, `detail_level` | Single finding detail by id; defaults to `normal` |
-| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation payload for one finding |
-| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `limit` | Priority-ranked hotspot views; preferred before broad listing |
-| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `detail_level` | Clone findings only; `health.dimensions` includes only `clones` |
-| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `detail_level` | Complexity hotspots only |
-| `check_coupling` | `run_id`, `root`, `path`, `detail_level` | Coupling hotspots only |
-| `check_cohesion` | `run_id`, `root`, `path`, `detail_level` | Cohesion hotspots only |
-| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `detail_level` | Dead-code findings only |
-| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-friendly markdown or JSON summary |
-| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) |
-| `list_reviewed_findings` | `run_id` | List reviewed findings for a run |
-| `clear_session_runs` | none | Reset in-memory runs and session state |
+| Tool | Key parameters | Purpose |
+|--------------------------|----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------|
+| `analyze_repository` | absolute `root`, `analysis_mode`, thresholds, `api_surface`, `coverage_xml`, cache/baseline paths | Full analysis → compact summary; then `get_run_summary` or `get_production_triage` |
+| `analyze_changed_paths` | absolute `root`, `changed_paths` or `git_diff_ref`, `analysis_mode`, `api_surface`, `coverage_xml` | Diff-aware analysis → compact changed-files snapshot |
+| `get_run_summary` | `run_id` | Cheapest run snapshot: health, findings, baseline, inventory, active thresholds |
+| `get_production_triage` | `run_id`, `max_hotspots`, `max_suggestions` | Production-first view: health, hotspots, suggestions, active thresholds |
+| `help` | `topic`, `detail` | Semantic guide for workflow, analysis profile, baseline, coverage, suppressions, review state, changed-scope |
+| `compare_runs` | `run_id_before`, `run_id_after`, `focus` | Run-to-run delta: regressions, improvements, health change |
+| `evaluate_gates` | `run_id`, gate thresholds, `fail_on_untested_hotspots`, `coverage_min` | Preview CI gating decisions |
+| `get_report_section` | `run_id`, `section`, `family`, `path`, `offset`, `limit` | Read report sections; `metrics_detail` is paginated with family/path filters |
+| `list_findings` | `family`, `severity`, `novelty`, `sort_by`, `detail_level`, `changed_paths`, pagination | Filtered, paginated findings; use after hotspots or `check_*` |
+| `get_finding` | `finding_id`, `run_id`, `detail_level` | Single finding detail by id; defaults to `normal` |
+| `get_remediation` | `finding_id`, `run_id`, `detail_level` | Remediation payload for one finding |
+| `list_hotspots` | `kind`, `run_id`, `detail_level`, `changed_paths`, `limit` | Priority-ranked hotspot views; preferred before broad listing |
+| `check_clones` | `run_id`, `root`, `path`, `clone_type`, `source_kind`, `detail_level` | Clone findings only; `health.dimensions` includes only `clones` |
+| `check_complexity` | `run_id`, `root`, `path`, `min_complexity`, `detail_level` | Complexity hotspots only |
+| `check_coupling` | `run_id`, `root`, `path`, `detail_level` | Coupling hotspots only |
+| `check_cohesion` | `run_id`, `root`, `path`, `detail_level` | Cohesion hotspots only |
+| `check_dead_code` | `run_id`, `root`, `path`, `min_severity`, `detail_level` | Dead-code findings only |
+| `generate_pr_summary` | `run_id`, `changed_paths`, `git_diff_ref`, `format` | PR-friendly markdown or JSON summary |
+| `mark_finding_reviewed` | `finding_id`, `run_id`, `note` | Session-local review marker (in-memory) |
+| `list_reviewed_findings` | `run_id` | List reviewed findings for a run |
+| `clear_session_runs` | none | Reset in-memory runs and session state |
All tools are read-only except `mark_finding_reviewed` and `clear_session_runs`
(session-local, in-memory). `check_*` tools query stored runs — call
@@ -139,7 +142,8 @@ Recommended workflow:
5. `generate_pr_summary(format="markdown")`
`metrics_detail` families currently include canonical health/quality families
-plus `overloaded_modules`, `coverage_adoption`, and `api_surface`.
+plus `overloaded_modules`, `coverage_adoption`, `coverage_join`, and
+`api_surface`.
For analysis sensitivity, the intended model is:
@@ -200,6 +204,17 @@ state behind `codeclone://latest/...`.
- baseline trust semantics
- cache semantics
- canonical report contract
+- `coverage_xml` is resolved relative to the absolute root when it is not
+ already absolute. It is a current-run Cobertura input only; MCP must never
+ write it to baseline/cache/report artifacts or treat it as baseline truth.
+- When `respect_pyproject=true`, MCP also respects `golden_fixture_paths`.
+ Clone groups excluded by that policy are omitted from active clone/gate
+ projections but remain available in the canonical report under the optional
+ `findings.groups.clones.suppressed.*` bucket.
+- Invalid Cobertura XML during `analyze_*` does not fail analysis; the stored
+ run carries `coverage_join.status="invalid"` plus `invalid_reason`.
+ `evaluate_gates(fail_on_untested_hotspots=true)` on that run is a contract
+ error because hotspot gating requires a valid join.
- Inline MCP design-threshold parameters (`complexity_threshold`,
`coupling_threshold`, `cohesion_threshold`) define the canonical design
finding universe of that run and are recorded in
@@ -217,6 +232,10 @@ state behind `codeclone://latest/...`.
- `metrics_detail(family="overloaded_modules")` exposes the canonical report-only
module-hotspot layer, but does not promote it into findings, hotlists, or
gate semantics.
+- `metrics_detail(family="coverage_join")` exposes the canonical current-run
+ coverage join summary/items, including measured coverage hotspots and
+ coverage scope gaps. `evaluate_gates(fail_on_untested_hotspots=true)`
+ requires a stored run created with valid `coverage_xml`.
- `get_remediation` is a deterministic MCP projection over existing
suggestions/explainability data, not a second remediation engine.
- `analysis_mode="clones_only"` must mirror the same metric/dependency
@@ -276,18 +295,22 @@ state behind `codeclone://latest/...`.
it returns `mixed` when run-to-run finding deltas and `health_delta` disagree.
- `analysis_mode="clones_only"` keeps clone findings fully usable, but MCP
surfaces mark `health` as unavailable instead of fabricating zeroed metrics.
+- `coverage_xml` requires `analysis_mode="full"` because coverage join depends
+ on function-span metrics.
- `codeclone://latest/triage` is a latest-only resource; run-specific triage is
available via the tool, not via a `codeclone://runs/{run_id}/...` resource URI.
## Failure modes
-| Condition | Behavior |
-|--------------------------------------------|---------------------------------------------------|
-| `mcp` extra not installed | `codeclone-mcp` prints install hint and exits `2` |
-| Invalid root path / invalid numeric config | service raises contract error |
-| Requested run missing | service raises run-not-found error |
-| Requested finding missing | service raises finding-not-found error |
-| Unsupported report section/resource suffix | service raises contract error |
+| Condition | Behavior |
+|---------------------------------------------------|---------------------------------------------------|
+| `mcp` extra not installed | `codeclone-mcp` prints install hint and exits `2` |
+| Invalid root path / invalid numeric config | service raises contract error |
+| `coverage_xml` with `analysis_mode="clones_only"` | service raises contract error |
+| Coverage hotspot gate without valid coverage join | service raises contract error |
+| Requested run missing | service raises run-not-found error |
+| Requested finding missing | service raises finding-not-found error |
+| Unsupported report section/resource suffix | service raises contract error |
## Determinism / canonicalization
@@ -297,6 +320,8 @@ state behind `codeclone://latest/...`.
- No MCP-only heuristics may change analysis or gating semantics.
- MCP must not re-synthesize design findings from raw metrics after the run;
threshold-aware design findings belong to the canonical report document.
+- Coverage join ordering and hotspot gates are inherited from canonical
+ `metrics.families.coverage_join` facts.
## Locked by tests
diff --git a/docs/book/appendix/b-schema-layouts.md b/docs/book/appendix/b-schema-layouts.md
index 86d3d13..54b0b4c 100644
--- a/docs/book/appendix/b-schema-layouts.md
+++ b/docs/book/appendix/b-schema-layouts.md
@@ -91,11 +91,11 @@ Notes:
}
```
-## Cache schema (`2.3`)
+## Cache schema (`2.4`)
```json
{
- "v": "2.3",
+ "v": "2.4",
"payload": {
"py": "cp313",
"fp": "1",
@@ -140,14 +140,16 @@ Notes:
- `ss` stores per-file source stats and is required for full cache-hit accounting
in discovery.
- `rn`/`rq` are optional and decode to empty arrays when absent.
+- Cached public-API symbol payloads preserve declaration order for `params`;
+ canonicalization must not rewrite callable signature order.
- `u` row decoder accepts both legacy 11-column rows and canonical 17-column rows
(legacy rows map new structural fields to neutral defaults).
-## Report schema (`2.5`)
+## Report schema (`2.8`)
```json
{
- "report_schema_version": "2.5",
+ "report_schema_version": "2.8",
"meta": {
"codeclone_version": "2.0.0b5",
"project_name": "codeclone",
@@ -199,14 +201,24 @@ Notes:
"summary": {
"...": "...",
"suppressed": {
- "dead_code": 0
+ "dead_code": 0,
+ "clones": 1
}
},
"groups": {
"clones": {
"functions": [],
"blocks": [],
- "segments": []
+ "segments": [],
+ "suppressed": {
+ "functions": [
+ {
+ "...": "..."
+ }
+ ],
+ "blocks": [],
+ "segments": []
+ }
},
"structural": {
"groups": [
@@ -260,6 +272,21 @@ Notes:
"docstring_permille": 0,
"typing_any_count": 0
},
+ "coverage_join": {
+ "status": "ok",
+ "source": "coverage.xml",
+ "files": 0,
+ "units": 0,
+ "measured_units": 0,
+ "overall_executable_lines": 0,
+ "overall_covered_lines": 0,
+ "overall_permille": 0,
+ "missing_from_report_units": 0,
+ "coverage_hotspots": 0,
+ "scope_gap_hotspots": 0,
+ "hotspot_threshold_percent": 50,
+ "invalid_reason": null
+ },
"api_surface": {
"enabled": false,
"modules": 0,
@@ -322,6 +349,24 @@ Notes:
},
"items": []
},
+ "coverage_join": {
+ "summary": {
+ "status": "ok",
+ "source": "coverage.xml",
+ "files": 0,
+ "units": 0,
+ "measured_units": 0,
+ "overall_executable_lines": 0,
+ "overall_covered_lines": 0,
+ "overall_permille": 0,
+ "missing_from_report_units": 0,
+ "coverage_hotspots": 0,
+ "scope_gap_hotspots": 0,
+ "hotspot_threshold_percent": 50,
+ "invalid_reason": null
+ },
+ "items": []
+ },
"api_surface": {
"summary": {
"enabled": false,
@@ -393,7 +438,7 @@ Notes:
```text
# CodeClone Report
- Markdown schema: 1.0
-- Source report schema: 2.5
+- Source report schema: 2.8
...
## Overview
## Inventory
@@ -479,7 +524,7 @@ Notes:
],
"properties": {
"profileVersion": "1.0",
- "reportSchemaVersion": "2.5"
+ "reportSchemaVersion": "2.8"
},
"results": [
{
diff --git a/docs/mcp.md b/docs/mcp.md
index a438644..ac7d5b4 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -126,11 +126,23 @@ run-scoped URI templates.
- Summary `diff` also carries compact adoption/API deltas:
`typing_param_permille_delta`, `typing_return_permille_delta`,
`docstring_permille_delta`, `api_breaking_changes`, and `new_api_symbols`.
+- When `analyze_repository` or `analyze_changed_paths` receives
+ `coverage_xml`, summaries include compact `coverage_join` facts. The XML path
+ may be absolute or relative to the analysis root, and the join remains a
+ current-run signal rather than baseline truth.
+- When `respect_pyproject=true`, MCP also applies `golden_fixture_paths`.
+ Fully matching golden-fixture clone groups are excluded from active clone and
+ gate projections but remain visible in the canonical report under the
+ optional `findings.groups.clones.suppressed.*` bucket.
+- Invalid Cobertura XML does not fail `analyze_*`; summaries expose
+ `coverage_join.status="invalid"` plus `invalid_reason`. Coverage hotspot gate
+ preview still requires a valid join.
- Run IDs are 8-char hex handles; finding IDs are short prefixed forms.
Both accept the full canonical form as input.
- `metrics_detail(family="overloaded_modules")` exposes the report-only
module-hotspot layer without turning it into findings or gate data.
-- `metrics_detail` also accepts `coverage_adoption` and `api_surface`.
+- `metrics_detail` also accepts `coverage_adoption`, `coverage_join`, and
+ `api_surface`.
- `help(topic=...)` is static: meaning, anti-patterns, next step, doc links.
- Start with repo defaults or `pyproject`-resolved thresholds, then lower them
only for an explicit higher-sensitivity exploratory pass.
@@ -178,7 +190,7 @@ analyze_repository → get_run_summary or get_production_triage
### Semantic uncertainty recovery
```
-help(topic="workflow" | "analysis_profile" | "baseline" | "suppressions" | "latest_runs" | "review_state" | "changed_scope")
+help(topic="workflow" | "analysis_profile" | "baseline" | "coverage" | "suppressions" | "latest_runs" | "review_state" | "changed_scope")
```
### Full repository review
@@ -197,6 +209,17 @@ analyze_repository(api_surface=true) # when you need API inventory/diff
→ compare_runs
```
+### Coverage hotspot review
+
+```
+analyze_repository(coverage_xml="coverage.xml")
+→ metrics_detail(family="coverage_join")
+→ evaluate_gates(fail_on_untested_hotspots=true, coverage_min=50)
+
+The coverage join in MCP separates measured `coverage_hotspots` from
+`scope_gap_hotspots` (functions outside the supplied `coverage.xml` scope).
+```
+
### Changed-files review (PR / patch)
```
@@ -243,6 +266,8 @@ Separate accepted baseline debt from new regressions.
- Keep `git_diff_ref` to a safe single revision expression; option-like,
whitespace-containing, and punctuated shell-style inputs are rejected.
- Pass an absolute `root` — MCP rejects relative roots like `.`.
+- Use `coverage_xml` only with `analysis_mode="full"`; clones-only analysis does
+ not collect the function-span facts needed for coverage join.
- Use `"production-only"` / `source_kind` filters to cut test/fixture noise.
- Use `mark_finding_reviewed` + `exclude_reviewed=true` in long sessions.
diff --git a/docs/sarif.md b/docs/sarif.md
index 3f3b7b1..2bb2518 100644
--- a/docs/sarif.md
+++ b/docs/sarif.md
@@ -68,6 +68,12 @@ For clone results, CodeClone also carries novelty-aware metadata when known:
This improves usefulness in IDE/code-scanning flows that distinguish new vs
known findings.
+Coverage join can materialize `coverage` / `coverage_hotspot` and
+`coverage_scope_gap` design findings when the canonical report already
+contains valid `metrics.families.coverage_join` facts. SARIF projects those
+findings like other design findings; it does not parse Cobertura XML or create
+coverage-specific analysis truth.
+
## Rule metadata
Rule records are intentionally richer than a minimal SARIF export.
diff --git a/pyproject.toml b/pyproject.toml
index 6bc0987..c2abe99 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,6 +100,22 @@ addopts = "-ra"
branch = true
source = ["codeclone"]
+[tool.codeclone]
+baseline = "codeclone.baseline.json"
+min_loc = 6
+min_stmt = 4
+fail_on_new = true
+fail_cycles = true
+fail_dead_code = true
+fail_health = 87
+fail_on_new_metrics = true
+typing_coverage = false
+docstring_coverage = false
+api_surface = false
+golden_fixture_paths = ["tests/fixtures/golden_*"]
+min_typing_coverage = 99
+
+
[tool.coverage.report]
show_missing = true
fail_under = 99
diff --git a/tests/_assertions.py b/tests/_assertions.py
index 1f4dd4a..8447d62 100644
--- a/tests/_assertions.py
+++ b/tests/_assertions.py
@@ -14,6 +14,11 @@ def assert_contains_all(text: str, *needles: str) -> None:
assert needle in text
+def assert_contains_none(text: str, *needles: str) -> None:
+ for needle in needles:
+ assert needle not in text
+
+
def assert_mapping_entries(
mapping: Mapping[str, object],
/,
@@ -23,6 +28,11 @@ def assert_mapping_entries(
assert mapping[key] == value
+def assert_missing_keys(mapping: Mapping[str, object], /, *keys: str) -> None:
+ for key in keys:
+ assert key not in mapping
+
+
def snapshot_python_tag(snapshot: Mapping[str, object]) -> str:
meta = snapshot.get("meta", {})
assert isinstance(meta, dict)
diff --git a/tests/_ast_metrics_helpers.py b/tests/_ast_metrics_helpers.py
new file mode 100644
index 0000000..cd75a99
--- /dev/null
+++ b/tests/_ast_metrics_helpers.py
@@ -0,0 +1,29 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+import ast
+
+from codeclone import extractor
+from codeclone.qualnames import QualnameCollector
+
+
+def tree_collector_and_imports(
+ source: str,
+ *,
+ module_name: str,
+) -> tuple[ast.Module, QualnameCollector, frozenset[str]]:
+ tree = ast.parse(source)
+ collector = QualnameCollector()
+ collector.visit(tree)
+ walk = extractor._collect_module_walk_data(
+ tree=tree,
+ module_name=module_name,
+ collector=collector,
+ collect_referenced_names=True,
+ )
+ return tree, collector, walk.import_names
diff --git a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json
index 1417c3a..ab5236f 100644
--- a/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json
+++ b/tests/fixtures/golden_v2/pyproject_defaults/golden_expected_cli_snapshot.json
@@ -2,7 +2,7 @@
"meta": {
"python_tag": "cp313"
},
- "report_schema_version": "2.5",
+ "report_schema_version": "2.8",
"project_name": "pyproject_defaults",
"scan_root": ".",
"baseline_status": "missing",
diff --git a/tests/test_adoption.py b/tests/test_adoption.py
index 76ed21b..a4852f9 100644
--- a/tests/test_adoption.py
+++ b/tests/test_adoption.py
@@ -8,35 +8,18 @@
import ast
-from codeclone import extractor
from codeclone.metrics import _visibility as visibility_mod
from codeclone.metrics import adoption as adoption_mod
from codeclone.metrics._visibility import build_module_visibility
from codeclone.metrics.adoption import collect_module_adoption
from codeclone.qualnames import QualnameCollector
-
-
-def _tree_collector_and_imports(
- source: str,
- *,
- module_name: str,
-) -> tuple[ast.Module, QualnameCollector, frozenset[str]]:
- tree = ast.parse(source)
- collector = QualnameCollector()
- collector.visit(tree)
- walk = extractor._collect_module_walk_data(
- tree=tree,
- module_name=module_name,
- collector=collector,
- collect_referenced_names=True,
- )
- return tree, collector, walk.import_names
+from tests._ast_metrics_helpers import tree_collector_and_imports
def test_build_module_visibility_supports_strict_dunder_all_for_private_modules() -> (
None
):
- tree, collector, import_names = _tree_collector_and_imports(
+ tree, collector, import_names = tree_collector_and_imports(
"""
__all__ = ["public_fn", "PublicClass"]
@@ -66,7 +49,7 @@ class PublicClass:
def test_collect_module_adoption_counts_annotations_docstrings_and_any() -> None:
- tree, collector, import_names = _tree_collector_and_imports(
+ tree, collector, import_names = tree_collector_and_imports(
"""
from typing import Any
@@ -120,7 +103,7 @@ def _hidden(self, value: int) -> None:
def test_visibility_helpers_cover_private_modules_and_declared_all_edges() -> None:
- tree, collector, import_names = _tree_collector_and_imports(
+ tree, collector, import_names = tree_collector_and_imports(
"""
items: list[str] = []
_private = 1
diff --git a/tests/test_api_surface.py b/tests/test_api_surface.py
index 8190a7b..7cf1054 100644
--- a/tests/test_api_surface.py
+++ b/tests/test_api_surface.py
@@ -9,7 +9,6 @@
import ast
from typing import Literal, cast
-from codeclone import extractor
from codeclone.metrics import api_surface as api_surface_mod
from codeclone.metrics._visibility import ModuleVisibility
from codeclone.metrics.api_surface import (
@@ -22,28 +21,11 @@
ModuleApiSurface,
PublicSymbol,
)
-from codeclone.qualnames import QualnameCollector
-
-
-def _tree_collector_and_imports(
- source: str,
- *,
- module_name: str,
-) -> tuple[ast.Module, QualnameCollector, frozenset[str]]:
- tree = ast.parse(source)
- collector = QualnameCollector()
- collector.visit(tree)
- walk = extractor._collect_module_walk_data(
- tree=tree,
- module_name=module_name,
- collector=collector,
- collect_referenced_names=True,
- )
- return tree, collector, walk.import_names
+from tests._ast_metrics_helpers import tree_collector_and_imports
def test_collect_module_api_surface_skips_self_and_collects_public_symbols() -> None:
- tree, collector, import_names = _tree_collector_and_imports(
+ tree, collector, import_names = tree_collector_and_imports(
"""
__all__ = ["run", "Public", "VALUE"]
@@ -227,7 +209,7 @@ def _public_symbol(
def test_collect_module_api_surface_skips_private_or_empty_modules() -> None:
- private_tree, private_collector, private_imports = _tree_collector_and_imports(
+ private_tree, private_collector, private_imports = tree_collector_and_imports(
"""
def hidden():
return 1
@@ -245,7 +227,7 @@ def hidden():
is None
)
- empty_tree, empty_collector, empty_imports = _tree_collector_and_imports(
+ empty_tree, empty_collector, empty_imports = tree_collector_and_imports(
"""
def _hidden():
return 1
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
new file mode 100644
index 0000000..1963edc
--- /dev/null
+++ b/tests/test_benchmark.py
@@ -0,0 +1,80 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+import pytest
+
+from benchmarks.run_benchmark import (
+ RunMeasurement,
+ Scenario,
+ _validate_inventory_sample,
+)
+
+
+def _measurement(
+ *,
+ found: int,
+ analyzed: int,
+ cached: int,
+ skipped: int = 0,
+) -> RunMeasurement:
+ return RunMeasurement(
+ elapsed_seconds=0.1,
+ digest="digest",
+ files_found=found,
+ files_analyzed=analyzed,
+ files_cached=cached,
+ files_skipped=skipped,
+ )
+
+
+def test_benchmark_inventory_validation_accepts_valid_cold_and_warm_samples() -> None:
+ _validate_inventory_sample(
+ scenario=Scenario(name="cold_full", mode="cold", extra_args=()),
+ measurement=_measurement(found=10, analyzed=10, cached=0),
+ )
+ _validate_inventory_sample(
+ scenario=Scenario(name="warm_full", mode="warm", extra_args=()),
+ measurement=_measurement(found=10, analyzed=0, cached=10),
+ )
+
+
+@pytest.mark.parametrize(
+ ("scenario", "measurement", "message"),
+ (
+ (
+ Scenario(name="cold_full", mode="cold", extra_args=()),
+ _measurement(found=10, analyzed=0, cached=0, skipped=10),
+ "skipped 10 files",
+ ),
+ (
+ Scenario(name="cold_full", mode="cold", extra_args=()),
+ _measurement(found=10, analyzed=9, cached=1),
+ "unexpectedly used cache",
+ ),
+ (
+ Scenario(name="warm_full", mode="warm", extra_args=()),
+ _measurement(found=10, analyzed=10, cached=0),
+ "did not use cache",
+ ),
+ (
+ Scenario(name="warm_full", mode="warm", extra_args=()),
+ _measurement(found=10, analyzed=1, cached=9),
+ "analyzed files unexpectedly",
+ ),
+ ),
+)
+def test_benchmark_inventory_validation_rejects_invalid_samples(
+ scenario: Scenario,
+ measurement: RunMeasurement,
+ message: str,
+) -> None:
+ with pytest.raises(RuntimeError, match=message):
+ _validate_inventory_sample(
+ scenario=scenario,
+ measurement=measurement,
+ )
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 49ddcdf..82674a3 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -21,6 +21,7 @@
from codeclone.cache_paths import runtime_filepath_from_wire, wire_filepath_from_runtime
from codeclone.errors import CacheError
from codeclone.extractor import Unit
+from codeclone.models import ApiParamSpec, FileMetrics, ModuleApiSurface, PublicSymbol
def _make_unit(filepath: str) -> Unit:
@@ -110,6 +111,61 @@ def test_cache_roundtrip_preserves_empty_structural_findings(tmp_path: Path) ->
assert entry["structural_findings"] == []
+def test_cache_roundtrip_preserves_api_surface_parameter_order(
+ tmp_path: Path,
+) -> None:
+ cache_path = tmp_path / "cache.json"
+ cache = Cache(cache_path)
+ cache.put_file_entry(
+ "x.py",
+ {"mtime_ns": 1, "size": 10},
+ [],
+ [],
+ [],
+ file_metrics=FileMetrics(
+ class_metrics=(),
+ module_deps=(),
+ dead_candidates=(),
+ referenced_names=frozenset(),
+ import_names=frozenset(),
+ class_names=frozenset(),
+ api_surface=ModuleApiSurface(
+ module="pkg.mod",
+ filepath="x.py",
+ all_declared=("run",),
+ symbols=(
+ PublicSymbol(
+ qualname="pkg.mod:run",
+ kind="function",
+ start_line=1,
+ end_line=2,
+ params=(
+ ApiParamSpec(
+ name="beta",
+ kind="pos_or_kw",
+ has_default=False,
+ ),
+ ApiParamSpec(
+ name="alpha",
+ kind="pos_or_kw",
+ has_default=False,
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ )
+ cache.save()
+
+ loaded = Cache(cache_path)
+ loaded.load()
+ entry = loaded.get_file_entry("x.py")
+ assert entry is not None
+ params = entry["api_surface"]["symbols"][0]["params"]
+ assert [param["name"] for param in params] == ["beta", "alpha"]
+
+
def test_cache_load_normalizes_stale_structural_findings(tmp_path: Path) -> None:
cache_path = tmp_path / "cache.json"
cache = Cache(cache_path)
diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py
index ecc60a0..14e6a9c 100644
--- a/tests/test_cli_config.py
+++ b/tests/test_cli_config.py
@@ -148,6 +148,11 @@ def test_apply_pyproject_config_overrides_respects_explicit_cli_flags() -> None:
("min_loc", 10, 10),
("baseline", "codeclone.baseline.json", "codeclone.baseline.json"),
("cache_path", None, None),
+ (
+ "golden_fixture_paths",
+ ["tests/fixtures/golden_*", "tests/fixtures/golden_*"],
+ ("tests/fixtures/golden_*",),
+ ),
],
)
def test_validate_config_value_accepts_expected_types(
@@ -163,6 +168,8 @@ def test_validate_config_value_accepts_expected_types(
("update_baseline", "yes", "expected bool"),
("min_loc", True, "expected int"),
("baseline", 1, "expected str"),
+ ("golden_fixture_paths", "tests/fixtures/golden_*", "expected list\\[str\\]"),
+ ("golden_fixture_paths", ["pkg/*"], "must target tests/"),
],
)
def test_validate_config_value_rejects_invalid_types(
@@ -214,6 +221,30 @@ def test_normalize_path_config_value_behaviour(tmp_path: Path) -> None:
)
== "/tmp/absolute-cache.json"
)
+ patterns = ("tests/fixtures/golden_*",)
+ assert (
+ cfg_mod._normalize_path_config_value(
+ key="golden_fixture_paths",
+ value=patterns,
+ root_path=tmp_path,
+ )
+ == patterns
+ )
+
+
+def test_load_pyproject_config_accepts_golden_fixture_paths(tmp_path: Path) -> None:
+ _write_pyproject(
+ tmp_path / "pyproject.toml",
+ """
+[tool.codeclone]
+golden_fixture_paths = [
+ "./tests/fixtures/golden_*",
+ "tests/fixtures/golden_*",
+]
+""".strip(),
+ )
+ loaded = cfg_mod.load_pyproject_config(tmp_path)
+ assert loaded["golden_fixture_paths"] == ("tests/fixtures/golden_*",)
def test_load_toml_py310_missing_tomli_raises(
diff --git a/tests/test_cli_inprocess.py b/tests/test_cli_inprocess.py
index 022c726..a9e715a 100644
--- a/tests/test_cli_inprocess.py
+++ b/tests/test_cli_inprocess.py
@@ -31,7 +31,11 @@
)
from codeclone.errors import CacheError
from codeclone.models import Unit
-from tests._assertions import assert_contains_all, assert_mapping_entries
+from tests._assertions import (
+ assert_contains_all,
+ assert_mapping_entries,
+ assert_missing_keys,
+)
from tests._report_access import (
report_clone_groups as _report_clone_groups,
)
@@ -228,6 +232,19 @@ def f2():
)
+def _write_duplicate_function_module(directory: Path, filename: str) -> Path:
+ return _write_python_module(
+ directory,
+ filename,
+ """
+def duplicated():
+ value = 1
+ return value
+""".strip()
+ + "\n",
+ )
+
+
def _prepare_basic_project(root: Path) -> Path:
root.mkdir()
return _write_python_module(root, "a.py")
@@ -2305,6 +2322,70 @@ def test_cli_update_baseline_report_meta_uses_updated_payload_hash(
assert baseline_meta["payload_sha256_verified"] is True
+def test_cli_update_baseline_rewrites_embedded_metrics_to_enabled_surfaces_only(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ _write_python_module(
+ tmp_path,
+ "a.py",
+ """
+def public(value: int) -> int:
+ return value
+""",
+ )
+ baseline = tmp_path / "codeclone.baseline.json"
+
+ _run_parallel_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--baseline",
+ str(baseline),
+ "--update-baseline",
+ "--api-surface",
+ "--no-progress",
+ ],
+ )
+ initial_payload = json.loads(baseline.read_text("utf-8"))
+ assert "api_surface" in initial_payload
+ assert "typing_param_permille" in initial_payload["metrics"]
+
+ (tmp_path / "pyproject.toml").write_text(
+ """
+[tool.codeclone]
+baseline = "codeclone.baseline.json"
+api_surface = false
+typing_coverage = false
+docstring_coverage = false
+""".strip()
+ + "\n",
+ "utf-8",
+ )
+
+ _run_parallel_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--update-baseline",
+ "--no-progress",
+ ],
+ )
+
+ payload = json.loads(baseline.read_text("utf-8"))
+ meta = cast(dict[str, object], payload["meta"])
+ metrics = cast(dict[str, object], payload["metrics"])
+ assert_missing_keys(payload, "api_surface")
+ assert_missing_keys(meta, "api_surface_payload_sha256")
+ assert_missing_keys(
+ metrics,
+ "typing_param_permille",
+ "typing_return_permille",
+ "docstring_permille",
+ "typing_any_count",
+ )
+
+
def test_cli_update_baseline_write_error_is_contract_error(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
@@ -3322,6 +3403,162 @@ def test_cli_summary_with_api_surface_shows_public_api_line(
assert "modules" in out
+def test_cli_ci_summary_includes_adoption_and_public_api_lines(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+ capsys: pytest.CaptureFixture[str],
+) -> None:
+ src = tmp_path / "a.py"
+ metrics_baseline_path = tmp_path / "metrics-baseline.json"
+ src.write_text("def f(value: int) -> int:\n return value\n", "utf-8")
+ baseline_path = _write_baseline(
+ tmp_path / "baseline.json",
+ python_version=f"{sys.version_info.major}.{sys.version_info.minor}",
+ )
+ _patch_parallel(monkeypatch)
+ _run_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--no-progress",
+ "--api-surface",
+ "--metrics-baseline",
+ str(metrics_baseline_path),
+ "--update-metrics-baseline",
+ ],
+ )
+ _ = capsys.readouterr()
+ _run_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--ci",
+ "--baseline",
+ str(baseline_path),
+ "--metrics-baseline",
+ str(metrics_baseline_path),
+ "--api-surface",
+ ],
+ )
+ out = capsys.readouterr().out
+ assert "Adoption" in out
+ assert "Public API" in out
+ assert "symbols=" in out
+ assert "docstrings=" in out
+
+
+def test_cli_pyproject_golden_fixture_paths_exclude_fixture_clone_groups(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ fixtures_dir = tmp_path / "tests" / "fixtures" / "golden_project"
+ fixtures_dir.mkdir(parents=True)
+ _write_duplicate_function_module(fixtures_dir, "a.py")
+ _write_duplicate_function_module(fixtures_dir, "b.py")
+ report_path = tmp_path / "report.json"
+ (tmp_path / "pyproject.toml").write_text(
+ """
+[tool.codeclone]
+min_loc = 1
+min_stmt = 1
+fail_on_new = true
+skip_metrics = true
+golden_fixture_paths = ["tests/fixtures/golden_*"]
+""".strip()
+ + "\n",
+ "utf-8",
+ )
+
+ _run_parallel_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--no-progress",
+ "--json",
+ str(report_path),
+ ],
+ )
+
+ payload = json.loads(report_path.read_text("utf-8"))
+ clone_groups = cast(
+ "dict[str, object]",
+ cast("dict[str, object]", payload["findings"])["groups"],
+ )["clones"]
+ clone_groups_map = cast("dict[str, object]", clone_groups)
+ assert clone_groups_map["functions"] == []
+ suppressed = cast("dict[str, object]", clone_groups_map["suppressed"])
+ suppressed_functions = cast("list[dict[str, object]]", suppressed["functions"])
+ assert len(suppressed_functions) == 1
+ assert suppressed_functions[0]["suppression_rule"] == "golden_fixture"
+ assert (
+ cast("dict[str, int]", payload["findings"]["summary"]["clones"])["suppressed"]
+ == 1
+ )
+
+
+def test_cli_public_api_breaking_count_stable_across_warm_cache(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+ capsys: pytest.CaptureFixture[str],
+) -> None:
+ src = tmp_path / "pkg.py"
+ metrics_baseline_path = tmp_path / "metrics-baseline.json"
+ cache_path = tmp_path / "cache.json"
+ src.write_text(
+ "def run(alpha: int, beta: int) -> int:\n return alpha + beta\n",
+ "utf-8",
+ )
+ _patch_parallel(monkeypatch)
+ _run_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--no-progress",
+ "--api-surface",
+ "--metrics-baseline",
+ str(metrics_baseline_path),
+ "--update-metrics-baseline",
+ ],
+ )
+ _ = capsys.readouterr()
+
+ src.write_text(
+ "def run(beta: int, alpha: int) -> int:\n return alpha + beta\n",
+ "utf-8",
+ )
+
+ _run_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--no-progress",
+ "--api-surface",
+ "--metrics-baseline",
+ str(metrics_baseline_path),
+ "--cache-path",
+ str(cache_path),
+ ],
+ )
+ cold_out = capsys.readouterr().out
+
+ _run_main(
+ monkeypatch,
+ [
+ str(tmp_path),
+ "--no-progress",
+ "--api-surface",
+ "--metrics-baseline",
+ str(metrics_baseline_path),
+ "--cache-path",
+ str(cache_path),
+ ],
+ )
+ warm_out = capsys.readouterr().out
+
+ assert "1 breaking" in cold_out
+ assert "1 breaking" in warm_out
+
+
def test_cli_summary_no_color_has_no_ansi(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
diff --git a/tests/test_cli_unit.py b/tests/test_cli_unit.py
index 3ccb414..a239a1b 100644
--- a/tests/test_cli_unit.py
+++ b/tests/test_cli_unit.py
@@ -940,6 +940,7 @@ def test_print_summary_invariant_warning(
func_clones_count=0,
block_clones_count=0,
segment_clones_count=0,
+ suppressed_golden_fixture_groups=0,
suppressed_segment_groups=0,
new_clones_count=0,
)
@@ -969,6 +970,51 @@ def test_compact_summary_labels_use_machine_scannable_keys() -> None:
== "Metrics cc=2.8/21 cbo=0.6/8 lcom4=1.2/4"
" cycles=0 dead_code=1 health=85(B) overloaded_modules=3"
)
+ assert (
+ ui.fmt_summary_compact_adoption(
+ param_permille=750,
+ return_permille=500,
+ docstring_permille=667,
+ any_annotation_count=1,
+ )
+ == "Adoption params=75.0% returns=50.0% docstrings=66.7% any=1"
+ )
+ assert (
+ ui.fmt_summary_compact_api_surface(
+ public_symbols=3,
+ modules=2,
+ breaking=1,
+ added=4,
+ )
+ == "Public API symbols=3 modules=2 breaking=1 added=4"
+ )
+ assert (
+ ui.fmt_summary_compact_clones(
+ function=1,
+ block=2,
+ segment=0,
+ suppressed=3,
+ fixture_excluded=2,
+ new=4,
+ )
+ == "Clones func=1 block=2 seg=0 suppressed=3 fixtures=2 new=4"
+ )
+ assert (
+ ui.fmt_summary_compact_coverage_join(
+ status="ok",
+ overall_permille=735,
+ coverage_hotspots=2,
+ scope_gap_hotspots=1,
+ threshold_percent=50,
+ source_label="coverage.xml",
+ )
+ == "Coverage status=ok overall=73.5% coverage_hotspots=2"
+ " threshold=50 scope_gaps=1 source=coverage.xml"
+ )
+ assert (
+ ui.fmt_coverage_join_ignored("bad xml")
+ == "[warning]Coverage join ignored: bad xml[/warning]"
+ )
def test_ui_summary_formatters_cover_optional_branches() -> None:
@@ -986,9 +1032,11 @@ def test_ui_summary_formatters_cover_optional_branches() -> None:
block=2,
segment=3,
suppressed=1,
+ fixture_excluded=2,
new=0,
)
assert "[bold yellow]3[/bold yellow] seg" in clones
+ assert "[yellow]2[/yellow] fixtures" in clones
assert "5 detected" in ui.fmt_metrics_cycles(5)
dead_with_suppressed = ui.fmt_metrics_dead_code(447, suppressed=9)
@@ -1036,6 +1084,34 @@ def test_ui_summary_formatters_cover_optional_branches() -> None:
added=4,
)
assert_contains_all(api_surface, "symbols", "modules", "breaking", "added")
+ coverage_join = ui.fmt_metrics_coverage_join(
+ status="ok",
+ overall_permille=735,
+ coverage_hotspots=2,
+ scope_gap_hotspots=1,
+ threshold_percent=50,
+ source_label="coverage.xml",
+ )
+ assert_contains_all(
+ coverage_join,
+ "73.5% overall",
+ "[bold red]2[/bold red] hotspots < 50%",
+ "[bold yellow]1[/bold yellow] scope gaps",
+ "coverage.xml",
+ )
+ coverage_join_unavailable = ui.fmt_metrics_coverage_join(
+ status="invalid",
+ overall_permille=0,
+ coverage_hotspots=0,
+ scope_gap_hotspots=0,
+ threshold_percent=50,
+ source_label="coverage.xml",
+ )
+ assert_contains_all(
+ coverage_join_unavailable,
+ "join unavailable",
+ "coverage.xml",
+ )
changed_paths = ui.fmt_changed_scope_paths(count=45)
assert "45" in changed_paths
assert "from git diff" in changed_paths
@@ -1124,7 +1200,60 @@ def test_print_metrics_in_quiet_mode_includes_overloaded_modules(
assert "Public API" not in out
-def test_print_metrics_in_normal_mode_includes_adoption_and_public_api(
+def test_print_metrics_in_quiet_mode_includes_adoption_public_api_and_coverage(
+ monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+ monkeypatch.setattr(cli, "console", cli._make_console(no_color=True))
+ cli_summary._print_metrics(
+ console=cast("cli_summary._Printer", cli.console),
+ quiet=True,
+ metrics=cli_summary.MetricsSnapshot(
+ complexity_avg=2.8,
+ complexity_max=20,
+ high_risk_count=0,
+ coupling_avg=0.5,
+ coupling_max=9,
+ cohesion_avg=1.2,
+ cohesion_max=4,
+ cycles_count=0,
+ dead_code_count=0,
+ health_total=85,
+ health_grade="B",
+ adoption_param_permille=750,
+ adoption_return_permille=500,
+ adoption_docstring_permille=667,
+ adoption_any_annotation_count=1,
+ api_surface_enabled=True,
+ api_surface_modules=2,
+ api_surface_public_symbols=3,
+ api_surface_added=4,
+ api_surface_breaking=1,
+ coverage_join_status="ok",
+ coverage_join_overall_permille=735,
+ coverage_join_coverage_hotspots=2,
+ coverage_join_scope_gap_hotspots=1,
+ coverage_join_threshold_percent=50,
+ coverage_join_source_label="coverage.xml",
+ overloaded_modules_candidates=3,
+ overloaded_modules_total=158,
+ overloaded_modules_population_status="ok",
+ overloaded_modules_top_score=0.98,
+ ),
+ )
+ out = capsys.readouterr().out
+ assert_contains_all(
+ out,
+ "Adoption",
+ "params=75.0%",
+ "Public API",
+ "breaking=1",
+ "Coverage",
+ "coverage_hotspots=2",
+ "source=coverage.xml",
+ )
+
+
+def test_print_metrics_in_normal_mode_includes_adoption_public_api_and_coverage(
monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
) -> None:
monkeypatch.setattr(cli, "console", cli._make_console(no_color=True))
@@ -1152,6 +1281,12 @@ def test_print_metrics_in_normal_mode_includes_adoption_and_public_api(
api_surface_public_symbols=3,
api_surface_added=4,
api_surface_breaking=1,
+ coverage_join_status="ok",
+ coverage_join_overall_permille=735,
+ coverage_join_coverage_hotspots=2,
+ coverage_join_scope_gap_hotspots=1,
+ coverage_join_threshold_percent=50,
+ coverage_join_source_label="coverage.xml",
overloaded_modules_candidates=3,
overloaded_modules_total=158,
overloaded_modules_population_status="ok",
@@ -1167,6 +1302,9 @@ def test_print_metrics_in_normal_mode_includes_adoption_and_public_api(
"Public API",
"3 symbols",
"1 breaking",
+ "Coverage",
+ "73.5% overall",
+ "2 hotspots < 50%",
)
@@ -1211,6 +1349,36 @@ def test_configure_metrics_mode_forces_dependency_and_dead_code_when_gated() ->
assert args.skip_dependencies is False
+def test_configure_metrics_mode_does_not_force_api_surface_for_baseline_update() -> (
+ None
+):
+ args = Namespace(
+ skip_metrics=False,
+ fail_complexity=-1,
+ fail_coupling=-1,
+ fail_cohesion=-1,
+ fail_cycles=False,
+ fail_dead_code=False,
+ fail_health=-1,
+ fail_on_new_metrics=False,
+ fail_on_typing_regression=False,
+ fail_on_docstring_regression=False,
+ fail_on_api_break=False,
+ fail_on_untested_hotspots=False,
+ min_typing_coverage=-1,
+ min_docstring_coverage=-1,
+ update_metrics_baseline=True,
+ skip_dead_code=False,
+ skip_dependencies=False,
+ api_surface=False,
+ coverage_xml=None,
+ )
+
+ cli._configure_metrics_mode(args=args, metrics_baseline_exists=True)
+
+ assert args.api_surface is False
+
+
def test_probe_metrics_baseline_section_for_non_object_payload(tmp_path: Path) -> None:
path = tmp_path / "baseline.json"
path.write_text("[]", "utf-8")
@@ -1270,6 +1438,94 @@ def test_metrics_computed_includes_api_surface_only_when_enabled() -> None:
)
+def test_metrics_computed_includes_coverage_join_only_with_xml() -> None:
+ assert cli._metrics_computed(
+ Namespace(
+ skip_metrics=False,
+ skip_dependencies=True,
+ skip_dead_code=True,
+ api_surface=False,
+ coverage_xml=None,
+ )
+ ) == ("complexity", "coupling", "cohesion", "health", "coverage_adoption")
+ assert cli._metrics_computed(
+ Namespace(
+ skip_metrics=False,
+ skip_dependencies=True,
+ skip_dead_code=True,
+ api_surface=False,
+ coverage_xml="coverage.xml",
+ )
+ ) == (
+ "complexity",
+ "coupling",
+ "cohesion",
+ "health",
+ "coverage_adoption",
+ "coverage_join",
+ )
+
+
+def test_enforce_gating_requires_coverage_input_for_hotspot_gate(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ cli.console = cli._make_console(no_color=True)
+ monkeypatch.setattr(cli, "gate", lambda **_kwargs: pipeline.GatingResult(0, ()))
+ with pytest.raises(SystemExit) as exc:
+ cli._enforce_gating(
+ args=Namespace(
+ fail_on_untested_hotspots=True,
+ fail_threshold=-1,
+ verbose=False,
+ ),
+ boot=cast("pipeline.BootstrapResult", object()),
+ analysis=cast(Any, SimpleNamespace(coverage_join=None)),
+ processing=cast(Any, Namespace(source_read_failures=[])),
+ source_read_contract_failure=False,
+ baseline_failure_code=None,
+ metrics_baseline_failure_code=None,
+ new_func=set(),
+ new_block=set(),
+ metrics_diff=None,
+ html_report_path=None,
+ )
+ assert exc.value.code == 2
+
+
+def test_enforce_gating_requires_valid_coverage_input_for_hotspot_gate(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ cli.console = cli._make_console(no_color=True)
+ monkeypatch.setattr(cli, "gate", lambda **_kwargs: pipeline.GatingResult(0, ()))
+ with pytest.raises(SystemExit) as exc:
+ cli._enforce_gating(
+ args=Namespace(
+ fail_on_untested_hotspots=True,
+ fail_threshold=-1,
+ verbose=False,
+ ),
+ boot=cast("pipeline.BootstrapResult", object()),
+ analysis=cast(
+ Any,
+ SimpleNamespace(
+ coverage_join=SimpleNamespace(
+ status="invalid",
+ invalid_reason="broken xml",
+ )
+ ),
+ ),
+ processing=cast(Any, Namespace(source_read_failures=[])),
+ source_read_contract_failure=False,
+ baseline_failure_code=None,
+ metrics_baseline_failure_code=None,
+ new_func=set(),
+ new_block=set(),
+ metrics_diff=None,
+ html_report_path=None,
+ )
+ assert exc.value.code == 2
+
+
def test_main_impl_exits_on_invalid_pyproject_config(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
diff --git a/tests/test_coverage_join.py b/tests/test_coverage_join.py
new file mode 100644
index 0000000..ceaa424
--- /dev/null
+++ b/tests/test_coverage_join.py
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from pathlib import Path
+from xml.etree import ElementTree
+
+import pytest
+
+from codeclone.metrics.coverage_join import (
+ CoverageJoinParseError,
+ _iter_cobertura_class_elements,
+ _iter_cobertura_line_hits,
+ _local_tag_name,
+ _resolve_report_filename,
+ _resolved_coverage_sources,
+ build_coverage_join,
+)
+
+
+def test_build_coverage_join_maps_cobertura_lines_to_function_spans(
+ tmp_path: Path,
+) -> None:
+ source = tmp_path / "pkg" / "mod.py"
+ source.parent.mkdir()
+ source.write_text(
+ "\n".join(
+ (
+ "def hot(value):",
+ " if value:",
+ " return value",
+ "",
+ "def covered():",
+ " return 1",
+ "",
+ "def no_lines():",
+ " return 2",
+ )
+ )
+ + "\n",
+ encoding="utf-8",
+ )
+ coverage_xml = tmp_path / "coverage.xml"
+ coverage_xml.write_text(
+ """
+""",
+ encoding="utf-8",
+ )
+ missing_source = tmp_path / "pkg" / "missing.py"
+
+ result = build_coverage_join(
+ coverage_xml=coverage_xml,
+ root_path=tmp_path,
+ hotspot_threshold_percent=60,
+ units=(
+ {
+ "qualname": "pkg.mod:covered",
+ "filepath": str(source),
+ "start_line": 5,
+ "end_line": 6,
+ "cyclomatic_complexity": 1,
+ "risk": "medium",
+ },
+ {
+ "qualname": "pkg.mod:no_lines",
+ "filepath": str(source),
+ "start_line": 8,
+ "end_line": 9,
+ "cyclomatic_complexity": 1,
+ "risk": "high",
+ },
+ {
+ "qualname": "pkg.missing:lost",
+ "filepath": str(missing_source),
+ "start_line": 1,
+ "end_line": 2,
+ "cyclomatic_complexity": 8,
+ "risk": "high",
+ },
+ {
+ "qualname": "pkg.mod:hot",
+ "filepath": str(source),
+ "start_line": 1,
+ "end_line": 3,
+ "cyclomatic_complexity": 12,
+ "risk": "high",
+ },
+ ),
+ )
+
+ assert result.status == "ok"
+ assert result.coverage_xml == str(coverage_xml.resolve())
+ assert result.files == 1
+ assert result.measured_units == 2
+ assert result.overall_executable_lines == 4
+ assert result.overall_covered_lines == 3
+ assert result.coverage_hotspots == 1
+ assert result.scope_gap_hotspots == 1
+ assert [fact.qualname for fact in result.units] == [
+ "pkg.missing:lost",
+ "pkg.mod:hot",
+ "pkg.mod:covered",
+ "pkg.mod:no_lines",
+ ]
+
+ missing, hot, covered, no_lines = result.units
+ assert missing.coverage_status == "missing_from_report"
+ assert missing.coverage_permille == 0
+ assert hot.coverage_status == "measured"
+ assert hot.executable_lines == 2
+ assert hot.covered_lines == 1
+ assert hot.coverage_permille == 500
+ assert covered.coverage_status == "measured"
+ assert covered.coverage_permille == 1000
+ assert no_lines.coverage_status == "no_executable_lines"
+ assert no_lines.coverage_permille == 0
+
+
+def test_build_coverage_join_rejects_invalid_cobertura_xml(tmp_path: Path) -> None:
+ coverage_xml = tmp_path / "coverage.xml"
+    coverage_xml.write_text("<coverage>", encoding="utf-8")
+
+ with pytest.raises(CoverageJoinParseError, match="Invalid Cobertura XML"):
+ build_coverage_join(
+ coverage_xml=coverage_xml,
+ root_path=tmp_path,
+ hotspot_threshold_percent=50,
+ units=(),
+ )
+
+
+def test_coverage_join_resolves_sources_and_filenames(tmp_path: Path) -> None:
+ root_element = ElementTree.fromstring(
+ """
+<coverage><sources>
+    <source>src</source>
+    <source>src</source>
+    <source/>
+    <source>pkg</source>
+</sources></coverage>
+"""
+ )
+ source_roots = _resolved_coverage_sources(
+ root_element=root_element,
+ root_path=tmp_path,
+ )
+ expected_roots = (
+ tmp_path.resolve(),
+ (tmp_path / "src").resolve(),
+ (tmp_path / "pkg").resolve(),
+ )
+
+ assert source_roots == expected_roots
+ assert _local_tag_name(123) == ""
+ assert _local_tag_name("{urn:test}source") == "source"
+
+ existing = tmp_path / "pkg" / "mod.py"
+ existing.parent.mkdir()
+ existing.write_text("def run():\n return 1\n", encoding="utf-8")
+
+ assert _resolve_report_filename(
+ filename="mod.py",
+ root_path=tmp_path,
+ source_roots=(tmp_path / "pkg",),
+ ) == str(existing.resolve())
+ assert _resolve_report_filename(
+ filename="missing.py",
+ root_path=tmp_path,
+ source_roots=(),
+ ) == str((tmp_path / "missing.py").resolve())
+ assert (
+ _resolve_report_filename(
+ filename="",
+ root_path=tmp_path,
+ source_roots=(),
+ )
+ is None
+ )
+ assert (
+ _resolve_report_filename(
+ filename=str(tmp_path.parent / "outside.py"),
+ root_path=tmp_path,
+ source_roots=(),
+ )
+ is None
+ )
+
+
+def test_coverage_join_path_resolution_fallbacks(
+ tmp_path: Path,
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ def _raise_os_error(
+ _self: Path,
+ *_args: object,
+ **_kwargs: object,
+ ) -> Path:
+ raise OSError("path resolution failed")
+
+ monkeypatch.setattr(Path, "resolve", _raise_os_error)
+ root_element = ElementTree.fromstring(
+ f"""
+<coverage><sources>
+    <source>{tmp_path}</source>
+</sources></coverage>
+"""
+ )
+
+ assert _resolved_coverage_sources(
+ root_element=root_element,
+ root_path=tmp_path,
+ ) == (tmp_path.absolute(),)
+
+
+def test_coverage_join_filters_cobertura_elements_and_unknown_risk(
+ tmp_path: Path,
+) -> None:
+ source = tmp_path / "pkg" / "mod.py"
+ source.parent.mkdir()
+ source.write_text("def run():\n return 1\n", encoding="utf-8")
+ coverage_xml = tmp_path / "coverage.xml"
+ coverage_xml.write_text(
+ """
+<coverage>
+  <packages>
+    <package name="pkg">
+      <classes>
+        <class name="empty" filename="pkg/empty.py"/>
+        <class name="mod" filename="pkg/mod.py">
+          <lines>
+            <line number="1" hits="0"/>
+            <line number="2" hits="1"/>
+            <line hits="1"/>
+          </lines>
+        </class>
+      </classes>
+    </package>
+  </packages>
+</coverage>
+""",
+ encoding="utf-8",
+ )
+
+ root_element = ElementTree.parse(coverage_xml).getroot()
+ classes = _iter_cobertura_class_elements(root_element)
+
+ assert [item.attrib["name"] for item in classes] == ["empty", "mod"]
+ assert _iter_cobertura_line_hits(classes[1]) == ((1, 0), (2, 1))
+
+ result = build_coverage_join(
+ coverage_xml=coverage_xml,
+ root_path=tmp_path,
+ hotspot_threshold_percent=50,
+ units=(
+ {
+ "qualname": "pkg.mod:run",
+ "filepath": str(source),
+ "start_line": 1,
+ "end_line": 2,
+ "cyclomatic_complexity": 1,
+ "risk": "dynamic",
+ },
+ ),
+ )
+
+ fact = result.units[0]
+ assert (fact.risk, fact.coverage_status, fact.coverage_permille) == (
+ "low",
+ "measured",
+ 500,
+ )
+ assert result.coverage_hotspots == 0
+ assert result.scope_gap_hotspots == 0
diff --git a/tests/test_golden_fixtures.py b/tests/test_golden_fixtures.py
new file mode 100644
index 0000000..398e52d
--- /dev/null
+++ b/tests/test_golden_fixtures.py
@@ -0,0 +1,140 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+# SPDX-License-Identifier: MPL-2.0
+# Copyright (c) 2026 Den Rozhnovskiy
+
+from __future__ import annotations
+
+import pytest
+
+from codeclone.golden_fixtures import (
+ GoldenFixturePatternError,
+ build_suppressed_clone_groups,
+ normalize_golden_fixture_patterns,
+ path_matches_golden_fixture_pattern,
+ split_clone_groups_for_golden_fixtures,
+)
+
+
+def test_normalize_golden_fixture_patterns_rejects_non_test_scope() -> None:
+ with pytest.raises(GoldenFixturePatternError, match="must target tests/"):
+ normalize_golden_fixture_patterns(["pkg/golden_*"])
+
+
+@pytest.mark.parametrize(
+ ("pattern", "message"),
+ [
+ ("", "must be non-empty"),
+ ("/tmp/golden_*", "must be repo-relative"),
+ ("tests/../fixtures/golden_*", "must not contain '..'"),
+ ],
+)
+def test_normalize_golden_fixture_patterns_rejects_invalid_entries(
+ pattern: str,
+ message: str,
+) -> None:
+ with pytest.raises(GoldenFixturePatternError, match=message):
+ normalize_golden_fixture_patterns([pattern])
+
+
+def test_path_matches_golden_fixture_pattern_matches_directory_subtrees() -> None:
+ assert path_matches_golden_fixture_pattern(
+ "tests/fixtures/golden_project/alpha.py",
+ "tests/fixtures/golden_*",
+ )
+ assert not path_matches_golden_fixture_pattern(
+ "tests/helpers/golden_project/alpha.py",
+ "tests/fixtures/golden_*",
+ )
+
+
+def test_path_matches_golden_fixture_pattern_rejects_empty_relative_path() -> None:
+ assert not path_matches_golden_fixture_pattern("", "tests/fixtures/golden_*")
+
+
+def test_split_clone_groups_for_golden_fixtures_requires_full_group_match() -> None:
+ split = split_clone_groups_for_golden_fixtures(
+ groups={
+ "golden": [
+ {"filepath": "/repo/tests/fixtures/golden_project/a.py"},
+ {"filepath": "/repo/tests/fixtures/golden_project/b.py"},
+ ],
+ "mixed": [
+ {"filepath": "/repo/tests/fixtures/golden_project/c.py"},
+ {"filepath": "/repo/pkg/mod.py"},
+ ],
+ },
+ kind="function",
+ golden_fixture_paths=("tests/fixtures/golden_*",),
+ scan_root="/repo",
+ )
+
+ assert set(split.active_groups) == {"mixed"}
+ assert set(split.suppressed_groups) == {"golden"}
+ assert split.matched_patterns == {
+ "golden": ("tests/fixtures/golden_*",),
+ }
+
+
+def test_split_clone_groups_for_golden_fixtures_keeps_missing_or_unmatched_items() -> (
+ None
+):
+ split = split_clone_groups_for_golden_fixtures(
+ groups={
+ "missing": [
+ {"filepath": ""},
+ {"filepath": "/repo/tests/fixtures/golden_project/b.py"},
+ ],
+ "unmatched": [
+ {"filepath": "/repo/tests/golden_project/a.py"},
+ {"filepath": "/repo/tests/golden_project/b.py"},
+ ],
+ },
+ kind="function",
+ golden_fixture_paths=("tests/fixtures/golden_*",),
+ scan_root="/repo",
+ )
+
+ assert set(split.active_groups) == {"missing", "unmatched"}
+ assert split.suppressed_groups == {}
+ assert split.matched_patterns == {}
+
+
+def test_build_suppressed_clone_groups_carries_rule_and_patterns() -> None:
+ suppressed = build_suppressed_clone_groups(
+ kind="function",
+ groups={
+ "golden": [
+ {
+ "filepath": "/repo/tests/fixtures/golden_project/a.py",
+ "qualname": "tests.fixtures.golden_project.a:run",
+ }
+ ]
+ },
+ matched_patterns={"golden": ("tests/fixtures/golden_*",)},
+ )
+
+ assert len(suppressed) == 1
+ group = suppressed[0]
+ assert group.group_key == "golden"
+ assert group.matched_patterns == ("tests/fixtures/golden_*",)
+ assert group.suppression_rule == "golden_fixture"
+ assert group.suppression_source == "project_config"
+
+
+def test_build_suppressed_clone_groups_skips_blank_pattern_bindings() -> None:
+ suppressed = build_suppressed_clone_groups(
+ kind="function",
+ groups={
+ "golden": [
+ {
+ "filepath": "/repo/tests/fixtures/golden_project/a.py",
+ "qualname": "tests.fixtures.golden_project.a:run",
+ }
+ ]
+ },
+ matched_patterns={"golden": ("", " ")},
+ )
+
+ assert suppressed == ()
diff --git a/tests/test_html_report.py b/tests/test_html_report.py
index 5620513..4fb9351 100644
--- a/tests/test_html_report.py
+++ b/tests/test_html_report.py
@@ -12,6 +12,7 @@
import pytest
+from codeclone._html_badges import _tab_empty_info
from codeclone.contracts import (
CACHE_VERSION,
DOCS_URL,
@@ -33,6 +34,7 @@
StructuralFindingGroup,
StructuralFindingOccurrence,
Suggestion,
+ SuppressedCloneGroup,
)
from codeclone.report import build_block_group_facts
from codeclone.report.json_contract import (
@@ -41,6 +43,7 @@
structural_group_id,
)
from codeclone.report.serialize import render_json_report_document
+from tests._assertions import assert_contains_all
from tests._report_fixtures import (
REPEATED_ASSERT_SOURCE,
repeated_block_group_key,
@@ -1356,6 +1359,66 @@ def test_html_report_segments_section(tmp_path: Path) -> None:
assert "Segment clones" in html
+def test_html_report_clone_tab_renders_suppressed_golden_fixture_groups(
+ tmp_path: Path,
+) -> None:
+ fixture_file = tmp_path / "tests" / "fixtures" / "golden_project" / "alpha.py"
+ fixture_file_2 = tmp_path / "tests" / "fixtures" / "golden_project" / "beta.py"
+ fixture_file.parent.mkdir(parents=True, exist_ok=True)
+ fixture_file.write_text("def run():\n return 1\n", "utf-8")
+ fixture_file_2.write_text("def run():\n return 2\n", "utf-8")
+
+ suppressed_group = SuppressedCloneGroup(
+ kind="function",
+ group_key="tests.fixtures.golden.alpha:run",
+ items=(
+ {
+ "qualname": "tests.fixtures.golden.alpha:run",
+ "filepath": str(fixture_file),
+ "start_line": 1,
+ "end_line": 2,
+ "loc": 2,
+ "stmt_count": 1,
+ "fingerprint": "fp-run",
+ "loc_bucket": "0-19",
+ },
+ {
+ "qualname": "tests.fixtures.golden.beta:run",
+ "filepath": str(fixture_file_2),
+ "start_line": 1,
+ "end_line": 2,
+ "loc": 2,
+ "stmt_count": 1,
+ "fingerprint": "fp-run",
+ "loc_bucket": "0-19",
+ },
+ ),
+ matched_patterns=("tests/fixtures/golden_*",),
+ suppression_rule="golden_fixture",
+ suppression_source="project_config",
+ )
+ report_document = build_report_document(
+ func_groups={},
+ block_groups={},
+ segment_groups={},
+ meta={"scan_root": str(tmp_path)},
+ suppressed_clone_groups=(suppressed_group,),
+ )
+
+ html = build_html_report(
+ func_groups={},
+ block_groups={},
+ segment_groups={},
+ report_meta={"scan_root": str(tmp_path)},
+ report_document=report_document,
+ )
+
+ assert "Suppressed" in html
+ assert "golden_fixture@project_config" in html
+ assert "tests/fixtures/golden_*" in html
+ assert "No code clones detected" not in html
+
+
def test_html_report_single_item_group(tmp_path: Path) -> None:
f = tmp_path / "a.py"
f.write_text("def f():\n x = 1\n", "utf-8")
@@ -1641,15 +1704,15 @@ def test_html_report_metrics_risk_branches() -> None:
dead_critical=2,
),
)
- assert "insight-risk" in html
- assert 'stroke="var(--error)"' in html
- assert "Cycles: 1; max dependency depth: 4." in html
- assert "5 candidates total; 2 high-confidence items; 0 suppressed." in html
- assert '