Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ Available stats:
| `profile` | Per-developer report: scope, specialization index, contribution type, pace, collaboration, top files |
| `top-commits` | Largest commits ranked by lines changed (includes message if extracted with `--include-commit-messages`) |
| `pareto` | Concentration (80% threshold) across files, devs (two lenses: commits and churn), and directories |
| `structure` | Repo layout as a `tree(1)`-style view, dirs sorted by aggregate churn, capped by `--tree-depth` (default 3) |

Output formats: `table` (default, human-readable), `csv` (single clean table per `--stat`, header row on line 1), `json` (unified object with all sections).

Expand Down
27 changes: 24 additions & 3 deletions cmd/gitcortex/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func isValidStat(s string) bool {
switch s {
case "summary", "contributors", "hotspots", "directories", "activity",
"busfactor", "coupling", "churn-risk", "working-patterns",
"dev-network", "profile", "top-commits", "pareto":
"dev-network", "profile", "top-commits", "pareto", "structure":
return true
}
return false
Expand All @@ -125,20 +125,22 @@ type statsFlags struct {
churnHalfLife int
networkMinFiles int
email string
treeDepth int
}

// addStatsFlags registers the stats-related command-line flags on cmd and
// binds each one to the matching field of sf, so that after flag parsing sf
// holds the user's choices (or the defaults given here).
//
// NOTE(review): --stat is registered twice below; the two calls differ only
// in whether the help text lists "structure". This looks like the before and
// after lines of a single change shown together — confirm that only one
// registration exists in the actual file.
func addStatsFlags(cmd *cobra.Command, sf *statsFlags) {
cmd.Flags().StringSliceVar(&sf.inputs, "input", []string{"git_data.jsonl"}, "Input JSONL file(s) from extract (repeatable for multi-repo)")
cmd.Flags().StringVar(&sf.format, "format", "table", "Output format: table, csv, json")
cmd.Flags().IntVar(&sf.topN, "top", 10, "Number of top entries to show (0 = all)")
cmd.Flags().StringVar(&sf.granularity, "granularity", "month", "Activity granularity: day, week, month, year")
cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto")
cmd.Flags().StringVar(&sf.stat, "stat", "", "Show a specific stat: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure")
cmd.Flags().IntVar(&sf.couplingMaxFiles, "coupling-max-files", 50, "Max files per commit for coupling analysis")
cmd.Flags().IntVar(&sf.couplingMinChanges, "coupling-min-changes", 5, "Min co-changes for coupling results")
cmd.Flags().IntVar(&sf.churnHalfLife, "churn-half-life", 90, "Half-life in days for churn decay (churn-risk)")
cmd.Flags().IntVar(&sf.networkMinFiles, "network-min-files", 5, "Min shared files for dev-network edges")
cmd.Flags().StringVar(&sf.email, "email", "", "Filter by developer email (for profile stat)")
cmd.Flags().StringVar(&sf.since, "since", "", "Filter to recent period (e.g. 7d, 4w, 3m, 1y)")
// Depth limit for the structure tree; the default of 3 matches the
// documented CLI default, and 0 lifts the limit entirely.
cmd.Flags().IntVar(&sf.treeDepth, "tree-depth", 3, "Max depth for --stat structure (0 = unlimited)")
}

func validateStatsFlags(sf *statsFlags) error {
Expand All @@ -149,7 +151,7 @@ func validateStatsFlags(sf *statsFlags) error {
return fmt.Errorf("invalid --granularity %q; must be one of: day, week, month, year", sf.granularity)
}
if sf.stat != "" && !isValidStat(sf.stat) {
return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto", sf.stat)
return fmt.Errorf("invalid --stat %q; valid: summary, contributors, hotspots, directories, activity, busfactor, coupling, churn-risk, working-patterns, dev-network, profile, top-commits, pareto, structure", sf.stat)
}
return nil
}
Expand Down Expand Up @@ -331,6 +333,22 @@ func renderStats(ds *stats.Dataset, sf *statsFlags) error {
return err
}
}
if sf.stat == "structure" {
root := reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth)
// CSV skips the stderr banner — downstream parsers sometimes
// tail stderr onto stdout, and a stray "=== ... ===" would
// break the single-table contract.
if sf.format != "csv" {
depthLabel := "unlimited"
if sf.treeDepth > 0 {
depthLabel = fmt.Sprintf("%d", sf.treeDepth)
}
fmt.Fprintf(os.Stderr, "\n=== Repo Structure (depth %s) ===\n", depthLabel)
}
if err := reportpkg.RenderTreeForFormat(os.Stdout, root, sf.format); err != nil {
return err
}
}

return nil
}
Expand Down Expand Up @@ -378,6 +396,9 @@ func renderStatsJSON(f *stats.Formatter, ds *stats.Dataset, sf *statsFlags) erro
if showAll || sf.stat == "top-commits" {
report["top_commits"] = stats.TopCommits(ds, sf.topN)
}
if sf.stat == "structure" {
report["structure"] = reportpkg.BuildRepoTree(stats.FileHotspots(ds, 0), sf.treeDepth)
}

return f.PrintReport(report)
}
Expand Down
14 changes: 14 additions & 0 deletions docs/METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,20 @@ Two dev lenses are surfaced because commit count alone is a flawed proxy for con

**How to interpret**: "20 files concentrate 80% of all churn" describes where change lands — it can indicate a healthy core module under active development, or a bottleneck if combined with low bus factor. Cross-reference with the Churn Risk section before drawing conclusions.

## Repo Structure

A `tree(1)`-style view of the repository's directory layout, built from paths seen in history (`FileHotspots`), not from the filesystem at HEAD. Deleted files are included — the view answers "what shaped the codebase", not "what is present today".

**Aggregation**:
- File nodes: `Commits` and `Churn` are the per-file values.
- Directory nodes: `Churn` and `Files` are summed over all descendants; `Commits` is intentionally left at zero. Per-file commit counts cannot be summed into a distinct commit count — a single commit that touches three files in the directory would be counted three times. `Files` is the number of distinct descendant files.

**Ordering**: within each level, directories come first (architectural shape reads top-down), then files. Within each group, entries are sorted by churn descending; remaining ties are broken by name ascending.

**Truncation**: the CLI caps depth at `--tree-depth` (default 3, 0 = unlimited). The HTML report additionally caps children at 50 per directory to keep the page under ~1MB on kernel-scale repos; the tail is collapsed into a `… N more hidden (ranked by churn)` counter.

**When to use**: before drilling into hotspots or churn-risk, skim the structure to locate the modules those files live in. The tree is navigational context; ranked tables are where judgment happens.

## Data Flow

```
Expand Down
1 change: 1 addition & 0 deletions docs/RUNBOOK.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ Section headers go to stderr, data to stdout. To capture only data:
./gitcortex stats --input data.jsonl --stat profile
./gitcortex stats --input data.jsonl --stat profile --email alice@company.com
./gitcortex stats --input data.jsonl --stat top-commits --top 20
./gitcortex stats --input data.jsonl --stat structure --tree-depth 3
```

### Time filtering
Expand Down
21 changes: 21 additions & 0 deletions internal/report/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,27 @@ type ReportData struct {
// hotspot" from "there are 48 legacy-hotspots in total". Populated
// alongside ChurnRisk in Generate().
ChurnRiskLabelCounts []LabelCount

// Structure holds a pruned repo-structure tree rendered as a
// collapsible architecture view. Truncated to htmlTreeDepth levels
// so mature repos (linux-scale) don't blow up the HTML. nil when
// the dataset has no files.
Structure *TreeNode
}

// htmlTreeDepth is the depth limit passed to BuildRepoTree when building
// the repo-structure tree baked into the HTML report. Three levels
// resolves top-level modules and their immediate children, enough to read
// the architecture at a glance without drowning the page on kernel-scale
// repos. The HTML report always uses this fixed value; the CLI
// `--stat structure` view takes its depth from --tree-depth instead.
const htmlTreeDepth = 3

// htmlTreeMaxChildrenPerDir is the per-directory child limit handed to
// CapChildrenPerDir for the HTML report's tree. It keeps wide directories
// (e.g. repos with hundreds of sibling files at one level) from ballooning
// the HTML. Children are pre-sorted dirs-first then churn-desc, so the top
// 50 preserves the architectural shape and pushes long tails into a
// "… N more" counter. CLI does not apply this cap.
const htmlTreeMaxChildrenPerDir = 50

// LabelCount pairs a Churn Risk label with its total count and sort
// priority, so the template can render chips in the same label order
// used by the table below.
Expand Down Expand Up @@ -337,7 +356,9 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat
Pareto: ComputePareto(ds),
PatternGrid: grid,
MaxPattern: maxP,
Structure: BuildRepoTree(stats.FileHotspots(ds, 0), htmlTreeDepth),
}
CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir)

return tmpl.Execute(w, data)
}
Expand Down
52 changes: 44 additions & 8 deletions internal/report/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,22 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
.chip-active { background: #2da44e; color: #fff; }
.chip-cold { background: #eaeef2; color: #656d76; }
.chip-note { font-size: 11px; color: #656d76; font-style: italic; margin-left: 4px; }
.glossary { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; }
.glossary summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; }
.glossary[open] summary { margin-bottom: 8px; }
.glossary dl { font-size: 12px; color: #24292f; margin: 0; }
.glossary dt { font-weight: 600; margin-top: 8px; }
.glossary dt:first-child { margin-top: 0; }
.glossary dd { color: #656d76; margin: 2px 0 0; }
.accordion { background: #fff; border: 1px solid #d0d7de; border-radius: 6px; padding: 10px 16px; margin-bottom: 24px; }
.accordion + .accordion { margin-top: -16px; }
.accordion summary { cursor: pointer; font-weight: 600; font-size: 13px; color: #24292f; }
.accordion[open] summary { margin-bottom: 8px; }
.accordion dl { font-size: 12px; color: #24292f; margin: 0; }
.accordion dt { font-weight: 600; margin-top: 8px; }
.accordion dt:first-child { margin-top: 0; }
.accordion dd { color: #656d76; margin: 2px 0 0; }
</style>
</head>
<body>

<h1>{{.RepoName}} report</h1>
<p class="subtitle">{{.Summary.FirstCommitDate}} to {{.Summary.LastCommitDate}}</p>

<details class="glossary">
<details class="accordion">
<summary>Glossary — what do these terms mean?</summary>
<p style="font-size:12px; color:#24292f; margin:0 0 10px; line-height:1.5;">gitcortex is a <b>repository behavior analyzer</b>, not a code analyzer. These metrics describe what people and processes did in git — who touched what, when, and with whom — not the quality of the source code itself. A file classified as <b>silo</b> or <b>legacy-hotspot</b> reveals a human or process pattern; it is not a judgment on the code (a well-written library maintained by one person will classify as silo regardless of how good it is). Labels point at where to look, not what to conclude.</p>
<dl>
Expand Down Expand Up @@ -91,6 +92,17 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
</dl>
</details>

{{if .Structure}}{{if .Structure.Children}}
<details class="accordion">
<summary>Repo Structure — architecture at a glance</summary>
<p style="font-size:12px; color:#656d76; margin:8px 0 10px; line-height:1.5;">Directories sorted by total churn within each level; files listed after. Counts reflect history, so deleted files still appear (they shaped the codebase). Tree capped at the top 3 levels; deeper subtrees collapsed to keep the report readable.</p>
<div class="tree mono" style="font-size:12px; line-height:1.6;">
<div><b>{{.Structure.Name}}</b> <span style="color:#656d76;">({{thousands .Structure.Files}} files, {{humanize .Structure.Churn}} churn)</span></div>
{{template "treechildren" .Structure}}
</div>
</details>
{{end}}{{end}}

<div class="cards">
<div class="card"><div class="label">Commits</div><div class="value" title="{{thousands .Summary.TotalCommits}}">{{humanize .Summary.TotalCommits}}</div></div>
<div class="card"><div class="label">Developers</div><div class="value" title="{{thousands .Summary.TotalDevs}}">{{humanize .Summary.TotalDevs}}</div></div>
Expand Down Expand Up @@ -382,6 +394,30 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
{{end}}
{{end}}

{{define "treechildren"}}
<ul style="list-style:none; padding-left:18px; margin:2px 0; border-left:1px solid #eaeef2;">
{{range .Children}}
<li style="padding:1px 0;">
{{if .IsDir}}
{{if .Children}}
<details>
<summary style="cursor:pointer; color:#0969da;">📁 {{.Name}}/ <span style="color:#656d76; font-weight:normal;">({{thousands .Files}} files · {{humanize .Churn}} churn)</span></summary>
{{template "treechildren" .}}
</details>
{{else}}
<span style="color:#0969da;">📁 {{.Name}}/</span> <span style="color:#656d76;">({{thousands .Files}} files · {{humanize .Churn}} churn){{if .Truncated}} <i>… subtree hidden</i>{{end}}</span>
{{end}}
{{else}}
<span>📄 {{.Name}}</span> <span style="color:#656d76;">({{thousands .Commits}} commits · {{humanize .Churn}} churn)</span>
{{end}}
</li>
{{end}}
{{if gt .HiddenChildren 0}}
<li style="padding:1px 0; color:#656d76; font-style:italic;">… {{thousands .HiddenChildren}} more hidden (ranked by churn)</li>
{{end}}
</ul>
{{end}}

<footer>Generated by <a href="https://github.com/lex0c/gitcortex" target="_blank" rel="noopener noreferrer" style="color:#0969da; text-decoration:none;">gitcortex</a> · {{.GeneratedAt}}</footer>

</body>
Expand Down
Loading
Loading