From 9499356968428b093c4a6651d7418ca677eeea70 Mon Sep 17 00:00:00 2001 From: Grey Newell Date: Thu, 9 Apr 2026 14:29:53 -0400 Subject: [PATCH 1/2] test(build): add regression tests for search-index rune-boundary truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds build_test.go covering generateSearchIndex: - short description written verbatim - long ASCII description truncated to exactly 120 runes - multi-byte (é, 2-byte) description truncated at rune boundary → valid UTF-8 - search disabled → no file written Regression for the byte-slice truncation bug fixed in build.go. Co-Authored-By: Claude Sonnet 4.6 --- internal/archdocs/pssg/build/build_test.go | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 internal/archdocs/pssg/build/build_test.go diff --git a/internal/archdocs/pssg/build/build_test.go b/internal/archdocs/pssg/build/build_test.go new file mode 100644 index 0000000..a948f70 --- /dev/null +++ b/internal/archdocs/pssg/build/build_test.go @@ -0,0 +1,140 @@ +package build + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + "unicode/utf8" + + "github.com/supermodeltools/cli/internal/archdocs/pssg/config" + "github.com/supermodeltools/cli/internal/archdocs/pssg/entity" +) + +func newBuilder(outDir string) *Builder { + return NewBuilder(&config.Config{ + Search: config.SearchConfig{Enabled: true}, + Paths: config.PathsConfig{Output: outDir}, + }, false) +} + +func makeEntity(slug, title, description string) *entity.Entity { + return &entity.Entity{ + Slug: slug, + Fields: map[string]interface{}{ + "title": title, + "description": description, + }, + } +} + +// TestGenerateSearchIndex_ShortDescription verifies that descriptions under +// the 120-rune limit are written verbatim. 
+func TestGenerateSearchIndex_ShortDescription(t *testing.T) { + outDir := t.TempDir() + b := newBuilder(outDir) + + ent := makeEntity("test-slug", "Test Title", "Short description.") + if err := b.generateSearchIndex([]*entity.Entity{ent}, outDir); err != nil { + t.Fatalf("generateSearchIndex: %v", err) + } + + entries := readSearchIndex(t, outDir) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + if entries[0]["d"] != "Short description." { + t.Errorf("description mismatch: got %q", entries[0]["d"]) + } +} + +// TestGenerateSearchIndex_LongASCIIDescription verifies ASCII-only descriptions +// longer than 120 chars are truncated to exactly 120 runes. +func TestGenerateSearchIndex_LongASCIIDescription(t *testing.T) { + outDir := t.TempDir() + b := newBuilder(outDir) + + // build a 200-char ASCII string + long := "" + for i := 0; i < 200; i++ { + long += "a" + } + + ent := makeEntity("slug", "Title", long) + if err := b.generateSearchIndex([]*entity.Entity{ent}, outDir); err != nil { + t.Fatalf("generateSearchIndex: %v", err) + } + + entries := readSearchIndex(t, outDir) + got := entries[0]["d"] + if len([]rune(got)) != 120 { + t.Errorf("expected 120 runes, got %d", len([]rune(got))) + } +} + +// TestGenerateSearchIndex_MultiByteDescriptionTruncation is the regression test +// for the byte-vs-rune truncation bug. A description whose byte length exceeds +// 120 but whose rune count does not must NOT be truncated. A description whose +// rune count exceeds 120 must be truncated at a rune boundary so the result +// is valid UTF-8. +func TestGenerateSearchIndex_MultiByteDescriptionTruncation(t *testing.T) { + outDir := t.TempDir() + b := newBuilder(outDir) + + // Each 'é' is 2 bytes (U+00E9). We build a string of 121 'é' characters: + // rune length = 121 (> 120) so it must be truncated to 120 runes. + // byte length = 242; the old byte slice desc[:120] kept just 60 runes here, + // and on odd-width input it would cut mid-sequence → invalid UTF-8. 
+ longMultiByte := "" + for i := 0; i < 121; i++ { + longMultiByte += "é" + } + + ent := makeEntity("slug", "Title", longMultiByte) + if err := b.generateSearchIndex([]*entity.Entity{ent}, outDir); err != nil { + t.Fatalf("generateSearchIndex: %v", err) + } + + entries := readSearchIndex(t, outDir) + got := entries[0]["d"] + + if !utf8.ValidString(got) { + t.Errorf("truncated description is not valid UTF-8: %q", got) + } + if runes := []rune(got); len(runes) != 120 { + t.Errorf("expected 120 runes after truncation, got %d", len(runes)) + } +} + +// TestGenerateSearchIndex_DisabledSearch verifies no file is written when search +// is disabled. +func TestGenerateSearchIndex_DisabledSearch(t *testing.T) { + outDir := t.TempDir() + b := NewBuilder(&config.Config{ + Search: config.SearchConfig{Enabled: false}, + Paths: config.PathsConfig{Output: outDir}, + }, false) + + ent := makeEntity("slug", "Title", "desc") + if err := b.generateSearchIndex([]*entity.Entity{ent}, outDir); err != nil { + t.Fatalf("generateSearchIndex: %v", err) + } + + if _, err := os.Stat(filepath.Join(outDir, "search-index.json")); !os.IsNotExist(err) { + t.Error("search-index.json should not be written when search is disabled") + } +} + +// readSearchIndex reads and unmarshals the search-index.json from outDir. 
+func readSearchIndex(t *testing.T, outDir string) []map[string]string { + t.Helper() + data, err := os.ReadFile(filepath.Join(outDir, "search-index.json")) + if err != nil { + t.Fatalf("reading search-index.json: %v", err) + } + var entries []map[string]string + if err := json.Unmarshal(data, &entries); err != nil { + t.Fatalf("unmarshaling search-index.json: %v", err) + } + return entries +} From 79338bbd20d6535246d51b7d0f646a761cdb7cd5 Mon Sep 17 00:00:00 2001 From: Grey Newell Date: Thu, 9 Apr 2026 14:30:22 -0400 Subject: [PATCH 2/2] fix(build): truncate search index descriptions by rune count not bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit generateSearchIndex was using len(desc)/desc[:120] (byte operations) to limit descriptions to 120 characters. For multi-byte UTF-8 characters (é, ñ, ü, CJK, emoji) this could split a character in the middle, producing a replacement character (U+FFFD) when json.Marshal silently replaces invalid UTF-8 sequences in the output JSON. Fix: convert to []rune, check/slice by rune count, convert back to string. Co-Authored-By: Claude Sonnet 4.6 --- internal/archdocs/pssg/build/build.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/archdocs/pssg/build/build.go b/internal/archdocs/pssg/build/build.go index 2d46845..b03de88 100644 --- a/internal/archdocs/pssg/build/build.go +++ b/internal/archdocs/pssg/build/build.go @@ -1342,8 +1342,8 @@ func (b *Builder) generateSearchIndex(entities []*entity.Entity, outDir string) entries := make([]searchEntry, 0, len(entities)) for _, e := range entities { desc := e.GetString("description") - if len(desc) > 120 { - desc = desc[:120] + if runes := []rune(desc); len(runes) > 120 { + desc = string(runes[:120]) } entries = append(entries, searchEntry{ T: e.GetString("title"),