From 90baa68c445fa98c04325eb6ce0976bdc6f10c43 Mon Sep 17 00:00:00 2001 From: Eric Fritz Date: Thu, 11 Sep 2025 14:05:41 -0500 Subject: [PATCH 1/3] Use shared IsSimpleIdentifierCharacter when testing identifier characters. --- bindings/go/scip/symbol_parser.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/bindings/go/scip/symbol_parser.go b/bindings/go/scip/symbol_parser.go index 816b4476..d427162a 100644 --- a/bindings/go/scip/symbol_parser.go +++ b/bindings/go/scip/symbol_parser.go @@ -3,7 +3,6 @@ package scip import ( "fmt" "strings" - "unicode" "github.com/cockroachdb/errors" "github.com/sourcegraph/beaut" @@ -469,10 +468,6 @@ func (e unrecognizedDescriptorError) Error() string { return fmt.Sprintf("unrecognized descriptor %q", e.value) } -func isIdentifierCharacter(r rune) bool { - return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '+' || r == '$' || r == '_' -} - func (z *symbolParserV2) advanceOneByte(b byte) { assert(z.currentRune == rune(b), "passed in byte does not match current rune") nextRune, nextRuneByteLength := z.peekNext() @@ -503,7 +498,7 @@ func (z *symbolParserV2) acceptIdentifier(what parseCtx, sw *stringWriter) error start := z.byteIndex slen := len(z.SymbolString) for z.byteIndex < slen { - if !isIdentifierCharacter(z.currentRune) { + if !shared.IsSimpleIdentifierCharacter(z.currentRune) { break } z.advanceRune() From b49e8495a0c54c5cdb38111b05d27c5f9809c1f3 Mon Sep 17 00:00:00 2001 From: Eric Fritz Date: Thu, 11 Sep 2025 14:06:12 -0500 Subject: [PATCH 2/3] Advance by the full rune byte length in advanceRune. 
--- bindings/go/scip/symbol_parser.go | 2 +- bindings/go/scip/symbol_test.go | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/bindings/go/scip/symbol_parser.go b/bindings/go/scip/symbol_parser.go index d427162a..b40c5c47 100644 --- a/bindings/go/scip/symbol_parser.go +++ b/bindings/go/scip/symbol_parser.go @@ -479,7 +479,7 @@ func (z *symbolParserV2) advanceOneByte(b byte) { func (z *symbolParserV2) advanceRune() { nextRune, nextRuneByteLength := z.peekNext() - z.advance(nextRune, min(nextRuneByteLength, 1)) + z.advance(nextRune, nextRuneByteLength) } func (z *symbolParserV2) acceptOneByte(b byte, what parseCtx) error { diff --git a/bindings/go/scip/symbol_test.go b/bindings/go/scip/symbol_test.go index b92b440b..7bbc9d5a 100644 --- a/bindings/go/scip/symbol_test.go +++ b/bindings/go/scip/symbol_test.go @@ -108,6 +108,19 @@ func TestParseSymbol(t *testing.T) { }, }, }, + { + Symbol: "a b c d `F⃗`.", Expected: &Symbol{ + Scheme: "a", + Package: &Package{ + Manager: "b", + Name: "c", + Version: "d", + }, + Descriptors: []*Descriptor{{ + Name: "F⃗", Suffix: Descriptor_Term, + }}, + }, + }, } for _, test := range tests { t.Run(test.Symbol, func(t *testing.T) { From 52041acc9b4d1bff553eb78e83dea522bbad4e86 Mon Sep 17 00:00:00 2001 From: Eric Fritz Date: Thu, 11 Sep 2025 14:40:41 -0500 Subject: [PATCH 3/3] Add additional test cases. 
--- bindings/go/scip/symbol_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bindings/go/scip/symbol_test.go b/bindings/go/scip/symbol_test.go index 7bbc9d5a..dfcb546e 100644 --- a/bindings/go/scip/symbol_test.go +++ b/bindings/go/scip/symbol_test.go @@ -140,6 +140,8 @@ func TestParseSymbolError(t *testing.T) { "lsif-java maven package 1.0.0 java/io/File#Entry.trailingstring", "lsif-java maven package 1.0.0 java/io/File#Entry.unrecognizedSuffix@", "lsif-java maven package 1.0.0 java/io/File#Entry.nonSimpλeIdentifier.", + "lsif-java maven package 1.0.0 java/io/File#Entry.`unterminatedEscapedIdentifier", + "lsif-java maven package 1.0.0 java/io/File#Entry.[UnterminatedDescriptorSuffix", "local 🧠", "local ", "local &&&",