From 847cfcc70090775120c395a31585b3583f9dfcc8 Mon Sep 17 00:00:00 2001
From: Milos Pesic
Date: Mon, 8 Sep 2025 13:47:30 +0200
Subject: [PATCH 1/2] Extract typed values from strings via regex

---
 utils/kv_regex.go      | 144 ++++++++++++++++++++++++++++++++++++++
 utils/kv_regex_test.go | 153 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 297 insertions(+)
 create mode 100644 utils/kv_regex.go
 create mode 100644 utils/kv_regex_test.go

diff --git a/utils/kv_regex.go b/utils/kv_regex.go
new file mode 100644
index 000000000..1f0b0ccf8
--- /dev/null
+++ b/utils/kv_regex.go
@@ -0,0 +1,144 @@
+package utils
+
+import (
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Scanner holds the source string to be searched.
+type Scanner struct {
+	src string
+}
+
+func New(s string) Scanner { return Scanner{src: s} }
+
+// Raw returns the first capture group by default, or a named group if provided.
+// Example:
+//
+//	re := regexp.MustCompile(`key=(\d+)`) // group 1
+//	kv.Raw(re) -> "123", true
+//	re2 := regexp.MustCompile(`key=(?P<val>\d+)`) // named group "val"
+//	kv.Raw(re2, "val") -> "123", true
+func (kv Scanner) Raw(re *regexp.Regexp, group ...string) (string, bool) {
+	idx, ok := resolveGroupIndex(re, group...)
+	if !ok {
+		return "", false
+	}
+	m := re.FindStringSubmatch(kv.src)
+	if len(m) == 0 || idx >= len(m) {
+		return "", false
+	}
+	return m[idx], true
+}
+
+func (kv Scanner) Uint64(re *regexp.Regexp, group ...string) (uint64, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return 0, false
+	}
+	u, err := strconv.ParseUint(raw, 10, 64)
+	return u, err == nil
+}
+
+func (kv Scanner) Int64(re *regexp.Regexp, group ...string) (int64, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return 0, false
+	}
+	i, err := strconv.ParseInt(raw, 10, 64)
+	return i, err == nil
+}
+
+func (kv Scanner) Uint(re *regexp.Regexp, group ...string) (uint, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return 0, false
+	}
+	u, err := strconv.ParseUint(raw, 10, 0)
+	return uint(u), err == nil
+}
+
+func (kv Scanner) Int(re *regexp.Regexp, group ...string) (int, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return 0, false
+	}
+	i, err := strconv.ParseInt(raw, 10, 0)
+	return int(i), err == nil
+}
+
+func (kv Scanner) Float64(re *regexp.Regexp, group ...string) (float64, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return 0, false
+	}
+	f, err := strconv.ParseFloat(raw, 64)
+	if err != nil || math.IsNaN(f) || math.IsInf(f, 0) {
+		return 0, false
+	}
+	return f, true
+}
+
+func (kv Scanner) Bool(re *regexp.Regexp, group ...string) (bool, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return false, false
+	}
+	switch strings.ToLower(raw) {
+	case "true", "1", "yes":
+		return true, true
+	case "false", "0", "no":
+		return false, true
+	default:
+		return false, false
+	}
+}
+
+// DurationNs treats the captured value as nanoseconds and returns time.Duration.
+func (kv Scanner) DurationNs(re *regexp.Regexp, group ...string) (time.Duration, bool) {
+	u, ok := kv.Uint64(re, group...)
+	if !ok || u > math.MaxInt64 {
+		return 0, false
+	}
+	return time.Duration(u), true
+}
+
+// String returns the captured string, unquoting if it looks like "..." (handles \" escapes).
+func (kv Scanner) String(re *regexp.Regexp, group ...string) (string, bool) {
+	raw, ok := kv.Raw(re, group...)
+	if !ok {
+		return "", false
+	}
+	if len(raw) >= 2 && raw[0] == '"' && raw[len(raw)-1] == '"' {
+		if unq, err := strconv.Unquote(raw); err == nil {
+			return unq, true
+		}
+	}
+	return raw, true
+}
+
+func indexOfSubexpName(re *regexp.Regexp, name string) int {
+	for i, n := range re.SubexpNames() {
+		if n == name {
+			return i
+		}
+	}
+	return -1
+}
+
+func resolveGroupIndex(re *regexp.Regexp, group ...string) (int, bool) {
+	if len(group) == 0 || group[0] == "" {
+		if re.NumSubexp() < 1 {
+			return -1, false // no capturing groups
+		}
+		return 1, true // default to first group
+	}
+	idx := indexOfSubexpName(re, group[0])
+	if idx <= 0 {
+		return -1, false // named group missing
+	}
+	return idx, true
+}
diff --git a/utils/kv_regex_test.go b/utils/kv_regex_test.go
new file mode 100644
index 000000000..e3b43c98b
--- /dev/null
+++ b/utils/kv_regex_test.go
@@ -0,0 +1,153 @@
+package utils
+
+import (
+	"math"
+	"regexp"
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestRaw_DefaultFirstGroup(t *testing.T) {
+	src := `num-pushed=(guint64)123 num-gap=(uint64)456`
+	kv := New(src)
+
+	rePushed := regexp.MustCompile(`\bnum-pushed=\(g?uint64\)(\d+)`)
+	reGap := regexp.MustCompile(`\bnum-gap=\(g?uint64\)(\d+)`)
+
+	rawPushed, ok := kv.Raw(rePushed)
+	require.True(t, ok, "expected match for num-pushed")
+	require.Equal(t, "123", rawPushed)
+
+	rawGap, ok := kv.Raw(reGap)
+	require.True(t, ok, "expected match for num-gap")
+	require.Equal(t, "456", rawGap)
+}
+
+func TestRaw_NamedGroup(t *testing.T) {
+	src := `plc-duration=(guint64)20000000`
+	kv := New(src)
+
+	reDur := regexp.MustCompile(`\bplc-duration=\(g?uint64\)(?P<ns>\d+)`)
+
+	raw, ok := kv.Raw(reDur, "ns")
+	require.True(t, ok, "expected match for plc-duration ns group")
+	require.Equal(t, "20000000", raw)
+}
+
+func TestUint64_Int64_Uint_Int(t *testing.T) {
+	src := `u64=(guint64)184 r64=(int64)-42 u=(uint)48000 i=(int)7`
+	kv := New(src)
+
+	reU64 := regexp.MustCompile(`\bu64=\(g?uint64\)(\d+)`)
+	reI64 := regexp.MustCompile(`\br64=\(g?int64\)(-?\d+)`)
+	reU := regexp.MustCompile(`\bu=\(g?uint\)(\d+)`)
+	reI := regexp.MustCompile(`\bi=\(g?int\)(-?\d+)`)
+
+	vU64, ok := kv.Uint64(reU64)
+	require.True(t, ok)
+	require.Equal(t, uint64(184), vU64)
+
+	vI64, ok := kv.Int64(reI64)
+	require.True(t, ok)
+	require.Equal(t, int64(-42), vI64)
+
+	vU, ok := kv.Uint(reU)
+	require.True(t, ok)
+	require.Equal(t, uint(48000), vU)
+
+	vI, ok := kv.Int(reI)
+	require.True(t, ok)
+	require.Equal(t, 7, vI)
+}
+
+func TestFloat64_AndRejectNaNInf(t *testing.T) {
+	kv := New(`rms=(double)-12.25 bad1=(double)nan bad2=(double)inf`)
+	reOk := regexp.MustCompile(`\brms=\(g?double\)(-?[0-9.]+)`)
+	reNaN := regexp.MustCompile(`\bbad1=\(g?double\)([^,}\s]+)`)
+	reInf := regexp.MustCompile(`\bbad2=\(g?double\)([^,}\s]+)`)
+
+	v, ok := kv.Float64(reOk)
+	require.True(t, ok)
+	require.Equal(t, -12.25, v)
+
+	_, ok = kv.Float64(reNaN)
+	require.False(t, ok, "Float64 should reject NaN")
+
+	_, ok = kv.Float64(reInf)
+	require.False(t, ok, "Float64 should reject Inf")
+}
+
+func TestBool_CaseAndAliases(t *testing.T) {
+	kv := New(`b1=(gboolean)TRUE b2=(boolean)false b3=(boolean)Yes b4=(boolean)0 other=x`)
+	reB1 := regexp.MustCompile(`\bb1=\(g?boolean\)([^,}\s]+)`)
+	reB2 := regexp.MustCompile(`\bb2=\(g?boolean\)([^,}\s]+)`)
+	reB3 := regexp.MustCompile(`\bb3=\(g?boolean\)([^,}\s]+)`)
+	reB4 := regexp.MustCompile(`\bb4=\(g?boolean\)([^,}\s]+)`)
+
+	v1, ok := kv.Bool(reB1)
+	require.True(t, ok)
+	require.True(t, v1)
+
+	v2, ok := kv.Bool(reB2)
+	require.True(t, ok)
+	require.False(t, v2)
+
+	v3, ok := kv.Bool(reB3)
+	require.True(t, ok)
+	require.True(t, v3)
+
+	v4, ok := kv.Bool(reB4)
+	require.True(t, ok)
+	require.False(t, v4)
+}
+
+func TestString_Unquote(t *testing.T) {
+	kv := New(`msg=(string)"hello \"world\"" raw=(string)plain`)
+	reQ := regexp.MustCompile(`\bmsg=\(string\)("(?:[^"\\]|\\.)*")`)
+	reRaw := regexp.MustCompile(`\braw=\(string\)([^,}\s]+)`)
+
+	s, ok := kv.String(reQ)
+	require.True(t, ok)
+	require.Equal(t, `hello "world"`, s)
+
+	s, ok = kv.String(reRaw)
+	require.True(t, ok)
+	require.Equal(t, "plain", s)
+}
+
+func TestDurationNs(t *testing.T) {
+	kv := New(`d=(guint64)20000000`) // 20ms
+	re := regexp.MustCompile(`\bd=\(g?uint64\)(\d+)`)
+
+	d, ok := kv.DurationNs(re)
+	require.True(t, ok)
+	require.Equal(t, 20*time.Millisecond, d)
+}
+
+func TestDurationNs_OverflowGuard(t *testing.T) {
+	overflow := strconv.FormatUint(math.MaxUint64, 10)
+	kv := New(`d=(guint64)` + overflow)
+	re := regexp.MustCompile(`\bd=\(g?uint64\)(\d+)`)
+
+	_, ok := kv.DurationNs(re)
+	require.False(t, ok, "DurationNs should fail on overflow")
+}
+
+func TestNoCapturingGroup_ReturnsFalse(t *testing.T) {
+	kv := New(`nogrp=(uint)7`)
+	re := regexp.MustCompile(`\bnogrp=\(g?uint\)\d+`) // no capturing group
+
+	_, ok := kv.Uint(re)
+	require.False(t, ok, "expected false when regex has no capturing group")
+}
+
+func TestMissingNamedGroup_ReturnsFalse(t *testing.T) {
+	kv := New(`v=(int)42`)
+	re := regexp.MustCompile(`\bv=\(g?int\)(?P<val>\d+)`)
+
+	_, ok := kv.Int(re, "nope") // named group doesn't exist
+	require.False(t, ok, "expected false when named group is missing")
+}

From e1742e52f47775e5a2078cd0414060bf5e9d8f66 Mon Sep 17 00:00:00 2001
From: Milos Pesic
Date: Mon, 8 Sep 2025 13:55:22 +0200
Subject: [PATCH 2/2] more specific ctor name

---
 utils/kv_regex.go      |  2 +-
 utils/kv_regex_test.go | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/utils/kv_regex.go b/utils/kv_regex.go
index 1f0b0ccf8..90c3eef34 100644
--- a/utils/kv_regex.go
+++ b/utils/kv_regex.go
@@ -13,7 +13,7 @@ type Scanner struct {
 	src string
 }
 
-func New(s string) Scanner { return Scanner{src: s} }
+func NewKVRegexScanner(s string) Scanner { return Scanner{src: s} }
 
 // Raw returns the first capture group by default, or a named group if provided.
 // Example:
diff --git a/utils/kv_regex_test.go b/utils/kv_regex_test.go
index e3b43c98b..d3f756d9f 100644
--- a/utils/kv_regex_test.go
+++ b/utils/kv_regex_test.go
@@ -12,7 +12,7 @@ import (
 
 func TestRaw_DefaultFirstGroup(t *testing.T) {
 	src := `num-pushed=(guint64)123 num-gap=(uint64)456`
-	kv := New(src)
+	kv := NewKVRegexScanner(src)
 
 	rePushed := regexp.MustCompile(`\bnum-pushed=\(g?uint64\)(\d+)`)
 	reGap := regexp.MustCompile(`\bnum-gap=\(g?uint64\)(\d+)`)
@@ -28,7 +28,7 @@ func TestRaw_DefaultFirstGroup(t *testing.T) {
 
 func TestRaw_NamedGroup(t *testing.T) {
 	src := `plc-duration=(guint64)20000000`
-	kv := New(src)
+	kv := NewKVRegexScanner(src)
 
 	reDur := regexp.MustCompile(`\bplc-duration=\(g?uint64\)(?P<ns>\d+)`)
 
@@ -39,7 +39,7 @@ func TestRaw_NamedGroup(t *testing.T) {
 
 func TestUint64_Int64_Uint_Int(t *testing.T) {
 	src := `u64=(guint64)184 r64=(int64)-42 u=(uint)48000 i=(int)7`
-	kv := New(src)
+	kv := NewKVRegexScanner(src)
 
 	reU64 := regexp.MustCompile(`\bu64=\(g?uint64\)(\d+)`)
 	reI64 := regexp.MustCompile(`\br64=\(g?int64\)(-?\d+)`)
@@ -64,7 +64,7 @@ func TestUint64_Int64_Uint_Int(t *testing.T) {
 }
 
 func TestFloat64_AndRejectNaNInf(t *testing.T) {
-	kv := New(`rms=(double)-12.25 bad1=(double)nan bad2=(double)inf`)
+	kv := NewKVRegexScanner(`rms=(double)-12.25 bad1=(double)nan bad2=(double)inf`)
 	reOk := regexp.MustCompile(`\brms=\(g?double\)(-?[0-9.]+)`)
 	reNaN := regexp.MustCompile(`\bbad1=\(g?double\)([^,}\s]+)`)
 	reInf := regexp.MustCompile(`\bbad2=\(g?double\)([^,}\s]+)`)
@@ -81,7 +81,7 @@ func TestFloat64_AndRejectNaNInf(t *testing.T) {
 }
 
 func TestBool_CaseAndAliases(t *testing.T) {
-	kv := New(`b1=(gboolean)TRUE b2=(boolean)false b3=(boolean)Yes b4=(boolean)0 other=x`)
+	kv := NewKVRegexScanner(`b1=(gboolean)TRUE b2=(boolean)false b3=(boolean)Yes b4=(boolean)0 other=x`)
 	reB1 := regexp.MustCompile(`\bb1=\(g?boolean\)([^,}\s]+)`)
 	reB2 := regexp.MustCompile(`\bb2=\(g?boolean\)([^,}\s]+)`)
 	reB3 := regexp.MustCompile(`\bb3=\(g?boolean\)([^,}\s]+)`)
@@ -105,7 +105,7 @@ func TestBool_CaseAndAliases(t *testing.T) {
 }
 
 func TestString_Unquote(t *testing.T) {
-	kv := New(`msg=(string)"hello \"world\"" raw=(string)plain`)
+	kv := NewKVRegexScanner(`msg=(string)"hello \"world\"" raw=(string)plain`)
 	reQ := regexp.MustCompile(`\bmsg=\(string\)("(?:[^"\\]|\\.)*")`)
 	reRaw := regexp.MustCompile(`\braw=\(string\)([^,}\s]+)`)
 
@@ -119,7 +119,7 @@ func TestString_Unquote(t *testing.T) {
 }
 
 func TestDurationNs(t *testing.T) {
-	kv := New(`d=(guint64)20000000`) // 20ms
+	kv := NewKVRegexScanner(`d=(guint64)20000000`) // 20ms
 	re := regexp.MustCompile(`\bd=\(g?uint64\)(\d+)`)
 
 	d, ok := kv.DurationNs(re)
@@ -129,7 +129,7 @@ func TestDurationNs(t *testing.T) {
 
 func TestDurationNs_OverflowGuard(t *testing.T) {
 	overflow := strconv.FormatUint(math.MaxUint64, 10)
-	kv := New(`d=(guint64)` + overflow)
+	kv := NewKVRegexScanner(`d=(guint64)` + overflow)
 	re := regexp.MustCompile(`\bd=\(g?uint64\)(\d+)`)
 
 	_, ok := kv.DurationNs(re)
@@ -137,7 +137,7 @@ func TestDurationNs_OverflowGuard(t *testing.T) {
 }
 
 func TestNoCapturingGroup_ReturnsFalse(t *testing.T) {
-	kv := New(`nogrp=(uint)7`)
+	kv := NewKVRegexScanner(`nogrp=(uint)7`)
 	re := regexp.MustCompile(`\bnogrp=\(g?uint\)\d+`) // no capturing group
 
 	_, ok := kv.Uint(re)
@@ -145,7 +145,7 @@ func TestNoCapturingGroup_ReturnsFalse(t *testing.T) {
 }
 
 func TestMissingNamedGroup_ReturnsFalse(t *testing.T) {
-	kv := New(`v=(int)42`)
+	kv := NewKVRegexScanner(`v=(int)42`)
 	re := regexp.MustCompile(`\bv=\(g?int\)(?P<val>\d+)`)
 
 	_, ok := kv.Int(re, "nope") // named group doesn't exist