A high-performance multi-pattern regexp classifier for Go using pluggable regex engines. This library compiles multiple regexp patterns into a single regexp for efficient lookup.
go get github.com/sfkleach/regexptable
- High Performance: Uses a single compiled regexp with named capture groups for O(n) matching regardless of pattern count (with Go's default regexp implementation)
- Builder Pattern: `RegexpTableBuilder` provides a convenient API that hides compilation complexity
- Lazy Compilation: Defers regexp build until lookup for better performance when adding multiple patterns
- Type Safe: Generic implementation supports any value type `T`
- Reserved Namespace: Uses `__REGEXPTABLE_` prefix to avoid conflicts with user-defined capture groups
- Built-in Regexp: Uses Go's standard `regexp` package by default - no external dependencies
- Full Match Access: Returns both the classified value and complete submatch details
- Pluggable Regexp Engines: Supports different regexp engines (see integrating with regexp2 for advanced features like lookbehind)
package main
import (
"fmt"
"github.com/sfkleach/regexptable"
)
type TokenType int
const (
TokenKeyword TokenType = iota
TokenIdentifier
TokenNumber
)
func main() {
// Use the builder pattern - no need to think about compilation!
table, err := regexptable.NewRegexpTableBuilder[TokenType]().
AddPattern(`\b(if|else|while|for)\b`, TokenKeyword).
AddPattern(`\b[a-zA-Z_][a-zA-Z0-9_]*\b`, TokenIdentifier).
AddPattern(`\b\d+\b`, TokenNumber).
Build()
if err != nil {
panic(err) // Only fails if patterns are invalid
}
// Use the table
if value, matches, ok := table.TryLookup("if"); ok {
fmt.Printf("Matched: %v (%s)\n", value, matches[0])
}
}
// Simple fluent interface
table, err := regexptable.NewRegexpTableBuilder[string]().
AddPattern("hello", "greeting").
AddPattern("world", "place").
Build()
// Create a base builder
base := regexptable.NewRegexpTableBuilder[TokenType]().
AddPattern(`form\w*`, FormStart).
AddPattern(`end\w*`, FormEnd)
// Clone and extend for different contexts
webBuilder := base.Clone().
AddPattern(`button\w*`, Button).
AddPattern(`input\w*`, Input)
codeBuilder := base.Clone().
AddPattern(`class\w*`, ClassDef).
AddPattern(`method\w*`, MethodDef)
// Build specialized tables
webTable, _ := webBuilder.Build()
codeTable, _ := codeBuilder.Build()
// For static configs where patterns are known valid
var GlobalTokenTable = regexptable.NewRegexpTableBuilder[TokenType]().
AddPattern(`\b(if|else|while|for)\b`, Keyword).
AddPattern(`\b[a-zA-Z_]\w*\b`, Identifier).
AddPattern(`\b\d+\b`, Number).
MustBuild() // Panics if patterns are invalid
func lookupToken(input string) TokenType {
value, _, _ := GlobalTokenTable.TryLookup(input)
return value
}
You can create alternation patterns (multiple patterns that map to the same value) in two ways:
// All these number formats map to "number"
table, err := regexptable.NewRegexpTableBuilder[string]().
AddSubPatterns([]string{`\d+`, `0x[0-9a-fA-F]+`, `0b[01]+`}, "number").
AddPattern(`[a-zA-Z]+`, "word").
Build(true, false)
// Same result with type-safe method chaining
table, err := regexptable.NewRegexpTableBuilder[string]().
BeginAddSubPatterns().
AddSubPattern(`\d+`).
AddSubPattern(`0x[0-9a-fA-F]+`).
AddSubPattern(`0b[01]+`).
EndAddSubPatterns("number").
AddPattern(`[a-zA-Z]+`, "word").
Build(true, false)
The type-safe interface prevents calling methods out of order and ensures proper alternation construction. Both approaches create the same regexp pattern: `(?:\d+|0x[0-9a-fA-F]+|0b[01]+)`.
builder := regexptable.NewRegexpTableBuilder[string]()
// Add patterns
builder.AddPattern("test1", "value1")
builder.AddPattern("test2", "value2")
// Clear and reuse
builder.Clear()
Creates a new builder for RegexpTable[T] using the standard Go regexp engine.
Creates a new builder using a custom regexp engine.
Adds a pattern to the builder. Returns the builder for method chaining.
Creates the final RegexpTable with all accumulated patterns. Compilation happens here.
Like Build but panics on error. Useful for static configurations.
Creates a copy of the builder with the same patterns and engine.
Removes all patterns from the builder.
Creates a new empty RegexpTable for values of type T using the standard Go regexp engine.
Creates a new empty RegexpTable using a custom regexp engine.
Adds a regexp pattern with its associated value to the table. Note: This method uses lazy compilation - the regexp is not compiled until lookup is performed.
Like AddPattern but immediately recompiles the regexp. Use this when you need immediate validation of the pattern or when you're only adding one pattern.
Manually rebuilds the union regexp from all registered patterns. This is exposed to allow manual control over when recompilation, and hence error checking, occurs.
Attempts to match the input against all registered patterns. Returns the associated value, submatch slice, and error. Automatically recompiles if patterns have been added/removed.
Like Lookup but returns a boolean success indicator instead of an error.
table := regexptable.NewRegexpTable[string](true, false) // start anchored, not end anchored
err := table.AddPattern(`\d+`, "number")
if err != nil {
// Handle regexp compilation error
}
// Method 1: Using Lookup with error handling
if value, matches, err := table.Lookup(input); err != nil {
switch {
case strings.Contains(err.Error(), "no patterns configured"):
// Handle empty table
case strings.Contains(err.Error(), "no pattern matched"):
// Handle no match
default:
// Handle other errors
}
}
// Method 2: Using TryLookup for simple success/failure
if value, matches, ok := table.TryLookup(input); ok {
// Handle successful match
} else {
// Handle no match
}
- Lazy compilation (default): Best when adding multiple patterns at once
- Immediate compilation: Best when you need immediate error feedback or adding single patterns
- Manual compilation: Best when you want precise control over compilation timing
RegexpTable compiles all patterns into a single union regexp like:
^(?:(?P<__REGEXPTABLE_1__>pattern1)|(?P<__REGEXPTABLE_2__>pattern2)|(?P<__REGEXPTABLE_3__>pattern3))
This provides O(n) matching performance in the length n of the input, regardless of the number of patterns, as opposed to O(n*m) when testing m patterns individually.
The RegexpTable supports different regexp engines through the RegexpEngine interface. This allows you to use regexp engines with different named capture group syntaxes:
package main
import (
"fmt"
"github.com/sfkleach/regexptable"
)
func main() {
// Standard Go engine: (?P<name>pattern)
goTable := regexptable.NewRegexpTableBuilder[string]().
AddPattern("test.*", "match").
MustBuild()
// .NET-style engine: (?<name>pattern)
dotNetEngine := regexptable.NewDotNetRegexpEngine()
dotNetTable := regexptable.NewRegexpTableBuilderWithEngine[string](dotNetEngine).
AddPattern("test.*", "match").
MustBuild()
// Both tables work identically from the user's perspective
value, _, found := goTable.TryLookup("testing") // Returns "match", true
value, _, found = dotNetTable.TryLookup("testing") // Returns "match", true
// Show the different internal regexp formats
fmt.Printf("Go style: %s\n", regexptable.NewStandardRegexpEngine().FormatNamedGroup("test", "pattern"))
fmt.Printf(".NET style: %s\n", dotNetEngine.FormatNamedGroup("test", "pattern"))
}
// Example: Python-style regexp engine
type PythonRegexpEngine struct{}
func (e *PythonRegexpEngine) Compile(pattern string) (regexptable.CompiledRegexp, error) {
// Wrap Go's regexp with your engine's interface
compiled, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return &regexptable.StandardCompiledRegexp{compiled}, nil
}
func (e *PythonRegexpEngine) FormatNamedGroup(groupName, pattern string) string {
return fmt.Sprintf("(?P<%s>%s)", groupName, pattern) // Python uses same as Go
}
// Use your custom engine
pythonTable := regexptable.NewRegexpTableBuilderWithEngine[string](&PythonRegexpEngine{}).
AddPattern("test.*", "match").
MustBuild()
type TokenInfo struct {
Type string
Category string
}
table := regexptable.NewRegexpTable[TokenInfo]()
// Add complex patterns with rich metadata
table.AddPattern(`\b(if|else|while|for)\b`, TokenInfo{
Type: "keyword",
Category: "control",
})
table.AddPattern(`\b[a-zA-Z_][a-zA-Z0-9_]*\b`, TokenInfo{
Type: "identifier",
Category: "symbol",
})
// Lookup returns rich metadata
if info, matches, err := table.Lookup("if"); err == nil {
fmt.Printf("Type: %s, Category: %s\n", info.Type, info.Category)
}
table := regexptable.NewRegexpTable[string]()
// Pattern with capture groups
table.AddPattern(`(\d{4})-(\d{2})-(\d{2})`, "date")
if value, matches, err := table.Lookup("2023-12-25"); err == nil {
fmt.Printf("Full match: %s\n", matches[0]) // "2023-12-25"
fmt.Printf("Year: %s\n", matches[1]) // "2023"
fmt.Printf("Month: %s\n", matches[2]) // "12"
fmt.Printf("Day: %s\n", matches[3]) // "25"
}
- Uses Go's built-in `regexp` package with named capture groups
- Auto-generates unique pattern names with reserved `__REGEXPTABLE_` prefix
- Compiles all patterns into a single union regexp for optimal performance
- Defers rebuilds to minimize overhead when adding multiple patterns (although this also defers the check for regexp syntax validity)
- Thread-safe for concurrent reads after compilation (not thread-safe for add/remove)