Skip to content

Commit

Permalink
adding caching and new test
Browse files Browse the repository at this point in the history
  • Loading branch information
richardanaya committed Jun 1, 2024
1 parent 1181b8a commit 9ee8d4e
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
44 changes: 42 additions & 2 deletions llm/grammar.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ import (
"strings"
)

// maxGrammarSize is the largest grammar, in bytes, that ValidateGrammar will
// accept: 32 KiB (32 * 1024 = 32768). Longer grammars are rejected up front.
var maxGrammarSize = 32 * 1024

// grammarValidationCache memoizes validation results, keyed by the full
// grammar text. The stored value is the error validation produced, or nil when
// the grammar was valid. The 100-entry limit is not enforced by the map
// itself; addToCache evicts an entry before inserting once the map is full.
// NOTE(review): this is a plain map with no visible locking — confirm that
// callers never validate grammars from multiple goroutines at once.
var grammarValidationCache = make(map[string]error)

func findIndexOfTextNotInQuotesOrCharacterSet(input string, text string) int {
quoteBalance := 0
bracketBalance := 0
Expand Down Expand Up @@ -244,6 +250,11 @@ func validateCharacterClass(charClass string) error {
func validateStringLiteral(strLiteral string) error {
validEscapeCharacters := "\\\"ntrxu"

// make sure the string literal starts and ends with a quote
if len(strLiteral) < 2 || strLiteral[0] != '"' || strLiteral[len(strLiteral)-1] != '"' {
return fmt.Errorf("string literal must start and end with a quote")
}

i := 0
for i < len(strLiteral) {
if strLiteral[i] == '\\' {
Expand Down Expand Up @@ -463,7 +474,29 @@ func parseGrammar(grammar string) (map[string]([]Token), error) {
return ruleTokens, nil
}

// addToCache records the validation result err for grammar in
// grammarValidationCache; a nil err marks the grammar as valid.
//
// When the cache already holds maxCachedGrammars entries and grammar is not
// one of them, one existing entry is evicted first. Go does not specify map
// iteration order, so the evicted entry is arbitrary, not the oldest one.
// Overwriting a grammar that is already cached never evicts anything, because
// the map does not grow in that case.
//
// NOTE(review): the cache is a plain map and this function takes no lock;
// confirm callers never validate grammars concurrently.
func addToCache(grammar string, err error) {
	// maxCachedGrammars caps how many validation results are kept at once.
	const maxCachedGrammars = 100

	_, cached := grammarValidationCache[grammar]
	if !cached && len(grammarValidationCache) >= maxCachedGrammars {
		// Make room by dropping a single, arbitrarily chosen entry.
		for key := range grammarValidationCache {
			delete(grammarValidationCache, key)
			break
		}
	}
	grammarValidationCache[grammar] = err
}

func ValidateGrammar(grammar string) error {
// check to see if we've cached this before and if so return it
if err, ok := grammarValidationCache[grammar]; ok {
return err
}

if len(grammar) > maxGrammarSize {
err := fmt.Errorf("grammar size exceeds maximum size of %d bytes", maxGrammarSize)
addToCache(grammar, err)
return err
}

// Since GBNF is essentially just a list of rules, we can validate the grammar by
// removing all comments, removing all non-essential white space
// and then breaking the input into an array of rules
Expand All @@ -474,6 +507,7 @@ func ValidateGrammar(grammar string) error {

ruleTokens, err := parseGrammar(grammar)
if err != nil {
addToCache(grammar, err)
return err
}

Expand All @@ -484,13 +518,19 @@ func ValidateGrammar(grammar string) error {

// check that it has root rule
if _, ok := definedRules["root"]; !ok {
return fmt.Errorf("no root rule defined")
err := fmt.Errorf("no root rule defined")
addToCache(grammar, err)
return err
}

for key, value := range ruleTokens {
if err := validateRule(value, definedRules); err != nil {
return fmt.Errorf("error in rule \"%s\": %v", key, err)
err = fmt.Errorf("error in rule \"%s\": %v", key, err)
addToCache(grammar, err)
return err
}
}

addToCache(grammar, nil)
return nil
}
9 changes: 9 additions & 0 deletions llm/grammar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,15 @@ func TestNoRoot(t *testing.T) {
}
}

// TestInvalidRoot checks that ValidateGrammar reports an error for a root rule
// whose string literal is never closed.
func TestInvalidRoot(t *testing.T) {
	// the closing quote after yes is deliberately missing
	const grammar = `root ::= "yes`
	if got := ValidateGrammar(grammar); got == nil {
		t.Errorf("Expected error validating grammar, got nil")
	}
}

func TestBadLlama(t *testing.T) {
// this is a common typo
input := `root :== "yes" | "no"`
Expand Down

0 comments on commit 9ee8d4e

Please sign in to comment.