Skip to content

Commit

Permalink
Use a custom tokenizer, if an import entry's language has one defined, to auto-generate tokens on import.
Browse files Browse the repository at this point in the history
  • Loading branch information
knadh committed Oct 24, 2023
1 parent 68f1ba8 commit 3190001
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions internal/importer/importer.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,17 @@ func (im *Importer) readEntry(r []string) (entry, error) {
e.Initial = strings.ToUpper(string(e.Content[0]))
}

// If the Postgres tokenizer is not set, and there are no tokens supplied,
// see if the language has a custom one and use it.
if lang.Tokenizer != nil && e.TSVectorLang == "" && e.TSVectorTokens == "" {
tks, err := lang.Tokenizer.ToTokens(e.Content, lang.ID)
if err != nil {
return e, fmt.Errorf("error tokenizing content (word) at column 1: %v", err)
}

e.TSVectorTokens = strings.Join(tks, " ")
}

defTypeStr := cleanString(r[9])
if typ == typeDef {
defTypes := splitString(defTypeStr)
Expand Down

0 comments on commit 3190001

Please sign in to comment.