Skip to content

Commit

Permalink
Merge pull request #32 from knadh/tokenize-import
Browse files (browse the repository at this point in the history)
  • Loading branch information
knadh committed Oct 24, 2023
2 parents b0b8b3b + de554cf commit 84619de
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
11 changes: 11 additions & 0 deletions internal/importer/importer.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,17 @@ func (im *Importer) readEntry(r []string) (entry, error) {
e.Initial = strings.ToUpper(string(e.Content[0]))
}

// If no Postgres tokenizer (TSVectorLang) is set for the entry and no
// tokens are supplied, use the language's custom tokenizer, if it has one.
if lang.Tokenizer != nil && e.TSVectorLang == "" && e.TSVectorTokens == "" {
tks, err := lang.Tokenizer.ToTokens(e.Content, lang.ID)
if err != nil {
return e, fmt.Errorf("error tokenizing content (word) at column 1: %v", err)
}

e.TSVectorTokens = strings.Join(tks, " ")
}

defTypeStr := cleanString(r[9])
if typ == typeDef {
defTypes := splitString(defTypeStr)
Expand Down
2 changes: 1 addition & 1 deletion tokenizers/indicphone/indicphone.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (ip *IndicPhone) ToTokens(s string, lang string) ([]string, error) {
}

if key0 == "" {
return nil, nil
continue
}

tokens = append(tokens,
Expand Down

0 comments on commit 84619de

Please sign in to comment.