Skip to content

Commit

Permalink
restored tokenizer for kannada
Browse files Browse the repository at this point in the history
  • Loading branch information
joicemjoseph committed Oct 1, 2020
1 parent aa43541 commit e5b2a8e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/go-sql-driver/mysql v1.5.0 // indirect
github.com/jmoiron/sqlx v1.2.0
github.com/knadh/goyesql v0.0.0-20190822122127-22fd9176b589
github.com/knadh/knphone v0.0.0-20190624172359-d31f66505cbf
github.com/knadh/koanf v0.13.0
github.com/knadh/paginator v0.0.0-20190812121607-b4fbeb7c977b
github.com/knadh/stuffbin v1.0.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhB
github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg=
github.com/knadh/goyesql v0.0.0-20190822122127-22fd9176b589 h1:+dkkaKpb9tbPEQIzunVA+1m4acbQzU0B2HW+lN9gsVA=
github.com/knadh/goyesql v0.0.0-20190822122127-22fd9176b589/go.mod h1:U9J4DmpkGzrHHKIs8XUtsyVnsZZKGtj3iwes78xrm6M=
github.com/knadh/knphone v0.0.0-20190624172359-d31f66505cbf h1:qaF4Sls+Z1vbo7j4Tz+QtwuZ0A+so4mCSoDIFGo/G+w=
github.com/knadh/knphone v0.0.0-20190624172359-d31f66505cbf/go.mod h1:tfUGBbPGiWlkwkFRF3WR3u0ybGgc0Pm/x3bObH0St0A=
github.com/knadh/koanf v0.13.0 h1:OEjNdmrP/5oAhJkNwTtarioqOC4xe6WxRK8Q5ffW8WU=
github.com/knadh/koanf v0.13.0/go.mod h1:7XDF7OJIqSQLUZnaXkjb1HB3CgMEYHyrzmgT8A6xAaE=
github.com/knadh/paginator v0.0.0-20190812121607-b4fbeb7c977b h1:1nu8rKo3secxUYyD725+fnEMGz38+cgaBS1c+jBI4Hk=
Expand Down
41 changes: 41 additions & 0 deletions tokenizers/kannada/kannada.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package main

import (
"fmt"

"github.com/knadh/dictmaker/search"
"github.com/knadh/knphone"
)

// Kannada is the Kannada tokenizer. It generates Postgres full-text search
// strings for romanized (knphone algorithm) Kannada strings.
type Kannada struct {
// ph is the knphone encoder whose Encode method supplies the keys that
// Tokenize assembles into its result.
ph *knphone.KNphone
}

// ID reports the identifier string of this tokenizer.
func (kn *Kannada) ID() string {
	const id = "kannada"
	return id
}

// Name reports the display name of this tokenizer.
func (kn *Kannada) Name() string {
	const displayName = "Kannada"
	return displayName
}

// Tokenize converts a Kannada string into a Romanized (knphone) Postgres
// tsquery string.
//
// The input is passed through the knphone encoder, which yields three keys.
// When the first key is empty the result is the empty string; otherwise the
// keys are combined as "<third> | (<second> & <first>) ".
func (kn *Kannada) Tokenize(in string) string {
	// Template for the query; the trailing space is part of the output.
	const queryFmt = "%s | (%s & %s) "

	first, second, third := kn.ph.Encode(in)
	if len(first) == 0 {
		// Without a first key there is nothing to build a query from.
		return ""
	}
	return fmt.Sprintf(queryFmt, third, second, first)
}

// New constructs a Kannada tokenizer backed by a freshly created knphone
// encoder. The returned error is always nil.
func New() (search.Tokenizer, error) {
	tk := &Kannada{ph: knphone.New()}
	return tk, nil
}

0 comments on commit e5b2a8e

Please sign in to comment.