From b64c8634875027bcfe448594e610c4a334885edd Mon Sep 17 00:00:00 2001 From: terasum Date: Sun, 22 Oct 2023 10:30:33 +0800 Subject: [PATCH] feat: support slqite index engine --- Makefile | 4 +- frontend/src/view/main/MainRightToolbar.vue | 43 ++++- go.mod | 4 + go.sum | 10 ++ internal/gomdict/mdict.go | 4 - internal/medengine/file.go | 62 +++++++ internal/medengine/file_test.go | 30 ++++ internal/medengine/fulltext.go | 1 + internal/medengine/index_engine.go | 145 ++++++++++++++++ internal/medengine/index_engine_test.go | 49 ++++++ internal/medengine/testdata/.gitignore | 1 + internal/utils/pathutil.go | 7 + pkg/model/dict_interface.go | 3 + pkg/service/.gitignore | 1 + pkg/service/mdict.go | 89 ++++------ pkg/service/mdict_test.go | 46 ++++- pkg/service/mdict_virtual_mdict.go | 176 ++++++++++++++++++++ pkg/service/startdict.go | 4 + 18 files changed, 606 insertions(+), 73 deletions(-) create mode 100644 internal/medengine/file.go create mode 100644 internal/medengine/file_test.go create mode 100644 internal/medengine/fulltext.go create mode 100644 internal/medengine/index_engine.go create mode 100644 internal/medengine/index_engine_test.go create mode 100644 internal/medengine/testdata/.gitignore create mode 100644 pkg/service/.gitignore create mode 100644 pkg/service/mdict_virtual_mdict.go diff --git a/Makefile b/Makefile index eae1fdb0..69d1004f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ dev: wails dev --loglevel info build: - wails build -devtools + wails build license: addlicense -c "Quan Chen " -l gpl3 -v -y 2023 -ignore frontend/**/* -ignore build/**/* -ignore .github/**/* frontend/src @@ -11,4 +11,4 @@ create-dmg: -.PHONY: build license \ No newline at end of file +.PHONY: build license diff --git a/frontend/src/view/main/MainRightToolbar.vue b/frontend/src/view/main/MainRightToolbar.vue index 04183789..f0ea4a5e 100644 --- a/frontend/src/view/main/MainRightToolbar.vue +++ b/frontend/src/view/main/MainRightToolbar.vue @@ -136,19 +136,48 @@ function loadDictionaries() { const totalNumber = res.length; const updater = updateProgress(totalNumber); - for (let i = 0; i < res.length; i++) { - state.dictList.push(res[i]); - console.log(`[app-init] building dictionary, index: ${i}`, res[i]) + function sequenceHandle(promiseArr) { + const pro = promiseArr.shift() + if(pro && pro.handle) { + pro.handle().then((resp)=>{ + pro.callback(resp); + sequenceHandle(promiseArr) + }) + } + } - BuildIndex(res[i].id).then((resp) => { - let progressHint = `词典 ${res[i].name} 加载完成`; + function buildIndexPromise(i, id, name) { + return { + handle: function() { + return BuildIndex(id) + }, + callback: function(resp) { + let progressHint = `词典 ${name} 加载完成`; console.log(`[app-init] building success, index: ${i}`, resp); updater(progressHint) + } + } + } - - }); + let promiseArray = []; + for (let i = 0; i < res.length; i++) { + state.dictList.push(res[i]); + promiseArray.push(buildIndexPromise(i, res[i].id, res[i].name)) } + sequenceHandle(promiseArray); + + // for (let i = 0; i < res.length; i++) { + // state.dictList.push(res[i]); + // console.log(`[app-init] building dictionary, index: ${i}`, res[i]) + + // BuildIndex(res[i].id).then((resp) => { + // let progressHint = `词典 ${res[i].name} 加载完成`; + // console.log(`[app-init] building success, index: ${i}`, resp); + // updater(progressHint) + // }); + // } + }); } diff --git a/go.mod b/go.mod index 8c282be8..b2d4dda1 100644 --- a/go.mod +++ b/go.mod @@ -11,8 +11,10 @@ require ( github.com/creasty/go-levenshtein v0.0.0-20161128082938-38ce641d5030 github.com/gin-gonic/gin v1.9.1 github.com/kirsle/configdir v0.0.0-20170128060238-e45d2f54772f + github.com/mattn/go-sqlite3 v1.14.17 github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 github.com/rasky/go-lzo v0.0.0-20200203143853-96a758eda86e + github.com/silenceper/pool v1.0.0 github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 github.com/spf13/viper v1.10.1 go.etcd.io/etcd/client/pkg/v3 v3.5.1 @@ -37,6 +39,7 @@ require ( github.com/jchv/go-winloader v0.0.0-20210711035445-715c2860da7e // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.4 // indirect + github.com/konsorten/go-windows-terminal-sequences v1.0.1 // indirect github.com/labstack/echo/v4 v4.10.2 // indirect github.com/labstack/gommon v0.4.0 // indirect github.com/leaanthony/go-ansi-parser v1.6.0 // indirect @@ -57,6 +60,7 @@ require ( github.com/rivo/uniseg v0.2.0 // indirect github.com/rogpeppe/go-internal v1.6.1 // indirect github.com/samber/lo v1.38.1 // indirect + github.com/sirupsen/logrus v1.4.2 // indirect github.com/spf13/afero v1.6.0 // indirect github.com/spf13/cast v1.4.1 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect diff --git a/go.sum b/go.sum index 73928ef1..0bd28157 100644 --- a/go.sum +++ b/go.sum @@ -51,6 +51,8 @@ github.com/kirsle/configdir v0.0.0-20170128060238-e45d2f54772f/go.mod h1:4rEELDS github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -84,6 +86,8 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM= +github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= github.com/mitchellh/mapstructure v1.4.3 h1:OVowDSCllw/YjdLkam3/sm7wEtOy59d8ndGgCcyj8cs= github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -113,6 +117,10 @@ github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBO github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/silenceper/pool v1.0.0 h1:JTCaA+U6hJAA0P8nCx+JfsRCHMwLTfatsm5QXelffmU= +github.com/silenceper/pool v1.0.0/go.mod h1:3DN13bqAbq86Lmzf6iUXWEPIWFPOSYVfaoceFvilKKI= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 h1:JIAuq3EEf9cgbU6AtGPK4CTG3Zf6CKMNqf0MHTggAUA= github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= @@ -126,6 +134,7 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/spf13/viper v1.10.1 h1:nuJZuYpG7gTj/XqiUwg8bA0cp1+M2mC3J4g5luUYBKk= github.com/spf13/viper v1.10.1/go.mod h1:IGlFPqhNAPKRxohIzWpI5QEy4kuI7tcl5WvR+8qy1rU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -178,6 +187,7 @@ golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200810151505-1b9f1253b3ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/gomdict/mdict.go b/internal/gomdict/mdict.go index bce03c95..197a9b30 100644 --- a/internal/gomdict/mdict.go +++ b/internal/gomdict/mdict.go @@ -82,10 +82,6 @@ func (mdict *Mdict) BuildIndex() error { return err } - err = mdict.BuildBKTree() - if err != nil { - return err - } return nil } diff --git a/internal/medengine/file.go b/internal/medengine/file.go new file mode 100644 index 00000000..f2f83031 --- /dev/null +++ b/internal/medengine/file.go @@ -0,0 +1,62 @@ +// +// Copyright (C) 2023 Quan Chen +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package medengine + +import ( + "database/sql" + _ "github.com/mattn/go-sqlite3" + "github.com/op/go-logging" + "github.com/terasum/medict/internal/utils" +) + +var log = logging.MustGetLogger("default") + +// CreateMeIndex creates Medict index file +// Medict words index file format: +// filename: meidx +// internal format: sqlite unzipped +// sqlite table name: meidx_keyword_index +// table columns: +// -------------------------- +// | idx_no | key_word | key_block_index | record_start_offset | record_end_offset | compressed_size | decompressed_size | dict_type | +func CreateMeIndex(idxFilePath string) error { + if utils.FileExists(idxFilePath) { + return nil + } + + db, err := sql.Open("sqlite3", idxFilePath) + if err != nil { + return err + } + defer db.Close() + + sqlStmt := ` + DROP TABLE if EXISTS meidx_keyword_index; + CREATE TABLE meidx_keyword_index ( + idx_no integer primary key autoincrement not null, + key_word varchar(512) unique, + key_block_index long , + record_start_offset long , + record_end_offset long); +-- CREATE INDEX index_meidx_keyword_index_keyword ON meidx_keyword_index(key_word); + ` + _, err = db.Exec(sqlStmt) + if err != nil { + return err + } + return nil +} diff --git a/internal/medengine/file_test.go b/internal/medengine/file_test.go new file mode 100644 index 00000000..79c8258b --- /dev/null +++ b/internal/medengine/file_test.go @@ -0,0 +1,30 @@ +// +// Copyright (C) 2023 Quan Chen +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package medengine + +import ( + _ "github.com/mattn/go-sqlite3" + "testing" +) + +func TestCreateMeIndex(t *testing.T) { + err := CreateMeIndex("./testdata/testidx.meidx") + if err != nil { + t.Fatal(err) + } + +} diff --git a/internal/medengine/fulltext.go b/internal/medengine/fulltext.go new file mode 100644 index 00000000..0610859b --- /dev/null +++ b/internal/medengine/fulltext.go @@ -0,0 +1 @@ +package medengine diff --git a/internal/medengine/index_engine.go b/internal/medengine/index_engine.go new file mode 100644 index 00000000..8931afb7 --- /dev/null +++ b/internal/medengine/index_engine.go @@ -0,0 +1,145 @@ +package medengine + +import ( + "database/sql" + "github.com/silenceper/pool" + "github.com/terasum/medict/pkg/model" + "time" +) + +type IndexEngine struct { + indexFilePath string + connPool pool.Pool +} + +type IndexRecord struct { + keyWord string + keyBlockIndex int64 + recordStartOffset int64 + recordEndOffset int64 +} + +func NewIndexRecord( + KeyWord string, + RecordStartOffset int64, + RecordEndOffset int64, + KeyBlockIdx int64, +) *IndexRecord { + return &IndexRecord{ + keyWord: KeyWord, + keyBlockIndex: KeyBlockIdx, + recordStartOffset: RecordStartOffset, + recordEndOffset: RecordEndOffset, + } +} + +func (idr *IndexRecord) ToKeyBlockEntry() *model.KeyBlockEntry { + return &model.KeyBlockEntry{ + ID: 0, + RecordStartOffset: idr.recordStartOffset, + RecordEndOffset: idr.recordEndOffset, + KeyWord: idr.keyWord, + KeyBlockIdx: idr.keyBlockIndex, + } +} + +func NewEngine(indexFilePath string) (*IndexEngine, error) { + err := CreateMeIndex(indexFilePath) + if err != nil { + return nil, err + } + + connPool, err := pool.NewChannelPool(&pool.Config{ + InitialCap: 1, + MaxCap: 3, + MaxIdle: 3, + Factory: func() (interface{}, error) { + db, err1 := sql.Open("sqlite3", indexFilePath) + if err1 != nil { + return nil, err1 + } + return db, nil + }, + Close: func(db interface{}) error { + return db.(*sql.DB).Close() + }, + Ping: func(db interface{}) error { + return nil + }, + IdleTimeout: 60 * time.Second, + }) + if err != nil { + return nil, err + } + + return &IndexEngine{ + indexFilePath: indexFilePath, + connPool: connPool, + }, nil +} + +func (engine *IndexEngine) AddRecord(record *IndexRecord) error { + db, err := engine.Acquire() + if err != nil { + return err + } + defer engine.Release(db) + + sqlfmt := `INSERT INTO meidx_keyword_index ( + key_word, + key_block_index, + record_start_offset, + record_end_offset) + VALUES(?, ?, ?, ?)` + statement, err := db.Prepare(sqlfmt) + if err != nil { + return err + } + _, err = statement.Exec(record.keyWord, record.keyBlockIndex, record.recordStartOffset, record.recordEndOffset) + return err +} + +func (engine *IndexEngine) Search(keyword string) ([]*IndexRecord, error) { + db, err := engine.Acquire() + if err != nil { + return nil, err + } + defer engine.Release(db) + + sqlfmt := `SELECT key_word, key_block_index, record_start_offset, record_end_offset FROM meidx_keyword_index WHERE key_word LIKE ?` + statement, err := db.Prepare(sqlfmt) + if err != nil { + return nil, err + } + defer statement.Close() + result, err := statement.Query(keyword + "%") + if err != nil { + return nil, err + } + results := make([]*IndexRecord, 0) + for result.Next() { + temp := new(IndexRecord) + err1 := result.Scan(&(temp.keyWord), &(temp.keyBlockIndex), &(temp.recordStartOffset), &(temp.recordEndOffset)) + if err1 != nil { + log.Errorf("sql query error %s", err1.Error()) + } + results = append(results, temp) + } + return results, nil +} + +func (engine *IndexEngine) Acquire() (*sql.DB, error) { + db, err := engine.connPool.Get() + return db.(*sql.DB), err +} + +func (engine *IndexEngine) Release(db *sql.DB) { + err := engine.connPool.Put(db) + if err != nil { + log.Errorf(err.Error()) + } +} + +func (engine *IndexEngine) Close() { + engine.connPool.Release() +} diff --git a/internal/medengine/index_engine_test.go b/internal/medengine/index_engine_test.go new file mode 100644 index 00000000..d4f840b4 --- /dev/null +++ b/internal/medengine/index_engine_test.go @@ -0,0 +1,49 @@ +package medengine + +import "testing" + +func TestIndexEngine_AddRecord(t *testing.T) { + type fields struct { + indexFilePath string + } + type args struct { + record *IndexRecord + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "t1", + fields: struct{ indexFilePath string }{indexFilePath: "./testdata/testidx.meidx"}, + args: args{record: &IndexRecord{ + keyWord: "0test", + keyBlockIndex: 0, + recordStartOffset: 1231, + recordEndOffset: 1233, + }}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + engine, err := NewEngine(tt.fields.indexFilePath) + if err != nil { + t.Fatal(err) + } + db, err := engine.Acquire() + if err != nil { + t.Fatal(err) + } + + defer engine.Release(db) + + if err = engine.AddRecord(tt.args.record); (err != nil) != tt.wantErr { + t.Errorf("AddRecord() error = %v, wantErr %v", err, tt.wantErr) + } + + }) + } +} diff --git a/internal/medengine/testdata/.gitignore b/internal/medengine/testdata/.gitignore new file mode 100644 index 00000000..b65ac623 --- /dev/null +++ b/internal/medengine/testdata/.gitignore @@ -0,0 +1 @@ +testidx.meidx \ No newline at end of file diff --git a/internal/utils/pathutil.go b/internal/utils/pathutil.go index e1e7cac0..68317f57 100644 --- a/internal/utils/pathutil.go +++ b/internal/utils/pathutil.go @@ -52,3 +52,10 @@ func FileExists(fpath string) bool { return false } + +func FileRemove(fpath string) error { + if FileExists(fpath) && fpath != "/" { + return os.RemoveAll(fpath) + } + return nil +} diff --git a/pkg/model/dict_interface.go b/pkg/model/dict_interface.go index 3ecfe6c7..401598ce 100644 --- a/pkg/model/dict_interface.go +++ b/pkg/model/dict_interface.go @@ -9,6 +9,9 @@ const ImgTypeJPG ImgType = "jpg" const ImgTypePNG ImgType = "png" type GeneralDictionary interface { + // 取得当前词典所有词列表 + KeyList() []string + // BuildIndex 构建索引 BuildIndex() error // DictType 返回词典类型, 目前仅支持 mdict 和 stardict DictType() DictType diff --git a/pkg/service/.gitignore b/pkg/service/.gitignore new file mode 100644 index 00000000..6d72ab8e --- /dev/null +++ b/pkg/service/.gitignore @@ -0,0 +1 @@ +/testdata \ No newline at end of file diff --git a/pkg/service/mdict.go b/pkg/service/mdict.go index 284e37e4..66ef4f73 100644 --- a/pkg/service/mdict.go +++ b/pkg/service/mdict.go @@ -7,18 +7,18 @@ import ( "strings" "sync" - "github.com/terasum/medict/internal/gomdict" "github.com/terasum/medict/pkg/model" ) var _ model.GeneralDictionary = &Mdict{} type Mdict struct { - mdxFilePath string - mddFilePaths []string - mdxins *gomdict.Mdict - mddinss []*gomdict.Mdict - + mdxFilePath string + mddFilePaths []string + mdx *virtualMdict + mdxIdxFilePath string + mdds []*virtualMdict + mddsIdxFilePaths []string hasBuildIndex bool buildingIndexLock *sync.Mutex } @@ -28,26 +28,26 @@ func NewMdict(dirItem *model.DirItem) (model.GeneralDictionary, error) { mdxFilePath: dirItem.MdictMdxAbsPath, mddFilePaths: dirItem.MdictMddAbsPath, hasBuildIndex: false, + mddsIdxFilePaths: make([]string, len(dirItem.MdictMddAbsPath)), buildingIndexLock: new(sync.Mutex), } - mdx, err := gomdict.New(dirItem.MdictMdxAbsPath) + mdx, err := newVirtual(dirItem.MdictMdxAbsPath) if err != nil { return nil, fmt.Errorf("new mdx file failed, %s", err.Error()) } - mdds := make([]*gomdict.Mdict, 0) - + mdds := make([]*virtualMdict, 0) for _, mddpath := range dirItem.MdictMddAbsPath { - mdd, err1 := gomdict.New(mddpath) + mdd, err1 := newVirtual(mddpath) if err1 != nil { return nil, fmt.Errorf("new mdd file failed, %s", err1.Error()) } mdds = append(mdds, mdd) } - mdict.mdxins = mdx - mdict.mddinss = mdds + mdict.mdx = mdx + mdict.mdds = mdds return mdict, nil @@ -63,15 +63,14 @@ func (md *Mdict) Name() string { } func (md *Mdict) Description() *model.PlainDictionaryInfo { - if md.mdxins == nil { + if md.mdx == nil { return &model.PlainDictionaryInfo{} } - return &model.PlainDictionaryInfo{ - Title: md.mdxins.Meta.Title, - Description: md.mdxins.Meta.Description, - CreateDate: md.mdxins.Meta.CreationDate, - GenerateEngineVersion: md.mdxins.Meta.GeneratedByEngineVersion, - } + return md.mdx.description() +} + +func (md *Mdict) KeyList() []string { + return md.mdx.keyList() } func (md *Mdict) BuildIndex() error { @@ -81,16 +80,20 @@ func (md *Mdict) BuildIndex() error { return nil } - err := md.mdxins.BuildIndex() + mdxIndexFilePath := md.mdx.filePath + ".meidx" + err := md.mdx.index(mdxIndexFilePath) if err != nil { return err } + md.mdxIdxFilePath = mdxIndexFilePath - for _, mdd := range md.mddinss { - err1 := mdd.BuildIndex() + for i, mdd := range md.mdds { + mddIndexFilePath := mdd.filePath + ".meidx" + err1 := mdd.index(mddIndexFilePath) if err1 != nil { return err1 } + md.mddsIdxFilePaths[i] = mddIndexFilePath } md.hasBuildIndex = true @@ -101,18 +104,10 @@ func (md *Mdict) Locate(entry *model.KeyIndex) ([]byte, error) { if !md.hasBuildIndex { return nil, errors.New("dictionary not ready, building index first") } - mdictEntry := &gomdict.MDictKeyBlockEntry{ - RecordStartOffset: entry.RecordStartOffset, - RecordEndOffset: entry.RecordEndOffset, - KeyWord: entry.KeyWord, - KeyBlockIdx: entry.KeyBlockIdx, - } - def, err := md.mdxins.Locate(mdictEntry) - return def, err + return md.mdx.locate(entry) } func (md *Mdict) DictType() model.DictType { - return model.DictTypeMdict } @@ -120,8 +115,7 @@ func (md *Mdict) Lookup(keyword string) ([]byte, error) { if !md.hasBuildIndex { return nil, errors.New("dictionary not ready, building index first") } - def, err := md.mdxins.Lookup(keyword) - return def, err + return md.mdx.lookup(keyword) } func (md *Mdict) LookupResource(keyword string) ([]byte, error) { @@ -131,8 +125,8 @@ func (md *Mdict) LookupResource(keyword string) ([]byte, error) { var err error var def []byte - for _, mdd := range md.mddinss { - def, err = mdd.Lookup(keyword) + for _, mdd := range md.mdds { + def, err = mdd.lookup(keyword) if err != nil { log.Infof("mdict.LookupResource failed, key [%s] not found", keyword) continue @@ -156,27 +150,6 @@ func (md *Mdict) Search(keyword string) ([]*model.KeyIndex, error) { return nil, errors.New("dictionary not ready, building index first") } - entries, err := md.mdxins.Search(keyword) - if err != nil { - return nil, err - } - - results := make([]*model.KeyIndex, 0) - - for id, e := range entries { - temp := &model.KeyBlockEntry{ - ID: id, - RecordStartOffset: e.RecordStartOffset, - RecordEndOffset: e.RecordEndOffset, - KeyWord: e.KeyWord, - KeyBlockIdx: e.KeyBlockIdx, - } - tempIdx := &model.KeyIndex{ - IndexType: model.IndexTypeMdict, - KeyBlockEntry: temp, - } - results = append(results, tempIdx) - - } - return results, nil + // search from index file + return md.mdx.searchFromIndex(keyword) } diff --git a/pkg/service/mdict_test.go b/pkg/service/mdict_test.go index efa51e73..1bfdf517 100644 --- a/pkg/service/mdict_test.go +++ b/pkg/service/mdict_test.go @@ -1,7 +1,9 @@ package service import ( + "encoding/json" "github.com/stretchr/testify/assert" + "github.com/terasum/medict/pkg/model" "testing" ) @@ -9,11 +11,51 @@ func TestMdict_Name(t *testing.T) { mdict := &Mdict{ mdxFilePath: "/User/yourname/test/dict/test.mdx", mddFilePaths: nil, - mdxins: nil, - mddinss: nil, hasBuildIndex: false, buildingIndexLock: nil, } t.Logf("name is %s", mdict.Name()) assert.Equal(t, "test", mdict.Name()) } + +func TestCreateSqliteIndex(t *testing.T) { + + mdict, err := NewMdict(&model.DirItem{ + BaseDir: "testdata", + CurrentDir: "testdata/mdict", + IsValid: true, + DictType: model.DictTypeMdict, + CoverImgPath: "", + CoverImgType: "", + ConfigPath: "", + LicensePath: "", + MdictMdxFileName: "testdict", + MdictMdxAbsPath: "testdata/mdict/testdict.mdx", + MdictMddAbsPath: []string{"testdata/mdict/testdict.mdd"}, + StarDictDzAbsPath: "", + StarDictAbsPath: "", + StarDictIdxAbsPath: "", + StarDictIfoAbsPath: "", + }) + + if err != nil { + t.Fatal(err) + } + t.Logf(mdict.Name()) + err = mdict.BuildIndex() + if err != nil { + t.Fatal(err) + } + + records, err := mdict.Search("hello") + if err != nil { + t.Fatal(err) + } + for _, record := range records { + data, _ := json.Marshal(record) + t.Logf("%s", data) + def, err := mdict.Locate(record) + t.Logf("def: %s, err: %v", def, err) + } + +} diff --git a/pkg/service/mdict_virtual_mdict.go b/pkg/service/mdict_virtual_mdict.go new file mode 100644 index 00000000..c9ede731 --- /dev/null +++ b/pkg/service/mdict_virtual_mdict.go @@ -0,0 +1,176 @@ +package service + +import ( + "errors" + "fmt" + "github.com/terasum/medict/internal/gomdict" + "github.com/terasum/medict/internal/medengine" + "github.com/terasum/medict/internal/utils" + "github.com/terasum/medict/pkg/model" + "path/filepath" +) + +type virtualMdict struct { + instance *gomdict.Mdict + idxEngine *medengine.IndexEngine + filePath string + dictType string +} + +const ( + virtualTypeMdx = "dictTypeMdx" + virtualTypeMdd = "dictTypeMdd" +) + +func newVirtual(filePath string) (*virtualMdict, error) { + + mdx, err := gomdict.New(filePath) + if err != nil { + return nil, fmt.Errorf("new mdx file failed, %s", err.Error()) + } + + ext := filepath.Ext(filePath) + var dt string + if ext == ".mdx" { + dt = virtualTypeMdx + } else if ext == ".mdd" { + dt = virtualTypeMdd + } + + return &virtualMdict{ + instance: mdx, + idxEngine: nil, + filePath: filePath, + dictType: dt, + }, nil +} + +func (vm *virtualMdict) description() *model.PlainDictionaryInfo { + if vm.instance == nil { + return &model.PlainDictionaryInfo{} + } + return &model.PlainDictionaryInfo{ + Title: vm.instance.Meta.Title, + Description: vm.instance.Meta.Description, + CreateDate: vm.instance.Meta.CreationDate, + GenerateEngineVersion: vm.instance.Meta.GeneratedByEngineVersion, + } +} + +func (vm *virtualMdict) keyList() []string { + result := make([]string, 0) + for _, e := range vm.instance.KeyBlockData.KeyEntries { + result = append(result, e.KeyWord) + } + + return result +} + +func (vm *virtualMdict) locate(entry *model.KeyIndex) ([]byte, error) { + mdictEntry := &gomdict.MDictKeyBlockEntry{ + RecordStartOffset: entry.RecordStartOffset, + RecordEndOffset: entry.RecordEndOffset, + KeyWord: entry.KeyWord, + KeyBlockIdx: entry.KeyBlockIdx, + } + return vm.instance.Locate(mdictEntry) +} + +func (vm *virtualMdict) lookup(keyword string) ([]byte, error) { + return vm.instance.Lookup(keyword) +} + +func (vm *virtualMdict) searchFromIndex(keyword string) ([]*model.KeyIndex, error) { + if vm.idxEngine == nil { + return nil, errors.New("virtual mdict hasn't built the index") + } + + records, err := vm.idxEngine.Search(keyword) + if err != nil { + return nil, err + } + results := make([]*model.KeyIndex, 0) + for idx, record := range records { + kblockEntry := record.ToKeyBlockEntry() + kblockEntry.ID = idx + results = append(results, &model.KeyIndex{ + IndexType: model.IndexTypeMdict, + KeyBlockEntry: kblockEntry, + }) + } + + return results, nil + +} + +func (vm *virtualMdict) search(keyword string) ([]*model.KeyIndex, error) { + + entries, err := vm.instance.Search(keyword) + if err != nil { + return nil, err + } + + results := make([]*model.KeyIndex, 0) + + for id, e := range entries { + temp := &model.KeyBlockEntry{ + ID: id, + RecordStartOffset: e.RecordStartOffset, + RecordEndOffset: e.RecordEndOffset, + KeyWord: e.KeyWord, + KeyBlockIdx: e.KeyBlockIdx, + } + tempIdx := &model.KeyIndex{ + IndexType: model.IndexTypeMdict, + KeyBlockEntry: temp, + } + results = append(results, tempIdx) + + } + return results, nil +} + +func (vm *virtualMdict) index(idxFilePath string) error { + var eng *medengine.IndexEngine + var err error + //err = vm.instance.BuildIndex() + //if err != nil { + // return err + //} + + // has already built index + if utils.FileExists(idxFilePath) { + eng, err = medengine.NewEngine(idxFilePath) + if err != nil { + return err + } + vm.idxEngine = eng + return nil + } + + // TODO ignore bktree index + // err = md.mdxins.BuildBKTree() + // if err != nil { + // return err + // } + + eng, err = medengine.NewEngine(idxFilePath) + if err != nil { + return err + } + + for _, e := range vm.instance.KeyBlockData.KeyEntries { + err = eng.AddRecord(medengine.NewIndexRecord( + e.KeyWord, + e.RecordStartOffset, + e.RecordEndOffset, + e.KeyBlockIdx, + )) + if err != nil { + continue + } + } + + vm.idxEngine = eng + return nil +} diff --git a/pkg/service/startdict.go b/pkg/service/startdict.go index 1c0d8f82..3f6a1b64 100644 --- a/pkg/service/startdict.go +++ b/pkg/service/startdict.go @@ -44,6 +44,10 @@ func (bs *bkString) Distance(entry bktree.Entry) int { return levenshtein.Distance(bs.w, entry.(*bkString).w) } +func (s *StarDict) KeyList() []string { + return []string{} +} + func (s *StarDict) BuildIndex() error { if s.ready { return nil