Skip to content

Commit

Permalink
Merge pull request #73 from s12chung/clean_dict
Browse files Browse the repository at this point in the history
Clean dict
  • Loading branch information
s12chung committed Sep 30, 2023
2 parents 729bc26 + a7bcfbd commit cecf12d
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 5 deletions.
2 changes: 1 addition & 1 deletion db/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func cmdSeed(ctx context.Context) error {
if err := seedkrdict.Seed(txQs, seedkrdict.DefaultRscPath); err != nil { //nolint:contextcheck // this is my pattern
return err
}
if err := testdb.SeedList(txQs, map[string]bool{"Terms": false}); err != nil {
if err := testdb.SeedList(txQs, map[string]bool{"Terms": false, "SourceStructureds": false}); err != nil {
return err
}
return txQs.Commit()
Expand Down
11 changes: 8 additions & 3 deletions db/pkg/seedkrdict/seed.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"maps"
"os"
"strings"

"github.com/s12chung/text2anki/db/pkg/db"
"github.com/s12chung/text2anki/pkg/dictionary"
Expand Down Expand Up @@ -141,17 +142,14 @@ func (l *LexicalEntry) Term() (dictionary.Term, error) {
if err != nil {
return dictionary.Term{}, err
}

pos, commonLevel, err := l.posCommonLevel()
if err != nil {
return dictionary.Term{}, fmt.Errorf("%w with text: %v", err, text)
}

translations := l.translations()
if len(translations) == 0 {
return dictionary.Term{}, NoTranslationsFoundError{text: text}
}

return dictionary.Term{
ID: int64(l.ID),
Text: text,
Expand Down Expand Up @@ -307,6 +305,10 @@ type Equivalent struct {

const engSenseLang = "영어"

// cleanTranslationMap maps escaped sequences that remain in a translation's
// explanation text to the literal characters they stand for. translation()
// applies each pair to the explanation with strings.ReplaceAll.
//
// The key is the literal six-character text `&quot;` (an HTML/XML entity that
// is still present in the explanation string), not a bare quote character.
var cleanTranslationMap = map[string]string{
	"&quot;": "\"",
}

func (e *Equivalent) translation() (dictionary.Translation, error) {
isEng := false
for _, feat := range e.Feats {
Expand Down Expand Up @@ -336,6 +338,9 @@ func (e *Equivalent) translation() (dictionary.Translation, error) {
if explanation == "" {
err = fmt.Errorf("explanation is empty")
}
for k, v := range cleanTranslationMap {
explanation = strings.ReplaceAll(explanation, k, v)
}
return dictionary.Translation{
Text: text,
Explanation: explanation,
Expand Down
14 changes: 14 additions & 0 deletions db/pkg/seedkrdict/testdata/TestLexicalEntry_Term.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,19 @@
}
],
"dictionary_source": ""
},
{
"id": 71567,
"text": "-시",
"variants": [],
"part_of_speech": "Affix",
"common_level": 1,
"translations": [
{
"text": "-si",
"explanation": "A suffix used to mean \"considering it that way\" or \"seeing it that way.\""
}
],
"dictionary_source": ""
}
]
112 changes: 111 additions & 1 deletion db/pkg/seedkrdict/testdata/TestLexicalEntry_Term.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3350,6 +3350,116 @@
<feat att="definition" val="人死。" />
</Equivalent>
</Sense>
</LexicalEntry>
</LexicalEntry>
<LexicalEntry att="id" val="71567">
<feat att="homonym_number" val="4" />
<feat att="lexicalUnit" val="단어" />
<feat att="partOfSpeech" val="접사" />
<Lemma>
<feat att="writtenForm" val="-시" />
</Lemma>
<feat att="origin" val="" />
<feat att="vocabularyLevel" val="고급" />
<feat att="semanticCategory" val="인간 > 태도" />
<feat att="annotation" val="일부 명사 뒤에 붙는다." />
<Sense att="id" val="1">
<feat att="definition" val="'그렇게 여김' 또는 '그렇게 봄'의 뜻을 더하는 접미사." />
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="도외시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="동일시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="등한시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="문제시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="야만시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="의문시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="적대시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="죄악시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="중요시" />
</SenseExample>
<SenseExample>
<feat att="type" val="" />
<feat att="example" val="확실시" />
</SenseExample>
<Equivalent>
<feat att="language" val="영어" />
<feat att="lemma" val="-si" />
<feat att="definition" val="A suffix used to mean &amp;quot;considering it that way&amp;quot; or &amp;quot;seeing it that way.&amp;quot;" />
</Equivalent>
<Equivalent>
<feat att="language" val="일본어" />
<feat att="lemma" val="し【視】 " />
<feat att="definition" val="「そう思う」または「そう見なす」の意を付加する接尾辞。" />
</Equivalent>
<Equivalent>
<feat att="language" val="프랑스어" />
<feat att="lemma" val="(Pas d'expression équivalente)" />
<feat att="definition" val="Suffixe signifiant « le fait de considérer ainsi », ou « fait de voir ainsi »." />
</Equivalent>
<Equivalent>
<feat att="language" val="스페인어" />
<feat att="lemma" val="(No hay expresión equivalente)" />
<feat att="definition" val="Sufijo que añade el significado de 'que considera como tal' o 'que ve como tal'. " />
</Equivalent>
<Equivalent>
<feat att="language" val="아랍어" />
<feat att="lemma" val="(لا يوجد كلمة مرادفة)" />
<feat att="definition" val="اللاحقة التي تشير إلى معنى &amp;quot;اعتباره هكذا&amp;quot; أو &amp;quot;النظر إليه بهذا النحو&amp;quot; " />
</Equivalent>
<Equivalent>
<feat att="language" val="몽골어" />
<feat att="lemma" val="(Тохирох үг хэллэг байхгүй байна)" />
<feat att="definition" val="'тэгж тооцох' болон 'тэгж үзэх' хэмээн утга нэмдэг дагавар." />
</Equivalent>
<Equivalent>
<feat att="language" val="베트남어" />
<feat att="lemma" val="xem, xem như, cho là, cho rằng" />
<feat att="definition" val="Hậu tố thêm nghĩa 'cho là như thế' hoặc 'xem như thế'." />
</Equivalent>
<Equivalent>
<feat att="language" val="타이어" />
<feat att="lemma" val="การถือว่า..., การนับว่า..., การมองว่า..." />
<feat att="definition" val="ปัจจัยที่ใช้เพิ่มเข้าไปในคำเพื่อให้มีความหมายว่า 'การถือเป็นดังกล่าว' หรือ 'การมองเป็นดังกล่าว'" />
</Equivalent>
<Equivalent>
<feat att="language" val="인도네시아어" />
<feat att="lemma" val="menganggap~" />
<feat att="definition" val="akhiran yang menambahkan arti &amp;quot;menganggap demikian&amp;quot; atau &amp;quot;melihat demikian&amp;quot;" />
</Equivalent>
<Equivalent>
<feat att="language" val="러시아어" />
<feat att="lemma" val="(нет эквивалента)" />
<feat att="definition" val="Суффикс со значением &amp;quot;рассматривание подобны образом&amp;quot;." />
</Equivalent>
<Equivalent>
<feat att="language" val="중국어" />
<feat att="lemma" val="(无对应词汇)" />
<feat att="definition" val="后缀。指“看作是那样”或“那样看”。" />
</Equivalent>
</Sense>
</LexicalEntry>
</Lexicon>
</LexicalResource>

0 comments on commit cecf12d

Please sign in to comment.