This repository has been archived by the owner on Apr 10, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
84 lines (73 loc) · 1.55 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package main
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
)
// Word sets loaded once at package initialization, one per dictionary file.
// Each map is keyed by a non-empty line of its file; loadWordMap terminates
// the program if a file cannot be opened or read.
var mapStdToneWord = loadWordMap("dict/vietnamese.std.dict")
var mapNewToneWord = loadWordMap("dict/vietnamese.new.dict")
var mapSpecialWord = loadWordMap("dict/vietnamese.sp.dict")
var mapCommonWord = loadWordMap("dict/vietnamese.cm.dict")

// newCommonWordFile is the path main writes the merged word list to. It is
// the same file mapCommonWord is loaded from, so the list is updated in place.
var newCommonWordFile = "dict/vietnamese.cm.dict"
// loadWordMap reads wordListFile line by line and returns a set containing
// every non-empty line as a key (value always true).
//
// A trailing "\n" or "\r\n" is stripped from each line. Blank lines are
// skipped. A final line without a terminating newline is still included.
// Lines of any length are kept whole.
//
// Any failure to open or read the file terminates the program via
// log.Fatalln.
func loadWordMap(wordListFile string) map[string]bool {
	f, err := os.Open(wordListFile)
	if err != nil {
		log.Fatalln(err)
	}
	defer f.Close()

	rd := bufio.NewReader(f)
	m := map[string]bool{}
	for {
		// ReadString returns whole lines regardless of length, unlike
		// ReadLine, which hands back buffer-sized fragments of long lines.
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalln(err)
		}

		// Strip the line terminator; "\r" is only dropped as part of "\r\n".
		if strings.HasSuffix(line, "\n") {
			line = strings.TrimSuffix(line[:len(line)-1], "\r")
		}
		if line != "" {
			m[line] = true
		}

		// On EOF, ReadString may still have returned the final unterminated
		// line, so stop only after it has been processed.
		if err == io.EOF {
			break
		}
	}
	return m
}
// main merges newly found words into the common-word dictionary file.
//
// Steps:
//  1. Log the sizes of the common, standard-tone and new-tone dictionaries.
//  2. Call dumpWiktionary and pass its result, together with an empty set, to
//     extractVietWord (presumably this fills the set with candidate words;
//     both helpers are defined outside this block).
//  3. Add every candidate that is in none of the four loaded dictionaries to
//     mapCommonWord.
//  4. Sort all common words with vnsort and write them, one per line, to
//     newCommonWordFile.
//
// The program exits through log.Fatalln if the output file cannot be written.
func main() {
	fmt.Println("BEGIN")
	log.Println(len(mapCommonWord))
	log.Println(len(mapStdToneWord))
	log.Println(len(mapNewToneWord))

	allWords := dumpWiktionary()
	m := map[string]bool{}
	extractVietWord(allWords, m)

	// A candidate is new only if no dictionary already has it as a key.
	dicts := []map[string]bool{
		mapStdToneWord,
		mapNewToneWord,
		mapSpecialWord,
		mapCommonWord,
	}
	countNewWord := 0
	for k := range m {
		known := false
		for _, d := range dicts {
			if _, ok := d[k]; ok {
				known = true
				break
			}
		}
		if known {
			continue
		}
		mapCommonWord[k] = true
		countNewWord++
	}
	log.Println("countNewWord:", countNewWord)

	words := make([]string, 0, len(mapCommonWord))
	for k := range mapCommonWord {
		if len(k) > 0 {
			words = append(words, k)
		}
	}
	vnsort(words)

	// The dictionary is plain data, so it is written without executable bits.
	// A failed write must not be reported as success.
	data := []byte(strings.Join(words, "\n"))
	if err := ioutil.WriteFile(newCommonWordFile, data, 0644); err != nil {
		log.Fatalln(err)
	}
	fmt.Println("DONE")
}