Package jtfidf provides calculations of TF (Term Frequency), IDF (Inverse Document Frequency), and TF-IDF values for Japanese documents.
This package uses kagome as its morphological analyzer.
The calculation of the TF-IDF value in this package uses the IDF value plus 1. This is to prevent the TF-IDF value from becoming 0.
go get -u github.com/ramenjuniti/jtfidf
All usage is described in the GoDoc.
AllTf returns all TF values in a document.
// ExampleAllTf prints the TF values that AllTf computes for a single
// Japanese sentence.
func ExampleAllTf() {
	doc := "寿司が食べたい。"
	tfs := AllTf(doc)
	fmt.Println(tfs)
	// Output: map[。:0.2 が:0.2 たい:0.2 寿司:0.2 食べ:0.2]
}
Tf returns the TF value of a term in a document.
// ExampleTf prints the TF value that Tf reports for one term within a
// single Japanese sentence.
func ExampleTf() {
	const term = "寿司"
	const doc = "寿司が食べたい。"
	tf := Tf(term, doc)
	fmt.Println(tf)
	// Output: 0.2
}
AllIdf returns all IDF values in documents.
// ExampleAllIdf prints the IDF values that AllIdf computes for a
// one-document collection.
func ExampleAllIdf() {
	docs := []string{"寿司が食べたい。"}
	idfs := AllIdf(docs)
	fmt.Println(idfs)
	// Output: map[。:0 が:0 たい:0 寿司:0 食べ:0]
}
Idf returns the IDF value of a term in documents.
// ExampleIdf prints the IDF value that Idf reports for one term across a
// one-document collection.
func ExampleIdf() {
	docs := []string{"寿司が食べたい。"}
	idf := Idf("寿司", docs)
	fmt.Println(idf)
	// Output: 0
}
AllTfidf returns all TF-IDF values in documents.
// ExampleAllTfidf prints the TF-IDF values that AllTfidf computes for a
// one-document collection.
func ExampleAllTfidf() {
	docs := []string{"寿司が食べたい。"}
	tfidfs := AllTfidf(docs)
	fmt.Println(tfidfs)
	// Output: [map[。:0.2 が:0.2 たい:0.2 寿司:0.2 食べ:0.2]]
}
Tfidf returns the TF-IDF value of a term in a document, given a set of documents.
// ExampleTfidf prints the TF-IDF value that Tfidf reports for one term in
// the first document of a one-document collection.
func ExampleTfidf() {
	docs := []string{"寿司が食べたい。"}
	target := docs[0]
	score := Tfidf("寿司", target, docs)
	fmt.Println(score)
	// Output: 0.2
}
This software is released under the MIT License, see LICENSE.