Skip to content

Commit

Permalink
don't allow duplicate entries in the sparse matrix for duplicate ngrams
Browse files: browse the repository at this point in the history
  • Loading branch information
drewlanenga committed Jan 2, 2015
1 parent d2969f5 commit 480d31c
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion sparse.go
@@ -54,13 +54,19 @@ func (s *sparseMatrix) Add(ngrams []ngram, classes []string) {
 		s.Classes[class].Add(s.N)
 	}
 
+	// add ngrams uniquely
+	added := make(map[string]int)
 	for _, ngram := range ngrams {
 		gramString := ngram.String()
 		if _, ok := s.Tokens[gramString]; !ok {
 			s.Tokens[gramString] = newSparseColumn()
 		}
 
-		s.Tokens[gramString].Add(s.N)
+		// only add the document index once for the ngram
+		if _, ok := added[gramString]; !ok {
+			added[gramString] = 1
+			s.Tokens[gramString].Add(s.N)
+		}
 	}
 	// increment the row counter
 	s.N++
Expand Down

0 comments on commit 480d31c

Please sign in to comment.