/
index.go
127 lines (114 loc) · 2.26 KB
/
index.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package main
import (
"errors"
"fmt"
"github.com/RoaringBitmap/gocroaring"
)
// BitProfiles holds the bit-set representation of one indexed profile.
type BitProfiles struct {
	// Genes has one bit set for each gene token that Index saw with a
	// non-empty allele.
	Genes *BitArray
	// Alleles holds the allele tokens (one per gene/allele pair) that Index
	// saw in the profile.
	Alleles *gocroaring.Bitmap
	// Ready is set by Index once Genes and Alleles have been populated.
	Ready bool
}
// AlleleKey is the lookup key used by Tokeniser. It names either one allele
// of a gene or, when Allele is nil, the gene itself.
//
// AlleleKey is used as a map key, so the dynamic type stored in Allele must
// be comparable; a non-comparable value makes the map operation panic.
type AlleleKey struct {
	Allele interface{}
	Gene   int
}

// Tokeniser assigns a dense, zero-based uint32 token to every distinct
// AlleleKey passed to Get. Tokens are issued in first-seen order.
//
// The zero value is ready to use. A Tokeniser is not safe for concurrent use.
type Tokeniser struct {
	lookup    map[AlleleKey]uint32 // key -> token already issued for it
	next      uint32               // token the next unseen key will receive
	lastValue uint32               // most recently issued token (0 before any is issued)
}

// NewTokeniser returns an empty Tokeniser whose first issued token is 0.
//
// Tokens come from a plain counter rather than a generator goroutine, so
// creating a Tokeniser starts no background work and nothing needs to be
// shut down when it is discarded.
func NewTokeniser() *Tokeniser {
	return &Tokeniser{lookup: make(map[AlleleKey]uint32)}
}

// Get returns the token for key. A key seen before yields the token it was
// given the first time; an unseen key is assigned the next unused token,
// which is also recorded in lastValue.
func (t *Tokeniser) Get(key AlleleKey) uint32 {
	// Reading from a nil map is fine, so this also covers the zero value.
	if value, ok := t.lookup[key]; ok {
		return value
	}
	// Writing to a nil map panics; create it lazily for zero-value use.
	if t.lookup == nil {
		t.lookup = make(map[AlleleKey]uint32)
	}
	value := t.next
	t.next++
	t.lookup[key] = value
	t.lastValue = value
	return value
}
// ProfilesMap stores one BitProfiles per ST along with the lookup from an ST
// to its slot.
type ProfilesMap struct {
	// lookup maps an ST to its position in indices.
	lookup map[CgmlstSt]int
	// indices holds the per-ST bit profiles; an entry is populated only once
	// its Ready flag is set.
	indices []BitProfiles
	// schemeSize is lowered by Index to the smallest profile schemeSize seen
	// so far; NewIndexer initialises it to ALMOST_INF.
	schemeSize uint32
}
// Indexer turns profiles into BitProfiles, using two Tokenisers to map genes
// and alleles onto bit positions.
type Indexer struct {
	// geneTokens issues one token per gene (its keys carry a nil Allele).
	geneTokens *Tokeniser
	// alleleTokens issues one token per (allele, gene) pair.
	alleleTokens *Tokeniser
	// index holds the BitProfiles built by Index, addressed by ST.
	index *ProfilesMap
}
// NewIndexer creates an Indexer with one (not yet Ready) BitProfiles slot per
// entry of STs. Each ST is mapped to its position in STs; if an ST occurs
// more than once, the last position wins. The shared schemeSize starts at
// ALMOST_INF and fresh Tokenisers are created for genes and alleles.
func NewIndexer(STs []CgmlstSt) (i *Indexer) {
	positions := make(map[CgmlstSt]int)
	for pos := range STs {
		positions[STs[pos]] = pos
	}

	i = &Indexer{
		geneTokens:   NewTokeniser(),
		alleleTokens: NewTokeniser(),
		index: &ProfilesMap{
			lookup:     positions,
			indices:    make([]BitProfiles, len(STs)),
			schemeSize: ALMOST_INF,
		},
	}
	return i
}
// Index builds the bit-level index for profile's ST.
//
// It returns true when that ST was already indexed (nothing is rebuilt) and
// false when the index was created by this call. An error is returned when
// profile.ST is not present in the index's ST lookup.
func (i *Indexer) Index(profile *Profile) (bool, error) {
	offset, known := i.index.lookup[profile.ST]
	if !known {
		return false, errors.New("Missing ST during indexing")
	}

	entry := &i.index.indices[offset]
	if entry.Ready {
		return true, nil
	}

	entry.Genes = NewBitArray(2500)
	entry.Alleles = gocroaring.New()
	for gene, allele := range profile.Matches {
		// Genes whose allele is the empty string contribute no bits.
		if allele == "" {
			continue
		}

		// One bit for the specific (allele, gene) pair...
		alleleBit := i.alleleTokens.Get(AlleleKey{Allele: allele, Gene: gene})
		entry.Alleles.Add(alleleBit)

		// ...and one for the gene itself, keyed with a nil allele.
		geneBit := i.geneTokens.Get(AlleleKey{Gene: gene})
		entry.Genes.SetBit(uint64(geneBit))
	}
	entry.Ready = true

	// Track the smallest scheme size across all indexed profiles.
	if profile.schemeSize < i.index.schemeSize {
		i.index.schemeSize = profile.schemeSize
	}
	return false, nil
}
// Complete checks that every ST in the lookup has a Ready index entry. It
// returns nil when all are Ready, otherwise an error naming one ST that is
// not; which one is unspecified because map iteration order is random.
func (i *ProfilesMap) Complete() error {
	for st, offset := range i.lookup {
		if i.indices[offset].Ready {
			continue
		}
		return fmt.Errorf("didn't see a profile for ST '%s'", st)
	}
	return nil
}