-
Notifications
You must be signed in to change notification settings - Fork 0
/
types.go
202 lines (176 loc) · 5.44 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
package bow
import (
"fmt"
"strings"
"github.com/ndaniels/esfragbag"
"github.com/TuftsBCB/io/pdb"
"github.com/TuftsBCB/io/pdbx"
"github.com/TuftsBCB/seq"
"github.com/TuftsBCB/structure"
)
// Bowed corresponds to a bag-of-words with meta data about its source.
// For example, a PDB chain can have a BOW computed for it. Meta data might
// include that chain's identifier (e.g., 1ctfA) and perhaps that chain's
// sequence.
//
// Values of this type correspond to records in a BOW database.
type Bowed struct {
	// Id is a globally unique identifier corresponding to the source of the
	// bow, e.g., a PDB identifier "1ctf", a PDB identifier with a chain
	// identifier "1ctfA", or a sequence accession number.
	Id string
	// Data is arbitrary data associated with the source. May be empty.
	// Within this file only the sequence bower populates it (with the
	// sequence's bytes); the structure bowers leave it unset.
	Data []byte
	// Bow is the bag-of-words itself.
	Bow Bow
}
// StructureBower corresponds to Bower values that can provide BOWs given
// a structure fragment library.
type StructureBower interface {
	// StructureBow computes a bag-of-words given a structure fragment
	// library. For example, to compute the bag-of-words of a chain in a
	// PDB entry:
	//
	//	lib := someStructureFragmentLibrary()
	//	chain := somePdbChain()
	//	fmt.Println(BowerFromChain(chain).StructureBow(lib))
	//
	// This is made easier by using pre-defined types in this package that
	// implement this interface.
	StructureBow(lib fragbag.StructureLibrary) Bowed
}
// pdbChainStructure adapts a *pdb.Chain to the StructureBower interface.
// The chain is embedded, so its fields and methods are promoted onto the
// wrapper.
type pdbChainStructure struct {
	*pdb.Chain
}
// BowerFromChain wraps a PDB chain in the package's reference
// StructureBower implementation. The chain pointer is stored as given;
// no copy is made.
func BowerFromChain(c *pdb.Chain) StructureBower {
	return pdbChainStructure{Chain: c}
}
// id returns the identifier used for the Bowed record of this chain.
//
// Precedence: the entry's Scop identifier if non-empty, otherwise its Cath
// identifier if non-empty, otherwise the lowercased PDB id code followed by
// the chain identifier (e.g., "1ctfA").
func (c pdbChainStructure) id() string {
	// Use the same Scop-before-Cath precedence as pdbModelStructure.id so
	// that a chain and its models agree on which identifier wins when an
	// entry carries both.
	switch {
	case len(c.Entry.Scop) > 0:
		return c.Entry.Scop
	case len(c.Entry.Cath) > 0:
		return c.Entry.Cath
	}
	return fmt.Sprintf("%s%c", strings.ToLower(c.Entry.IdCode), c.Ident)
}
// StructureBow implements StructureBower for a PDB chain. The record's Id
// comes from c.id() and its Bow is computed from the chain's CaAtoms();
// Data is left unset.
func (c pdbChainStructure) StructureBow(lib fragbag.StructureLibrary) Bowed {
	var rec Bowed
	rec.Id = c.id()
	rec.Bow = StructureBow(lib, c.CaAtoms())
	return rec
}
// pdbModelStructure adapts a *pdb.Model to the StructureBower interface.
// The model is embedded, so its fields and methods are promoted onto the
// wrapper.
type pdbModelStructure struct {
	*pdb.Model
}
// BowerFromModel wraps a PDB model in the package's reference
// StructureBower implementation. The model pointer is stored as given;
// no copy is made.
func BowerFromModel(c *pdb.Model) StructureBower {
	return pdbModelStructure{Model: c}
}
// id returns the identifier used for the Bowed record of this model.
//
// Precedence: the entry's Scop identifier if non-empty, otherwise its Cath
// identifier if non-empty, otherwise the lowercased PDB id code followed by
// the chain identifier and the model number.
func (m pdbModelStructure) id() string {
	if m.Entry.Scop != "" {
		return m.Entry.Scop
	}
	if m.Entry.Cath != "" {
		return m.Entry.Cath
	}

	code := strings.ToLower(m.Entry.IdCode)
	return fmt.Sprintf("%s%c%d", code, m.Chain.Ident, m.Num)
}
// StructureBow implements StructureBower for a PDB model. The record's Id
// comes from m.id() and its Bow is computed from the model's CaAtoms();
// Data is left unset.
func (m pdbModelStructure) StructureBow(lib fragbag.StructureLibrary) Bowed {
	var rec Bowed
	rec.Id = m.id()
	rec.Bow = StructureBow(lib, m.CaAtoms())
	return rec
}
// cifChainStructure adapts a *pdbx.Chain (PDBx/mmCIF) to the
// StructureBower interface. The chain is embedded, so its fields and
// methods are promoted onto the wrapper.
type cifChainStructure struct {
	*pdbx.Chain
}
// BowerFromCifChain wraps a chain read from a PDBx/mmCIF formatted file in
// the package's reference StructureBower implementation. The chain pointer
// is stored as given; no copy is made.
func BowerFromCifChain(c *pdbx.Chain) StructureBower {
	return cifChainStructure{Chain: c}
}
// id returns the identifier used for the Bowed record of this chain: the
// lowercased id of the chain's entry followed by the chain's own id.
func (c cifChainStructure) id() string {
	entryID := strings.ToLower(c.Entity.Entry.Id)
	return fmt.Sprintf("%s%c", entryID, c.Id)
}
// StructureBow implements StructureBower for a PDBx/mmCIF chain. The
// record's Id comes from c.id() and its Bow is computed from the alpha
// carbons of the chain's first model only; Data is left unset.
//
// A chain with no models does not panic: the bag is computed from an empty
// atom list instead.
func (c cifChainStructure) StructureBow(lib fragbag.StructureLibrary) Bowed {
	// Guard the index: c.Models[0] would panic on a chain without models.
	var atoms []structure.Coords
	if len(c.Models) > 0 {
		atoms = c.Models[0].AlphaCarbons
	}
	return Bowed{
		Id:  c.id(),
		Bow: StructureBow(lib, atoms),
	}
}
// StructureBow computes a bag-of-words from a structure fragment library
// and a list of alpha-carbon atoms.
//
// Every contiguous window of lib.FragmentSize() atoms is matched against
// the library, and the frequency of the best-matching fragment is
// incremented. Windows for which the library reports a negative fragment
// number are skipped. If there are fewer atoms than the fragment size, no
// windows are examined.
//
// If the lib given is a weighted library, then the Bow returned will also
// be weighted.
//
// Note that this function should only be used when providing your own
// implementation of the StructureBower interface. Otherwise, BOWs should
// be computed using the StructureBow method of the interface.
func StructureBow(lib fragbag.StructureLibrary, atoms []structure.Coords) Bow {
	bag := NewBow(lib.Size())
	width := lib.FragmentSize()

	// last is the final valid window start; negative when atoms is too short.
	for start, last := 0, len(atoms)-width; start <= last; start++ {
		window := atoms[start : start+width]
		frag := lib.BestStructureFragment(window)
		if frag < 0 {
			continue
		}
		bag.Freqs[frag]++
	}

	if weighted, ok := lib.(fragbag.WeightedLibrary); ok {
		return bag.Weighted(weighted)
	}
	return bag
}
// SequenceBower corresponds to Bower values that can provide BOWs given
// a sequence fragment library.
type SequenceBower interface {
	// SequenceBow computes a bag-of-words given a sequence fragment
	// library and returns it together with meta data about its source.
	SequenceBow(lib fragbag.SequenceLibrary) Bowed
}
// sequence adapts a seq.Sequence to the SequenceBower interface. The
// sequence value is embedded, so its fields and methods are promoted onto
// the wrapper.
type sequence struct {
	seq.Sequence
}
// BowerFromSequence wraps a biological sequence in the package's reference
// SequenceBower implementation.
func BowerFromSequence(s seq.Sequence) SequenceBower {
	return sequence{Sequence: s}
}
// SequenceBow implements SequenceBower for a biological sequence.
//
// The record's Id is the first whitespace-delimited field of the sequence
// name, its Data is the sequence's bytes, and its Bow is computed from the
// sequence with SequenceBow. If the name is empty or consists only of
// whitespace, Id is left empty.
func (s sequence) SequenceBow(lib fragbag.SequenceLibrary) Bowed {
	// strings.Fields returns an empty slice for an empty or all-whitespace
	// name, so indexing [0] unconditionally would panic.
	var id string
	if fields := strings.Fields(s.Name); len(fields) > 0 {
		id = fields[0]
	}
	return Bowed{
		Id:   id,
		Data: s.Bytes(),
		Bow:  SequenceBow(lib, s.Sequence),
	}
}
// SequenceBow computes a bag-of-words from a sequence fragment library and
// a query sequence.
//
// Every contiguous window of lib.FragmentSize() residues is matched against
// the library, and the frequency of the best-matching fragment is
// incremented. Windows for which the library reports a negative fragment
// number are skipped. If the sequence is shorter than the fragment size,
// no windows are examined.
//
// If the lib given is a weighted library, then the BOW returned will also
// be weighted.
//
// Note that this function should only be used when providing your own
// implementation of the SequenceBower interface. Otherwise, BOWs should
// be computed using the SequenceBow method of the interface.
func SequenceBow(lib fragbag.SequenceLibrary, s seq.Sequence) Bow {
	bag := NewBow(lib.Size())
	width := lib.FragmentSize()

	// last is the final valid window start; negative when s is too short.
	last := s.Len() - width
	for start := 0; start <= last; start++ {
		window := s.Slice(start, start+width)
		if frag := lib.BestSequenceFragment(window); frag >= 0 {
			bag.Freqs[frag]++
		}
	}

	if weighted, ok := lib.(fragbag.WeightedLibrary); ok {
		bag = bag.Weighted(weighted)
	}
	return bag
}