forked from brentp/bigly
/
sa.go
108 lines (98 loc) · 1.94 KB
/
sa.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package bigly
import (
"bytes"
"fmt"
"os"
"strconv"
"github.com/biogo/hts/sam"
)
// SA holds the attributes in a SAM SA:Z tag.
type SA struct {
Chrom []byte
Pos int
Strand bool
Cigar []byte
MapQ uint8
NM uint16
end int
Parsed sam.Cigar // parsed Cigar
}
func RecordToSA(r *sam.Record) *SA {
return &SA{Chrom: []byte(r.Ref.Name()), Pos: r.Start(), Strand: r.Strand() != -1, Parsed: r.Cigar}
}
func AsSAs(r *sam.Record, cigs []byte) []*SA {
sas := ParseSAs(cigs)
sas = append(sas, RecordToSA(r))
return sas
}
// End returns the end of the Cigar string
func (s *SA) End() int {
if s.end != 0 {
return s.end
}
var err error
if s.Parsed == nil {
s.Parsed, err = sam.ParseCigar(s.Cigar)
}
if err != nil {
fmt.Fprintf(os.Stderr, "warning bad cigar in SA tag: %s\n", s.Cigar)
return s.Pos
}
s.end = s.Pos
for _, co := range s.Parsed {
if t := co.Type(); t != sam.CigarBack {
s.end += co.Len() * t.Consumes().Reference
}
}
return s.end
}
func ParseSAs(s []byte) []*SA {
if s[len(s)-1] == ';' {
s = s[:len(s)-1]
}
if len(s) > 3 && s[0] == 'S' && s[1] == 'A' && s[2] == 'Z' {
s = s[3:]
}
ss := bytes.Split(s, []byte{';'})
sas := make([]*SA, len(ss), len(ss)+1)
for i, sa := range ss {
tmp := ParseSA(sa)
sas[i] = &tmp
}
return sas
}
// ParseSA returns an SA struct from the bytes
func ParseSA(sa []byte) SA {
// "7,70999871,+,117S83M50S,42,8"
//parts := bytes.SplitN(sa, []byte{','}, 6)
var s SA
off := 0
for i := 0; i < 6; i++ {
next := off + bytes.Index(sa[off:], []byte{','})
if i == 5 && next < off {
next = len(sa)
}
switch i {
case 0:
s.Chrom = sa[:next]
case 1, 4, 5:
p, err := strconv.Atoi(string(sa[off:next]))
if err != nil {
panic(err)
}
if i == 1 {
s.Pos = p - 1
} else if i == 4 {
s.MapQ = uint8(p)
} else {
s.NM = uint16(p)
}
case 3:
s.Cigar = sa[off:next]
case 2:
s.Strand = sa[off] != '-'
}
off = 1 + next
}
return s
}