-
Notifications
You must be signed in to change notification settings - Fork 0
/
fasta.go
142 lines (125 loc) · 2.86 KB
/
fasta.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Package fasta provides types to read and write FASTA-encoded files.
package fasta
import (
"bufio"
"bytes"
"errors"
"io"
)
// Sequence is the common interface for a sequence that can be represented in
// FASTA encoding. Writer.Write accepts any value that implements it; Record
// is the implementation provided by this package.
type Sequence interface {
// Name returns the header text; Writer.Write emits it right after the
// leading '>' of the header line.
Name() string
// Seq returns the sequence letters; Writer.Write emits them after the
// header, wrapped to the writer's line width.
Seq() []byte
}
// Record is the concrete Sequence implementation produced by Reader. It
// corresponds to one entry of a FASTA encoded file: a header line followed by
// the sequence lines that belong to it.
type Record struct {
	// Header is the header line without its leading '>'.
	Header string
	// Sequence holds the sequence letters of the record.
	Sequence []byte
}

// Name reports the header of the record. It implements Sequence.
func (r *Record) Name() string { return r.Header }

// Seq reports the sequence of the record. It implements Sequence. The
// returned slice is the Sequence field itself, not a copy.
func (r *Record) Seq() []byte { return r.Sequence }
// A Reader reads FASTA encoded sequences.
type Reader struct {
	r *bufio.Reader
	// err is set to io.EOF once the underlying reader is exhausted.
	err error
	// rec is the record whose header has been read but which has not been
	// returned to the caller yet; nil when no record is pending.
	rec *Record
}

// NewReader returns a new reader that reads from f.
func NewReader(f io.Reader) *Reader {
	return &Reader{r: bufio.NewReader(f)}
}

// Read returns the next FASTA record from r. Read always returns either a
// non-nil record or a non-nil error, but not both. Once every record has been
// returned, Read returns a nil record and io.EOF; this also holds for input
// that contains no record at all.
//
// Every line is stripped of surrounding white space and blank lines are
// skipped. A line starting with '>' begins a new record whose header is the
// rest of that line; any other line is appended to the sequence of the current
// record. The last line does not need a trailing newline. A sequence line
// that appears before the first header yields a format error, and read errors
// other than io.EOF are returned unchanged.
func (r *Reader) Read() (*Record, error) {
	for r.err == nil {
		line, err := r.r.ReadBytes('\n')
		if err != nil {
			if err != io.EOF {
				return nil, err
			}
			// Remember EOF but still run the line through the regular
			// path below: the final line may lack a trailing newline and
			// can be a header just as well as sequence data.
			r.err = io.EOF
		}
		line = bytes.TrimSpace(line)
		if len(line) == 0 { // Skip empty lines.
			continue
		}
		if line[0] != '>' {
			if r.rec == nil { // Reached sequence before the first header.
				return nil, errors.New("fasta: format error: sequence before header")
			}
			r.rec.Sequence = append(r.rec.Sequence, line...)
			continue
		}
		// A header closes the record being assembled (if any) and opens the
		// next one, which stays pending until its own end is seen.
		done := r.rec
		r.rec = &Record{
			Header:   string(line[1:]),
			Sequence: make([]byte, 0),
		}
		if done != nil {
			return done, nil
		}
	}
	// Input exhausted: hand out the pending record exactly once, then keep
	// reporting EOF.
	if r.rec == nil {
		return nil, io.EOF
	}
	last := r.rec
	r.rec = nil
	return last, nil
}
// A Writer writes sequences in a FASTA format.
type Writer struct {
	w io.Writer
	// width is the number of sequence letters written per output line.
	width int
}

// NewWriter returns a new FASTA format writer that writes to w and breaks
// sequences into lines of width letters. A width of 0 is treated as 1.
func NewWriter(w io.Writer, width int) *Writer {
	if width == 0 {
		width = 1
	}
	return &Writer{
		w:     w,
		width: width,
	}
}

// Write writes a single sequence to w: a header line made of '>' followed by
// s.Name(), then s.Seq() split into lines of at most width letters, each
// terminated by a newline. It returns the number of bytes written and any
// error reported by the underlying writer.
//
// The underlying writer receives one Write call for the header and one per
// sequence line.
func (w *Writer) Write(s Sequence) (n int, err error) {
	// Write the header line.
	n, err = w.w.Write([]byte(">" + s.Name() + "\n"))
	if err != nil {
		return n, err
	}

	// Fetch the sequence once; Seq may be costly for some implementations.
	seq := s.Seq()

	// The sign of the width is ignored. A width that is still not positive
	// here (a Writer not built by NewWriter) puts the sequence on one line.
	width := w.width
	if width < 0 {
		width = -width
	}

	// Write the sequence (width letters at each line). The line buffer is
	// reused so that each line costs a single Write call.
	var (
		line []byte
		m    int
	)
	for len(seq) > 0 {
		k := len(seq)
		if 0 < width && width < k {
			k = width
		}
		line = append(append(line[:0], seq[:k]...), '\n')
		m, err = w.w.Write(line)
		n += m
		if err != nil {
			return n, err
		}
		seq = seq[k:]
	}
	return n, nil
}