// Source: translate.go (forked from BurntSushi/cablastp).

package cablastp
import (
"bytes"
// "compress/gzip"
"io"
// "os"
"fmt"
"github.com/TuftsBCB/io/fasta"
"github.com/TuftsBCB/seq"
"strings"
)
// SearchOperator is a function that consumes a *bytes.Reader and produces a
// new *bytes.Reader or an error. In this file it appears only as the type of
// the action parameter of TranslateQuerySeqs.
type SearchOperator func(*bytes.Reader) (*bytes.Reader, error)
// TranslateQuerySeqs reads every FASTA record from query, translates each
// sequence into its six reading frames with Translate, passes every frame
// through Reduce, and writes the results as FASTA records carrying the name of
// the record they were derived from.
//
// The action parameter is accepted but not invoked by this function.
//
// It returns a reader over the generated FASTA text, or the first error
// encountered while reading the query or writing the translated records.
func TranslateQuerySeqs(
	query *bytes.Reader, action SearchOperator) (*bytes.Reader, error) {

	buf := new(bytes.Buffer)
	w := fasta.NewWriter(buf)
	r := fasta.NewReader(query)
	for {
		sequence, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		name := sequence.Name
		// Translate yields six frames per input record.
		for _, frame := range Translate(sequence.Bytes()) {
			rec := seq.NewSequenceString(name, string(Reduce(frame)))
			if err := w.Write(rec); err != nil {
				return nil, err
			}
		}
	}

	// NOTE(review): the fasta writer is assumed to buffer its output, so it
	// must be flushed before buf is read; otherwise the returned reader can be
	// missing trailing (or all) records. Confirm against the vendored
	// fasta package.
	if err := w.Flush(); err != nil {
		return nil, err
	}
	return bytes.NewReader(buf.Bytes()), nil
}
// Translate returns the six-frame translation of a nucleotide sequence.
//
// The result always holds six entries. For each offset 0, 1 and 2 it contains
// first the translation of the forward strand starting at that offset, then
// the translation of the reverse complement, read in codons that start offset
// bases in from the end of the sequence. Trailing bases that do not fill a
// complete codon are ignored, so a sequence shorter than three bases yields
// six empty frames.
//
// Stop codons are elided from every frame. Codons are translated with
// translate1; the reverse strand is built with complement, which panics on
// any letter other than A, C, G, T or N.
func Translate(sequence []byte) [][]byte {
	// translate1 encodes stop codons as '*'; those are dropped from the output.
	const stop = '*'

	l := len(sequence)
	results := make([][]byte, 0, 6)

	for orf := 0; orf <= 2; orf++ {
		// Forward direction: walk left to right in steps of one codon.
		forward := make([]byte, 0, l/3)
		for i := orf; i+3 <= l; i += 3 {
			if aa := translate1(sequence[i : i+3]); aa != stop {
				forward = append(forward, aa)
			}
		}
		results = append(results, forward)

		// Reverse complement: walk right to left, complementing each base
		// and reversing the order of the bases within the codon.
		reverse := make([]byte, 0, l/3)
		var codon [3]byte
		for i := l - 3 - orf; i >= 0; i -= 3 {
			codon[0] = complement(sequence[i+2])
			codon[1] = complement(sequence[i+1])
			codon[2] = complement(sequence[i])
			if aa := translate1(codon[:]); aa != stop {
				reverse = append(reverse, aa)
			}
		}
		results = append(results, reverse)
	}
	return results
}
// geneticCode maps each upper-case DNA codon over the letters A, C, G and T to
// its one-letter amino acid code. Stop codons map to '*'. It is built once at
// package initialisation instead of on every translate1 call.
var geneticCode = map[string]byte{
	"ATA": 'I', "ATC": 'I', "ATT": 'I', "ATG": 'M',
	"ACA": 'T', "ACC": 'T', "ACG": 'T', "ACT": 'T',
	"AAC": 'N', "AAT": 'N', "AAA": 'K', "AAG": 'K',
	"AGC": 'S', "AGT": 'S', "AGA": 'R', "AGG": 'R',
	"CTA": 'L', "CTC": 'L', "CTG": 'L', "CTT": 'L',
	"CCA": 'P', "CCC": 'P', "CCG": 'P', "CCT": 'P',
	"CAC": 'H', "CAT": 'H', "CAA": 'Q', "CAG": 'Q',
	"CGA": 'R', "CGC": 'R', "CGG": 'R', "CGT": 'R',
	"GTA": 'V', "GTC": 'V', "GTG": 'V', "GTT": 'V',
	"GCA": 'A', "GCC": 'A', "GCG": 'A', "GCT": 'A',
	"GAC": 'D', "GAT": 'D', "GAA": 'E', "GAG": 'E',
	"GGA": 'G', "GGC": 'G', "GGG": 'G', "GGT": 'G',
	"TCA": 'S', "TCC": 'S', "TCG": 'S', "TCT": 'S',
	"TTC": 'F', "TTT": 'F', "TTA": 'L', "TTG": 'L',
	"TAC": 'Y', "TAT": 'Y', "TAA": '*', "TAG": '*',
	"TGC": 'C', "TGT": 'C', "TGA": '*', "TGG": 'W',
}

// translate1 translates a single codon into its one-letter amino acid code.
//
// It returns '*' for a stop codon and 'X' for any codon that cannot be
// resolved: one containing an 'N', or one that is not a key of geneticCode
// (other letters, lower case, or a length other than three).
func translate1(codon []byte) byte {
	// A codon containing 'N' is ambiguous and translates to 'X'.
	if strings.ContainsRune(string(codon), 'N') {
		return 'X'
	}
	if aa, ok := geneticCode[string(codon)]; ok {
		return aa
	}
	// Never leak the map's zero value (a NUL byte) into a protein sequence.
	return 'X'
}
// complement returns the Watson-Crick complement of an upper-case nucleotide
// letter: A<->T and C<->G, with N mapping to itself. Any other byte causes a
// panic whose message names the offending letter.
func complement(char byte) byte {
	// Each base is immediately followed by its complement.
	const pairs = "ATTACGGCNN"
	for i := 0; i < len(pairs); i += 2 {
		if pairs[i] == char {
			return pairs[i+1]
		}
	}
	panic(fmt.Sprintf("bad letter: %c", char))
}