-
Notifications
You must be signed in to change notification settings - Fork 0
/
importcmd.go
172 lines (146 loc) · 4.02 KB
/
importcmd.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
// Copyright © 2022 J. Salvador Arias <jsalarias@gmail.com>
// All rights reserved.
// Distributed under BSD2 license that can be found in the LICENSE file.
// Package importcmd implements a command to import phylogenetic trees
// from a newick file into tsv files.
package importcmd
import (
"errors"
"fmt"
"io"
"os"
"github.com/js-arias/command"
"github.com/js-arias/timetree"
)
// Command is the definition of the import command.
// It holds the usage line and the help texts of the command,
// and uses setFlags to register the command flags
// and run to execute the command.
var Command = &command.Command{
Usage: `import [--age <value>] [-o|--output <file>]
--name <tree-name> [<newick-file>...]`,
Short: "import a newick tree",
Long: `
Command import reads one or more files that contain phylogenetic trees in
Newick format (i.e. parenthetical format), and import them into an equivalent
file in TSV format.
One or more newick files can be given as arguments. If no file is given the
input will be read from the standard input.
Trees in TSV format must have names. The flag --name is required and sets the
name of the tree. If multiple trees are found, the name will be append with
sequential numbers.
By default the output will be printed in the standard output. To define an
output file use the flag --output, or -o. If the file already exists, imported
trees will be added to the file.
The output TSV file will contain the following fields:
- tree, for the name of the tree
- node, for the ID of the node
- parent, for the ID of the parent node
(-1 is used for the root)
- age, the age of the node (in years)
- taxon, the taxonomic name of the node
By default, the age of the tree will be calculated using the maximum branch
length between the root and its terminals. Use the flag --age to set a
different age for the root (in million years). The given age should be greater
or equal to the maximum branch length.
`,
SetFlags: setFlags,
Run: run,
}
// Variables bound to the command flags in setFlags.
var (
	// age is the value of the --age flag,
	// the age of the root in million years.
	// It is zero unless the flag is given.
	age float64

	// nameFlag is the value of the --name flag,
	// the name used for the imported trees.
	nameFlag string

	// output is the value of the --output (or -o) flag,
	// the name of the output file.
	// When empty, trees are written to the standard output.
	output string
)
// setFlags registers the flags of the import command
// and binds them to the package variables age, nameFlag and output.
func setFlags(c *command.Command) {
	flags := c.Flags()

	flags.Float64Var(&age, "age", 0, "")
	flags.StringVar(&nameFlag, "name", "", "")

	// Both spellings of the output flag store into the same variable.
	flags.StringVar(&output, "o", "", "")
	flags.StringVar(&output, "output", "", "")
}
// run executes the import command.
//
// It requires the --name flag, and returns a usage error if it is not set.
// Each argument is read as a newick file with readNewick; when there are no
// arguments, the single source "-" (the standard input) is used. Trees from
// the first source are read with the name given by --name; for any following
// source the name is suffixed with ".<index>", where index is the position
// of the source in the argument list. All trees are added to the collection
// returned by newTreeCollection, which is then written with writeTrees.
//
// NOTE(review): if timetree.Newick numbers multiple trees from a single
// file with the same ".<n>" suffix, names could collide across files;
// confirm against the timetree package.
func run(c *command.Command, args []string) error {
	if nameFlag == "" {
		return c.UsageError("flag --name undefined")
	}

	coll, err := newTreeCollection()
	if err != nil {
		return err
	}

	sources := args
	if len(sources) == 0 {
		sources = []string{"-"}
	}

	for idx, src := range sources {
		treeName := nameFlag
		if idx > 0 {
			treeName = fmt.Sprintf("%s.%d", nameFlag, idx)
		}

		imported, err := readNewick(c.Stdin(), src, treeName)
		if err != nil {
			return err
		}

		for _, tn := range imported.Names() {
			if err := coll.Add(imported.Tree(tn)); err != nil {
				return fmt.Errorf("when adding trees from %q: %v", src, err)
			}
		}
	}

	return writeTrees(c.Stdout(), coll)
}
// newTreeCollection returns the collection that will receive the imported
// trees.
//
// If no output file is defined, or the output file does not exist, it returns
// an empty collection. Otherwise the output file is read as a TSV file and
// its trees are returned, so imported trees are added to the trees already
// in the file. Any other error while opening or reading the file is returned.
func newTreeCollection() (*timetree.Collection, error) {
	if output == "" {
		return timetree.NewCollection(), nil
	}

	f, err := os.Open(output)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// Nothing to preserve: start from scratch.
		return timetree.NewCollection(), nil
	case err != nil:
		return nil, err
	}
	defer f.Close()

	coll, err := timetree.ReadTSV(f)
	if err != nil {
		return nil, fmt.Errorf("while reading file %q: %v", output, err)
	}
	return coll, nil
}
// millionYears is used to transform the age flag
// (a float in million years)
// into an integer in years.
const millionYears = 1_000_000
// readNewick reads the trees in Newick format from a single source.
//
// If treeFile is "-" the trees are read from r, and the source is reported
// as "stdin" in error messages. Otherwise the file named treeFile is opened
// and read, and r is ignored.
//
// The trees are parsed by timetree.Newick, which receives name as the tree
// name and the value of the --age flag converted from million years to
// years. An error from opening the file is returned as is; an error from
// the parser is returned with the name of the source.
func readNewick(r io.Reader, treeFile, name string) (*timetree.Collection, error) {
	if treeFile != "-" {
		f, err := os.Open(treeFile)
		if err != nil {
			return nil, err
		}
		defer f.Close()
		r = f
	} else {
		treeFile = "stdin"
	}

	// Round to the nearest year instead of truncating. Many decimal ages
	// are stored as a float slightly smaller than the written value, so a
	// plain integer conversion of the product can be one year short
	// (for example, 1.005 would become 1004999 instead of 1005000).
	// The default age of zero is still converted to zero.
	rootAge := int64(age*millionYears + 0.5)

	c, err := timetree.Newick(r, name, rootAge)
	if err != nil {
		return nil, fmt.Errorf("while reading file %q: %v", treeFile, err)
	}
	return c, nil
}
// writeTrees writes the trees of the collection c in TSV format.
//
// If the output flag is empty the trees are written to w. Otherwise the
// file named by the output flag is created (truncating any previous
// content) and the trees are written to that file instead of w.
//
// It returns an error if the file cannot be created or if writing the trees
// fails. If both succeed, an error from closing the file is returned, so a
// failed close is not reported as a successful write.
func writeTrees(w io.Writer, c *timetree.Collection) (err error) {
	outName := "stdout"
	if output != "" {
		outName = output

		// Assign to the named result with "=" rather than ":=". A new
		// err declared in this scope would be the variable captured by
		// the deferred closure, and an error from Close would never
		// reach the caller.
		var f *os.File
		f, err = os.Create(output)
		if err != nil {
			return err
		}
		defer func() {
			// Keep the first error: a write error takes
			// precedence over a close error.
			if e := f.Close(); e != nil && err == nil {
				err = e
			}
		}()
		w = f
	}

	if e := c.TSV(w); e != nil {
		return fmt.Errorf("while writing to %q: %v", outName, e)
	}
	return nil
}