Skip to content

Commit

Permalink
lca: add a new flag -b/--buffer-size to set the size of the line buff…
Browse files Browse the repository at this point in the history
…er. #75
  • Loading branch information
shenwei356 committed Mar 26, 2023
1 parent 13acdc1 commit a323e3f
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
- [TaxonKit v0.14.2](https://github.com/shenwei356/taxonkit/releases/tag/v0.14.2)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/taxonkit/v0.14.2/total.svg)](https://github.com/shenwei356/taxonkit/releases/tag/v0.14.2)
- `taxonkit lca`:
- add a new flag `-b/--buffer-size` to set the size of the line buffer. [#75](https://github.com/shenwei356/taxonkit/issues/75)
- [TaxonKit v0.14.1](https://github.com/shenwei356/taxonkit/releases/tag/v0.14.1)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/taxonkit/v0.14.1/total.svg)](https://github.com/shenwei356/taxonkit/releases/tag/v0.14.1)
- `taxonkit reformat`:
Expand Down
2 changes: 2 additions & 0 deletions doc/docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,8 @@ Usage:
taxonkit lca [flags]
Flags:
-b, --buffer-size string size of buffer, supported unit: K, M, G. You need to increase the value when
"bufio.Scanner: token too long" error occurred (default "1M")
-h, --help help for lca
-s, --separater string separater for TaxIds (default " ")
-D, --skip-deleted skip deleted TaxIds and compute with left ones
Expand Down
15 changes: 15 additions & 0 deletions taxonkit/cmd/lca.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"strconv"
"strings"

"github.com/shenwei356/util/bytesize"
"github.com/shenwei356/xopen"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -74,6 +75,15 @@ Examples:
skipDeleted := getFlagBool(cmd, "skip-deleted")
skipUnfound := getFlagBool(cmd, "skip-unfound")

bufferSizeS := getFlagString(cmd, "buffer-size")
if bufferSizeS == "" {
checkError(fmt.Errorf("value of buffer size. supported unit: K, M, G"))
}
bufferSize, err := bytesize.ParseByteSize(bufferSizeS)
if err != nil {
checkError(fmt.Errorf("invalid value of buffer size. supported unit: K, M, G"))
}

taxondb := loadTaxonomy(&config, false)
nodes := taxondb.Nodes
merged := taxondb.MergeNodes
Expand All @@ -83,12 +93,16 @@ Examples:
checkError(err)
defer outfh.Close()

buf := make([]byte, bufferSize)

taxids := make([]uint32, 0, 128)
for _, file := range files {
fh, err := xopen.Ropen(file)
checkError(err)

scanner := bufio.NewScanner(fh)
scanner.Buffer(buf, int(bufferSize))

var _taxid int
var line, item string
var items []string
Expand Down Expand Up @@ -188,6 +202,7 @@ func init() {
lcaCmd.Flags().StringP("separater", "s", " ", "separater for TaxIds")
lcaCmd.Flags().BoolP("skip-deleted", "D", false, "skip deleted TaxIds and compute with left ones")
lcaCmd.Flags().BoolP("skip-unfound", "U", false, "skip unfound TaxIds and compute with left ones")
lcaCmd.Flags().StringP("buffer-size", "b", "1M", `size of buffer, supported unit: K, M, G. You need increase the value when "bufio.Scanner: token too long" error occured`)

}

Expand Down
2 changes: 1 addition & 1 deletion taxonkit/cmd/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
)

// VERSION of taxonkit
const VERSION = "0.14.1"
const VERSION = "0.14.2"

// versionCmd represents the version command
var versionCmd = &cobra.Command{
Expand Down

0 comments on commit a323e3f

Please sign in to comment.