Skip to content

Commit

Permalink
v0.3.7
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Nov 23, 2016
1 parent 676130e commit 58db66a
Show file tree
Hide file tree
Showing 16 changed files with 145 additions and 70 deletions.
5 changes: 3 additions & 2 deletions doc/docs/download.md
Expand Up @@ -10,8 +10,9 @@ SeqKit is implemented in [Go](https://golang.org/) programming language,
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.7/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)

- fix bug in `seqkit split --by-id` when sequence ID contains invalid characters for system path.
- add more flags validation for `seqkit replace`
- enhancement: raise error when key pattern matches multiple targets in cases of replacing with key-value files
- add more flags validation for `seqkit replace`.
- enhancement: raise error when key pattern matches multiple targets in cases of replacing with key-value files, and add more controls.
- changes: do not wrap sequence and quality in output for FASTQ format.

***64-bit versions are highly recommended.***

Expand Down
19 changes: 11 additions & 8 deletions doc/docs/usage.md
Expand Up @@ -99,7 +99,7 @@ Usage
```
SeqKit -- a cross-platform and ultrafast toolkit for FASTA/Q file manipulation
Version: 0.3.6
Version: 0.3.7
Author: Wei Shen <shenwei356@gmail.com>
Expand Down Expand Up @@ -1062,18 +1062,21 @@ more on: http://bioinf.shenwei.me/seqkit/usage/#replace
Special replacement symbols (only for replacing name not sequence):
{nr} Record number, starting from 1
{kv} Corresponding value of the key ($1) by key-value file
{kv} Corresponding value of the key (captured variable $n) by key-value file,
n can be specified by flag -I (--key-capt-idx) (default: 1)
Usage:
seqkit replace [flags]
Flags:
-s, --by-seq replace seq
-i, --ignore-case ignore case
-K, --keep-key keep the key as value when no value found for the key (only for sequence name)
-k, --kv-file string tab-delimited key-value file for replacing key with value when using "{kv}" in -r (--replacement) (only for sequence name)
-p, --pattern string search regular expression
-r, --replacement string replacement. supporting capture variables. e.g. $1 represents the text of the first submatch. ATTENTION: use SINGLE quote NOT double quotes in *nix OS or use the \ escape character. Record number is also supported by "{nr}"
-s, --by-seq replace seq
-i, --ignore-case ignore case
-K, --keep-key keep the key as value when no value found for the key (only for sequence name)
-I, --key-capt-idx int capture variable index of key (1-based) (default 1)
--key-miss-repl string replacement for key with no corresponding value
-k, --kv-file string tab-delimited key-value file for replacing key with value when using "{kv}" in -r (--replacement) (only for sequence name)
-p, --pattern string search regular expression
-r, --replacement string replacement. supporting capture variables. e.g. $1 represents the text of the first submatch. ATTENTION: for *nix OS, use SINGLE quote NOT double quotes or use the \ escape character. Record number is also supported by "{nr}". Use ${1} instead of $1 when {kv} is given!
```

Expand Down
2 changes: 1 addition & 1 deletion doc/site
Submodule site updated from 9130ba to 3e225d
9 changes: 7 additions & 2 deletions seqkit/cmd/grep.go
Expand Up @@ -51,8 +51,8 @@ Examples:
config := getConfigs(cmd)
alphabet := config.Alphabet
idRegexp := config.IDRegexp
lineWidth := config.LineWidth
outFile := config.OutFile
lineWidth := config.LineWidth
seq.AlphabetGuessSeqLenghtThreshold = config.AlphabetGuessSeqLength
seq.ValidateSeq = false
runtime.GOMAXPROCS(config.Threads)
Expand Down Expand Up @@ -184,6 +184,9 @@ Examples:
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if byName {
subject = record.Name
Expand Down Expand Up @@ -228,8 +231,10 @@ Examples:
}
}

record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
}

config.LineWidth = lineWidth
}
},
}
Expand Down
8 changes: 7 additions & 1 deletion seqkit/cmd/head.go
Expand Up @@ -59,6 +59,7 @@ var headCmd = &cobra.Command{
for _, file := range files {
fastxReader, err := fastx.NewReader(alphabet, file, idRegexp)
checkError(err)

for {
record, err := fastxReader.Read()
if err != nil {
Expand All @@ -68,13 +69,18 @@ var headCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}
i++
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)

if number == i {
return
}
}

config.LineWidth = lineWidth
}
},
}
Expand Down
7 changes: 6 additions & 1 deletion seqkit/cmd/rename.go
Expand Up @@ -71,6 +71,9 @@ var renameCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if byName {
k = string(record.Name)
Expand All @@ -86,8 +89,10 @@ var renameCmd = &cobra.Command{
numbers[k] = 1
}

record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
}

config.LineWidth = lineWidth
}
},
}
Expand Down
23 changes: 17 additions & 6 deletions seqkit/cmd/replace.go
Expand Up @@ -57,7 +57,8 @@ more on: http://bioinf.shenwei.me/seqkit/usage/#replace
Special replacement symbols (only for replacing name not sequence):
{nr} Record number, starting from 1
{kv} Corresponding value of the key ($1) by key-value file
{kv} Corresponding value of the key (captured variable $n) by key-value file,
n can be specified by flag -I (--key-capt-idx) (default: 1)
`,
Run: func(cmd *cobra.Command, args []string) {
Expand All @@ -75,6 +76,7 @@ Special replacement symbols (only for replacing name not sequence):
kvFile := getFlagString(cmd, "kv-file")
keepKey := getFlagBool(cmd, "keep-key")
keyCaptIdx := getFlagPositiveInt(cmd, "key-capt-idx")
keyMissRepl := getFlagString(cmd, "key-miss-repl")

bySeq := getFlagBool(cmd, "by-seq")
// byName := getFlagBool(cmd, "by-name")
Expand Down Expand Up @@ -161,6 +163,9 @@ Special replacement symbols (only for replacing name not sequence):
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

nr++
if bySeq {
Expand All @@ -179,6 +184,9 @@ Special replacement symbols (only for replacing name not sequence):
}
if len(founds) > 0 {
found = founds[0]
if keyCaptIdx > len(found)-1 {
checkError(fmt.Errorf("value of flag -I (--key-capt-idx) overflows"))
}
k = string(found[keyCaptIdx])
if ignoreCase {
k = strings.ToLower(k)
Expand All @@ -188,17 +196,18 @@ Special replacement symbols (only for replacing name not sequence):
} else if keepKey {
r = reKV.ReplaceAll(r, found[keyCaptIdx])
} else {
r = reKV.ReplaceAll(r, []byte(""))
r = reKV.ReplaceAll(r, []byte(keyMissRepl))
}
}
}

record.Name = patternRegexp.ReplaceAll(record.Name, r)
}

record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
}

config.LineWidth = lineWidth
}
},
}
Expand All @@ -209,15 +218,17 @@ func init() {
replaceCmd.Flags().StringP("replacement", "r", "",
"replacement. supporting capture variables. "+
" e.g. $1 represents the text of the first submatch. "+
"ATTENTION: use SINGLE quote NOT double quotes in *nix OS or "+
`use the \ escape character. Record number is also supported by "{nr}"`)
"ATTENTION: for *nix OS, use SINGLE quote NOT double quotes or "+
`use the \ escape character. Record number is also supported by "{nr}".`+
`use ${1} instead of $1 when {kv} given!`)
// replaceCmd.Flags().BoolP("by-name", "n", false, "replace full name instead of just id")
replaceCmd.Flags().BoolP("by-seq", "s", false, "replace seq")
replaceCmd.Flags().BoolP("ignore-case", "i", false, "ignore case")
replaceCmd.Flags().StringP("kv-file", "k", "",
`tab-delimited key-value file for replacing key with value when using "{kv}" in -r (--replacement) (only for sequence name)`)
replaceCmd.Flags().BoolP("keep-key", "K", false, "keep the key as value when no value found for the key (only for sequence name)")
replaceCmd.Flags().IntP("key-capt-idx", "I", 1, "capture variable index of key")
replaceCmd.Flags().IntP("key-capt-idx", "I", 1, "capture variable index of key (1-based)")
replaceCmd.Flags().StringP("key-miss-repl", "", "", "replacement for key with no corresponding value")
}

var reNR = regexp.MustCompile(`\{(NR|nr)\}`)
Expand Down
9 changes: 7 additions & 2 deletions seqkit/cmd/rmdup.go
Expand Up @@ -96,6 +96,9 @@ var rmdupCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if bySeq {
if ignoreCase {
Expand All @@ -121,13 +124,13 @@ var rmdupCmd = &cobra.Command{
counter[subject]++
removed++
if len(dupFile) > 0 {
outfhDup.Write(record.Format(lineWidth))
outfhDup.Write(record.Format(config.LineWidth))
}
if len(numFile) > 0 {
names[subject] = append(names[subject], string(record.ID))
}
} else { // new one
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
counter[subject]++

if len(numFile) > 0 {
Expand All @@ -136,6 +139,8 @@ var rmdupCmd = &cobra.Command{
}
}
}

config.LineWidth = lineWidth
}
if removed > 0 && len(numFile) > 0 {
outfhNum, err := xopen.Wopen(numFile)
Expand Down
14 changes: 10 additions & 4 deletions seqkit/cmd/sample.go
Expand Up @@ -47,7 +47,7 @@ var sampleCmd = &cobra.Command{
config := getConfigs(cmd)
alphabet := config.Alphabet
idRegexp := config.IDRegexp
lineWidth := config.LineWidth
// lineWidth := config.LineWidth
outFile := config.OutFile
quiet := config.Quiet
seq.AlphabetGuessSeqLenghtThreshold = config.AlphabetGuessSeqLength
Expand Down Expand Up @@ -119,10 +119,13 @@ var sampleCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if rand.Float64() <= proportion {
n++
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
if n == number {
break LOOP
}
Expand All @@ -137,7 +140,7 @@ var sampleCmd = &cobra.Command{
for _, record := range records {
if rand.Float64() <= proportion {
n++
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
if n == number {
break
}
Expand All @@ -160,10 +163,13 @@ var sampleCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if rand.Float64() <= proportion {
n++
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
}
}
}
Expand Down
16 changes: 10 additions & 6 deletions seqkit/cmd/seq.go
Expand Up @@ -108,6 +108,9 @@ var seqCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

if checkSeqType {
if len(record.Seq.Qual) > 0 {
Expand Down Expand Up @@ -212,12 +215,12 @@ var seqCmd = &cobra.Command{
}

if len(sequence.Seq) <= pageSize {
outfh.Write(byteutil.WrapByteSlice(sequence.Seq, lineWidth))
outfh.Write(byteutil.WrapByteSlice(sequence.Seq, config.LineWidth))
} else {
if bufferedByteSliceWrapper == nil {
bufferedByteSliceWrapper = byteutil.NewBufferedByteSliceWrapper2(1, len(sequence.Seq), lineWidth)
bufferedByteSliceWrapper = byteutil.NewBufferedByteSliceWrapper2(1, len(sequence.Seq), config.LineWidth)
}
text, b = bufferedByteSliceWrapper.Wrap(sequence.Seq, lineWidth)
text, b = bufferedByteSliceWrapper.Wrap(sequence.Seq, config.LineWidth)
outfh.Write(text)
outfh.Flush()
bufferedByteSliceWrapper.Recycle(b)
Expand Down Expand Up @@ -245,12 +248,12 @@ var seqCmd = &cobra.Command{
}

if len(sequence.Qual) <= pageSize {
outfh.Write(byteutil.WrapByteSlice(sequence.Qual, lineWidth))
outfh.Write(byteutil.WrapByteSlice(sequence.Qual, config.LineWidth))
} else {
if bufferedByteSliceWrapper == nil {
bufferedByteSliceWrapper = byteutil.NewBufferedByteSliceWrapper2(1, len(sequence.Qual), lineWidth)
bufferedByteSliceWrapper = byteutil.NewBufferedByteSliceWrapper2(1, len(sequence.Qual), config.LineWidth)
}
text, b = bufferedByteSliceWrapper.Wrap(sequence.Qual, lineWidth)
text, b = bufferedByteSliceWrapper.Wrap(sequence.Qual, config.LineWidth)
outfh.Write(text)
outfh.Flush()
bufferedByteSliceWrapper.Recycle(b)
Expand All @@ -260,6 +263,7 @@ var seqCmd = &cobra.Command{
}
}

config.LineWidth = lineWidth
}

outfh.Close()
Expand Down
7 changes: 5 additions & 2 deletions seqkit/cmd/shuffle.go
Expand Up @@ -55,7 +55,7 @@ Secondly, seqkit shuffles sequence IDs and extract sequences by FASTA index.
config := getConfigs(cmd)
alphabet := config.Alphabet
idRegexp := config.IDRegexp
lineWidth := config.LineWidth
// lineWidth := config.LineWidth
outFile := config.OutFile
quiet := config.Quiet
seq.AlphabetGuessSeqLenghtThreshold = config.AlphabetGuessSeqLength
Expand Down Expand Up @@ -92,6 +92,9 @@ Secondly, seqkit shuffles sequence IDs and extract sequences by FASTA index.
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

sequences[string(record.Name)] = record.Clone()
index2name[i] = string(record.Name)
Expand Down Expand Up @@ -121,7 +124,7 @@ Secondly, seqkit shuffles sequence IDs and extract sequences by FASTA index.
var record *fastx.Record
for _, i := range indices {
record = sequences[index2name[i]]
record.FormatToWriter(outfh, lineWidth)
record.FormatToWriter(outfh, config.LineWidth)
}
return
}
Expand Down
7 changes: 6 additions & 1 deletion seqkit/cmd/sliding.go
Expand Up @@ -82,6 +82,9 @@ var slidingCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

originalLen = len(record.Seq.Seq)
sequence = record.Seq.Seq
Expand Down Expand Up @@ -115,9 +118,11 @@ var slidingCmd = &cobra.Command{
r, _ = fastx.NewRecordWithoutValidation(record.Seq.Alphabet,
[]byte{}, []byte(fmt.Sprintf("%s_sliding:%d-%d", record.ID, i+1, e)), s)
}
r.FormatToWriter(outfh, lineWidth)
r.FormatToWriter(outfh, config.LineWidth)
}
}

config.LineWidth = lineWidth
}
},
}
Expand Down

0 comments on commit 58db66a

Please sign in to comment.