Skip to content

Commit

Permalink
v0.3.8
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Nov 24, 2016
1 parent 58db66a commit fb30178
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 31 deletions.
15 changes: 9 additions & 6 deletions doc/docs/download.md
Expand Up @@ -6,13 +6,10 @@ SeqKit is implemented in [Go](https://golang.org/) programming language,

## Latest Version

[SeqKit v0.3.7](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.7/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
[SeqKit v0.3.8](https://github.com/shenwei356/seqkit/releases/tag/v0.3.8)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.8/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.8)

- fix bug in `seqkit split --by-id` when sequence ID contains invalid characters for system path.
- add more flags validation for `seqkit replace`.
- enhancement: raise an error when a key pattern matches multiple targets when replacing with key-value files; more controls are also added.
- changes: do not wrap sequence and quality in output for FASTQ format.
- enhancement of `seqkit common`: better handling of files containing replicated sequences.

***64-bit versions are highly recommended.***

Expand Down Expand Up @@ -94,6 +91,12 @@ For Go developer, just one command:

## Release History

- [SeqKit v0.3.7](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.7/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
- fix bug in `seqkit split --by-id` when sequence ID contains invalid characters for system path.
- add more flags validation for `seqkit replace`.
- enhancement: raise an error when a key pattern matches multiple targets when replacing with key-value files; more controls are also added.
- changes: do not wrap sequence and quality in output for FASTQ format.
- [SeqKit v0.3.6](https://github.com/shenwei356/seqkit/releases/tag/v0.3.6)
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.6/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.6)
- add new feature for `seqkit grep`: new flag `-R` (`--region`) for specifying sequence region for searching.
Expand Down
71 changes: 48 additions & 23 deletions seqkit/cmd/common.go
Expand Up @@ -42,9 +42,6 @@ var commonCmd = &cobra.Command{
`,
Run: func(cmd *cobra.Command, args []string) {
if len(args) < 2 {
checkError(errors.New("at least 2 files needed"))
}
config := getConfigs(cmd)
alphabet := config.Alphabet
idRegexp := config.IDRegexp
Expand All @@ -68,21 +65,34 @@ var commonCmd = &cobra.Command{
}

files := getFileList(args)
if len(files) < 2 {
checkError(errors.New("at least 2 files needed"))
}

outfh, err := xopen.Wopen(outFile)
checkError(err)
defer outfh.Close()

counter := make(map[string]map[string]int)
names := make(map[string]map[string]string)
// target -> file -> struct{}
counter := make(map[string]map[string]struct{})
// target -> file -> seqname
names := make(map[string]map[string][]string)

var fastxReader *fastx.Reader

// read all files
var subject string
var checkFirstFile = true
var firstFile string
for _, file := range files {
if !quiet {
log.Infof("read file: %s", file)
}
if checkFirstFile {
firstFile = file
checkFirstFile = false
}

fastxReader, err = fastx.NewReader(alphabet, file, idRegexp)
checkError(err)
for {
Expand Down Expand Up @@ -124,42 +134,56 @@ var commonCmd = &cobra.Command{
}

if _, ok := counter[subject]; !ok {
counter[subject] = make(map[string]int)
counter[subject] = make(map[string]struct{})
}
counter[subject][file] = counter[subject][file] + 1
counter[subject][file] = struct{}{}

if _, ok := names[subject]; !ok {
names[subject] = make(map[string]string)
names[subject] = make(map[string][]string)
}
names[subject][file] = string(record.Name)
names[subject][file] = append(names[subject][file], string(record.Name))
}
}

// find common seqs
if !quiet {
log.Info("find common seqs ...")
}
fileNum := len(args)
firstFile := args[0]
namesOK := make(map[string]int)
n := 0
for subject, count := range counter {
if len(count) != fileNum {
fileNum := len(files)
namesOK := make(map[string]struct{})
var n, n2 int
var seqname string
for subject, presence := range counter {
if len(presence) != fileNum {
continue
}
namesOK[names[subject][firstFile]] = counter[subject][firstFile]

n++
}
if !quiet {
log.Infof("%d common seqs found", n)
for _, seqname = range names[subject][firstFile] {
n2++
namesOK[seqname] = struct{}{}
}
}

var t string
if byName {
t = "sequence headers"
} else if bySeq {
t = "sequences"
} else {
t = "sequence IDs"
}
if n == 0 {
log.Infof("no common %s found", t)
return
}
if !quiet {
log.Infof("%d unique %s found in %d files, which belong to %d records in the first file: %s",
n, t, fileNum, len(namesOK), firstFile)
}

if !quiet {
log.Infof("extract common seqs from first file: %s", firstFile)
log.Infof("extract seqs from the first file: %s", firstFile)
}

// extract
Expand All @@ -174,11 +198,12 @@ var commonCmd = &cobra.Command{
checkError(err)
break
}
if fastxReader.IsFastq {
config.LineWidth = 0
}

name := string(record.Name)
if _, ok := namesOK[name]; ok && namesOK[name] > 0 {
if _, ok := namesOK[string(record.Name)]; ok {
record.FormatToWriter(outfh, lineWidth)
namesOK[name] = 0
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion seqkit/cmd/helper.go
Expand Up @@ -43,7 +43,7 @@ import (
)

// VERSION of seqkit
const VERSION = "0.3.7"
const VERSION = "0.3.8"

func checkError(err error) {
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion seqkit/download_all_binaries.sh
@@ -1,7 +1,7 @@
#!/bin/sh

rm seqkit_*.tar.gz
version="0.3.5"
version="0.3.8"

wget https://github.com/shenwei356/seqkit/releases/download/v$version/seqkit_linux_386.tar.gz
wget https://github.com/shenwei356/seqkit/releases/download/v$version/seqkit_linux_amd64.tar.gz
Expand Down

0 comments on commit fb30178

Please sign in to comment.