Skip to content

Commit

Permalink
[partition] Add 1 pseudocount to RE counts; Also lower the MinRE cutoff
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jul 26, 2018
1 parent 51110c2 commit ddf6835
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
3 changes: 2 additions & 1 deletion base.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ const (
// *** The following parameters are modeled after LACHESIS ***

// MinREs is the minimum number of RE sites in a contig to be clustered (CLUSTER_MIN_RE_SITES)
MinREs = 25
// MinREs = 25

// MaxLinkDensity is the density threshold before marking a contig as 'repetitve' (CLUSTER_MAX_LINK_DENSITY)
MaxLinkDensity = 2
// NonInformativeRatio is the cutoff for recovering skipped contigs back into the clusters (CLUSTER_NONINFORMATIVE_RATIO)
Expand Down
3 changes: 2 additions & 1 deletion extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ func (r *Extracter) readFastaAndWriteRE() {
}

name := string(rec.Name)
count := bytes.Count(rec.Seq.Seq, []byte(r.RE))
// Add pseudocount of 1 to prevent division by zero
count := bytes.Count(rec.Seq.Seq, []byte(r.RE)) + 1
length := rec.Seq.Length()
totalCounts += count
totalBp += int64(length)
Expand Down
22 changes: 21 additions & 1 deletion partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ package allhic
import (
"fmt"
"math"
"path"
"strconv"
"strings"
)

// Partitioner converts the bamfile into a matrix of link counts
Expand Down Expand Up @@ -61,10 +63,28 @@ func (r *Partitioner) makeTrivialClusters() {
r.clusters = clusters
}

// getRE extracts the restriction enzyme from the file name
func (r *Partitioner) getRE() string {
s := path.Base(r.Contigsfile)
return strings.Split(RemoveExt(s), "_")[1]
}

// skipContigsWithFewREs skip contigs with fewere than MinREs
// This reads in the `counts_RE.txt` file generated by extract()
func (r *Partitioner) skipContigsWithFewREs() {
log.Noticef("skipContigsWithFewREs with MinREs = %d", MinREs)
RE := r.getRE()
MinREs := 25
switch len(RE) {
case 4:
MinREs = 25 // 25 * (4 ** 4) = 6Kb
case 5:
MinREs = 8 // 8 * (4 ** 5) = 8Kb
case 6:
MinREs = 3 // 3 * (4 ** 6) = 12Kb
default:
MinREs = 2 // We normally only use RE between 4 to 6 base cutter, but just in case
}
log.Noticef("skipContigsWithFewREs with MinREs = %d (RE = %s)", MinREs, RE)
nShort := 0
shortRE := 0
shortLen := 0
Expand Down

0 comments on commit ddf6835

Please sign in to comment.