Skip to content

Commit

Permalink
[anchor] Add getL50()
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jul 1, 2018
1 parent f158333 commit 91a5e8c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
17 changes: 14 additions & 3 deletions anchor.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,12 @@ type PathSet map[*Path]bool
// Run kicks off the merging algorithm
func (r *Anchorer) Run() {
// Prepare the paths to run
flanksize := int64(1000000)
nIterations := 1
r.ExtractInterContigLinks()
flanksize := int64(1000000)
paths := r.makeTrivialPaths(r.contigs, flanksize)
for i := 0; i < nIterations; i++ {
r.iterativeGraphMerge(paths, flanksize)

// Attempt to split bad joins
r.makeContigStarts()
piler := r.inspectGaps(LinkDist)
Expand All @@ -118,9 +117,11 @@ func (r *Anchorer) iterativeGraphMerge(paths PathSet, flanksize int64) {
prevPaths := len(paths)
graphRemake := true
for prevPaths > 1 {
// flanksize = getL50(paths) / 4
if graphRemake {
i++
log.Noticef("Starting iteration %d with %d paths", i, len(paths))
log.Noticef("Starting iteration %d with %d paths (L50=%d)",
i, len(paths), getL50(paths))
G = r.makeGraph(paths)
}
CG := r.makeConfidenceGraph(G)
Expand Down Expand Up @@ -596,3 +597,13 @@ func (r *Anchorer) serialize(res int64, jsonfile, npyfile string) {
w.WriteInt32(C)
log.Noticef("Matrix (resolution=%d) written to `%s`", res, npyfile)
}

// getL50 computes the L50 of all component contigs within a path
func getL50(paths PathSet) int64 {
pathLengths := []int64{}
for path := range paths {
pathLengths = append(pathLengths, path.length)
}

return L50(pathLengths)
}
20 changes: 20 additions & 0 deletions base.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,26 @@ func median(s []float64) float64 {
return result
}

// L50 returns the sequence length L where half of the genome is covered in contigs
// of length >= L50
func L50(lengths []int64) int64 {
sortInt64s(lengths)
total := int64(0)
for _, length := range lengths {
total += length
}
halfTotal := total / 2
cumsize := int64(0)
i := len(lengths) - 1
for ; i >= 0; i-- {
cumsize += lengths[i]
if cumsize > halfTotal {
break
}
}
return lengths[i]
}

// OutlierCutoff implements Iglewicz and Hoaglin's robust, returns the cutoff values -
// lower bound and upper bound.
func OutlierCutoff(a []float64) (float64, float64) {
Expand Down
2 changes: 1 addition & 1 deletion graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (r *Anchorer) getUniquePaths() PathSet {
}
}

log.Noticef("%d paths (nComplex=%d nSingleton=%d), %d contigs (nComplex=%d nSingleton=%d)",
log.Noticef("%d paths (nComplex=%d nSingle=%d), %d contigs (nComplex=%d nSingle=%d)",
nComplex+nSingleton, nComplex, nSingleton,
nComplexContigs+nSingletonContigs, nComplexContigs, nSingletonContigs)

Expand Down

0 comments on commit 91a5e8c

Please sign in to comment.