Skip to content

Commit

Permalink
[anchor] Add iterative merges (not working yet)
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jun 26, 2018
1 parent 356427f commit 1c635b9
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 52 deletions.
82 changes: 46 additions & 36 deletions anchor.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,49 @@ type Link struct {
apos, bpos int // Positions
}

// iterations controls how many merges are we doing
const iterations = 2

// Run kicks off the merging algorithm
func (r *Anchorer) Run() {
var G Graph
r.ExtractInterContigLinks()
G := r.makeGraph()
G = r.makeConfidenceGraph(G)
r.generatePathAndCycle(G)
paths := r.makeInitialPath()
for i := 0; i < iterations; i++ {
log.Noticef("Starting iteration %d with %d paths", i, len(paths))
G = r.makeGraph(paths)
G = r.makeConfidenceGraph(G)
paths = r.generatePathAndCycle(G)
fmt.Println(paths)
}

log.Notice("Success")
}

// makeInitialPath starts the initial construction of Path object, with one
// contig per Path (trivial Path)
func (r *Anchorer) makeInitialPath() []Path {
// Initially make every contig a single Path object
paths := make([]Path, len(r.contigs))
r.registry = make(Registry)
for i, contig := range r.contigs {
paths[i] = Path{
contigs: []*Contig{contig},
orientations: []int{1},
}
}

return paths
}

// registerPaths stores the mapping between contig to node
func (r *Anchorer) registerPaths(paths []Path) {
nodes := make([]Node, 2*len(paths))
for i, path := range paths {
path.bisect(r.registry, &nodes[2*i], &nodes[2*i+1])
}
}

// ExtractInterContigLinks extracts links from the Bamfile
func (r *Anchorer) ExtractInterContigLinks() {
fh, _ := os.Open(r.Bamfile)
Expand Down Expand Up @@ -115,7 +149,16 @@ func (r *Anchorer) ExtractInterContigLinks() {
sort.Slice(contig.links, func(i, j int) bool {
return contig.links[i].apos < contig.links[j].apos
})
// Sanity check - this produced inconsistent pairing
// if contig.name == "idcChr1.ctg434" {
// for _, link := range contig.links {
// if link.b.name == "idcChr1.ctg433" {
// fmt.Println(link.a.name, link.b.name, link.apos, link.bpos)
// }
// }
// }
}

// Write intra-links to .dis file
for contig, links := range intraLinks {
links = unique(links)
Expand Down Expand Up @@ -169,39 +212,6 @@ type Graph map[*Node]map[*Node]float64
// We iterate through 1 or 2 ranges per contig ID
type Registry map[*Contig][]Range

// makeGraph makes a contig linkage graph
func (r *Anchorer) makeGraph() Graph {
// Initially make every contig a single Path object
paths := make([]Path, len(r.contigs))
nodes := make([]Node, 2*len(r.contigs))
r.registry = make(Registry)
for i, contig := range r.contigs {
path := Path{
contigs: []*Contig{contig},
orientations: []int{1},
}
paths[i] = path
path.bisect(r.registry, &nodes[2*i], &nodes[2*i+1])
}

G := make(Graph)
// Go through the links for each node and compile edges
for _, contig := range r.contigs {
for _, link := range contig.links {
a, b := r.linkToNodes(link)
r.insertEdge(G, a, b)
r.insertEdge(G, b, a)
}
}
nEdges := 0
for _, node := range G {
nEdges += len(node)
}
log.Noticef("Graph contains %d nodes and %d edges", len(G), nEdges)
// fmt.Println(G)
return G
}

// contigToNode takes as input contig and position, returns the nodeID
func (r *Anchorer) contigToNode(contig *Contig, pos int) *Node {
for _, rr := range r.registry[contig] {
Expand Down
53 changes: 37 additions & 16 deletions graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
package allhic

import (
"fmt"
"math"
"sort"
)
Expand All @@ -31,8 +30,28 @@ func (r *Edge) isSister() bool {
return r.weight == 0
}

// updateGraph takes input a list of paths and fuse the nodes
func (r *Anchorer) updateGraph(G Graph) Graph {
// makeGraph makes a contig linkage graph
func (r *Anchorer) makeGraph(paths []Path) Graph {
G := make(Graph)
r.registerPaths(paths)
// Go through the links for each node and compile edges
for _, contig := range r.contigs {
for _, link := range contig.links {
a, b := r.linkToNodes(link)
r.insertEdge(G, a, b)
r.insertEdge(G, b, a)
// if (link.a.name == "idcChr1.ctg433" && link.b.name == "idcChr1.ctg434") ||
// (link.a.name == "idcChr1.ctg434" && link.b.name == "idcChr1.ctg433") {
// fmt.Println("***", link.a.name, link.b.name, link.apos, link.bpos,
// a, b, G[a][b], G[b][a])
// }
}
}
nEdges := 0
for _, node := range G {
nEdges += len(node)
}
log.Noticef("Graph contains %d nodes and %d edges", len(G), nEdges)
return G
}

Expand Down Expand Up @@ -92,11 +111,11 @@ func getSecondLargest(a, b []float64) float64 {

// generatePathAndCycle makes new paths by merging the unique extensions
// in the graph
func (r *Anchorer) generatePathAndCycle(G Graph) {
fmt.Println(G)
func (r *Anchorer) generatePathAndCycle(G Graph) []Path {
visited := map[*Node]bool{}
var isCycle bool
nodeToPath := make(map[*Node]*Path)
paths := []Path{}
for a := range G {
if _, ok := visited[a]; ok {
continue
Expand All @@ -105,22 +124,23 @@ func (r *Anchorer) generatePathAndCycle(G Graph) {
path1, isCycle = dfs(G, a, path1, visited, true)
if isCycle {
path1 = breakCycle(path1)
mergePath(path1, nodeToPath)
paths = append(paths, mergePath(path1, nodeToPath))
continue
}
delete(visited, a)
path2, _ = dfs(G, a, path2, visited, false)

path1 = append(reversePath(path1), path2...)
mergePath(path1, nodeToPath)
paths = append(paths, mergePath(path1, nodeToPath))
}
log.Noticef("A total of %d nodes mapped to new path", len(nodeToPath))
log.Noticef("A total of %d nodes mapped to %d paths", len(nodeToPath), len(paths))
return paths
}

// mergePath converts a single edge path into a node path
func mergePath(path []Edge, nodeToPath map[*Node]*Path) {
func mergePath(path []Edge, nodeToPath map[*Node]*Path) Path {
p := []string{}
s := &Path{}
s := Path{}
for _, edge := range path {
if !edge.isSister() {
continue
Expand All @@ -134,18 +154,19 @@ func mergePath(path []Edge, nodeToPath map[*Node]*Path) {
// TODO: take orientations into account
s.contigs = append(s.contigs, ep.contigs...)
s.orientations = append(s.orientations, ep.orientations...)
nodeToPath[edge.a] = s
nodeToPath[edge.b] = s
nodeToPath[edge.a] = &s
nodeToPath[edge.b] = &s

// Special care needed for reverse orientation
for _, contig := range edge.a.path.contigs {
for _, contig := range ep.contigs {
p = append(p, tag+contig.name)
}
}
s.Length()
fmt.Println(path)
fmt.Println(s)
fmt.Println(p)
// fmt.Println(path)
// fmt.Println(s)
// fmt.Println(p)
return s
}

// reversePath reverses a single edge path into its reverse direction
Expand Down

0 comments on commit 1c635b9

Please sign in to comment.