Skip to content

Commit

Permalink
[anchor] Use int64 instead of float64
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jun 29, 2018
1 parent b708816 commit 14020a4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 51 deletions.
48 changes: 24 additions & 24 deletions anchor.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,31 @@ type Anchorer struct {
// Contig stores the name and length of each contig
type Contig struct {
name string
length int
length int64
links []*Link
path *Path
start int
orientation int // 1 => +, -1 => -
start int64
orientation int8 // 1 => +, -1 => -
segments []Range
}

// Link contains a specific inter-contig link
type Link struct {
a, b *Contig // Contig ids
apos, bpos int // Positions
apos, bpos int64 // Positions
}

// Path is a collection of ordered contigs
type Path struct {
contigs []*Contig // List of contigs
LNode, RNode *Node // Two nodes at each end
length int // Cumulative length of all contigs
length int64 // Cumulative length of all contigs
}

// Range tracks contig:start-end
type Range struct {
start int
end int
start int64
end int64
node *Node
}

Expand Down Expand Up @@ -182,7 +182,7 @@ func (r *Anchorer) ExtractInterContigLinks() {
for _, ref := range refs {
contig := Contig{
name: ref.Name(),
length: ref.Len(),
length: int64(ref.Len()),
}
r.contigs = append(r.contigs, &contig)
r.nameToContig[contig.name] = &contig
Expand Down Expand Up @@ -218,7 +218,7 @@ func (r *Anchorer) ExtractInterContigLinks() {

// An inter-contig link
a.links = append(a.links, &Link{
a: a, b: b, apos: apos, bpos: bpos,
a: a, b: b, apos: int64(apos), bpos: int64(bpos),
})
interTotal++
}
Expand Down Expand Up @@ -264,7 +264,7 @@ func (r *Path) String() string {
}

// contigToNode takes as input contig and position, returns the nodeID
func contigToNode(contig *Contig, pos int) *Node {
func contigToNode(contig *Contig, pos int64) *Node {
for _, rr := range contig.segments { // multiple 'segments'
if pos >= rr.start && pos < rr.end {
return rr.node
Expand All @@ -284,21 +284,21 @@ func (r *Anchorer) linkToNodes(link *Link) (*Node, *Node) {
// insertEdge adds just one link to the graph
func (r *Anchorer) insertEdge(G Graph, a, b *Node) {
if _, aok := G[a]; aok {
G[a][b] += 1.0
G[a][b]++
} else {
G[a] = map[*Node]float64{b: 1.0}
G[a] = map[*Node]int64{b: 1}
}
}

// findMidPoint find the center of a path for bisect
func (r *Path) findMidPoint() (int, int) {
func (r *Path) findMidPoint() (int, int64) {
r.length = 0
for _, contig := range r.contigs {
r.length += contig.length
}

midpos := r.length / 2
cumsize := 0
midpos := int64(math.Ceil(float64(r.length) / 2))
cumsize := int64(0)
i := 0
var contig *Contig
for i, contig = range r.contigs {
Expand Down Expand Up @@ -367,25 +367,25 @@ func (r *Path) bisect() {

// makeContigStarts returns starts of contigs within a path
func (r *Anchorer) makeContigStarts() {
pos := 0
pos := int64(0)
for _, contig := range r.path.contigs {
contig.start = pos
pos += contig.length
}
}

// findBin returns the i-th bin along the path
func findBin(contig *Contig, pos, resolution int) int {
func findBin(contig *Contig, pos, resolution int64) int {
offset := pos
if contig.orientation < 0 {
offset = contig.length - pos
}
return (contig.start + offset) / resolution
return int((contig.start + offset) / resolution)
}

// splitPath takes a path and look at joins that are weak
// Scans the path at certain resolution r, and search radius is d
func (r *Anchorer) splitPath(res, d int) {
func (r *Anchorer) splitPath(res int64, d int) {
// Look at all intra-path links, then store the counts to a
// sparse matrix, indexed by i, j, C[i, j] = # of links between
// i-th locus and j-th locus
Expand Down Expand Up @@ -466,13 +466,13 @@ func inspectGaps(path *Path) {
}

// identifyGap prints out all the gaps that lie within the bin
func (r *Anchorer) identifyGap(breakPoints []int, res int) {
contigStart := 0
func (r *Anchorer) identifyGap(breakPoints []int, res int64) {
contigStart := int64(0)
j := 0
for i, contig := range r.path.contigs {
if contigStart >= breakPoints[j]*res {
if contigStart >= (breakPoints[j]+1)*res {
for j < len(breakPoints) && contigStart >= (breakPoints[j]+1)*res {
if contigStart >= int64(breakPoints[j])*res {
if contigStart >= int64(breakPoints[j]+1)*res {
for j < len(breakPoints) && contigStart >= int64(breakPoints[j]+1)*res {
j++
}
// Exhausted, terminate
Expand Down
2 changes: 2 additions & 0 deletions base.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ const (
EffLinkDist = 1 << 19
// BinNorm is a ratio to make the link density human readable
BinNorm = 1000000.0
// BigNorm is a big integer multiplier so we don't have to mess with float64
BigNorm = int64(1000000000000)
// EPS is that Q must be larger than this value, used in cluster.go
EPS = 1e-5
// MinAvgLinkage is the minimum cutoff for merging clusters
Expand Down
67 changes: 40 additions & 27 deletions graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
package allhic

import (
"fmt"
"math"
"sort"
)
Expand All @@ -23,11 +24,11 @@ type Node struct {
// Edge is between two nodes in a graph
type Edge struct {
a, b *Node
weight float64
weight int64
}

// Graph is an adjacency list
type Graph map[*Node]map[*Node]float64
type Graph map[*Node]map[*Node]int64

// isLNode returns if a Node is an LNode (5`-end`)
func (r *Node) isLNode() bool {
Expand All @@ -40,8 +41,12 @@ func (r *Node) isRNode() bool {
}

// length returns the length of a node, which is half of the path length
func (r *Node) length() float64 {
return float64(r.path.length) / 2
func (r *Node) length() int64 {
ans := int64(r.path.length) / 2
if r.isRNode() {
return ans
}
return r.path.length - ans
}

// isReverse returns the orientation of an edge
Expand Down Expand Up @@ -76,10 +81,10 @@ func (r *Anchorer) makeGraph(paths PathSet) Graph {
}
}

// Normalize against the product of two paths
// Normalize against the product of lengths of two paths
for a, nb := range G {
for b, score := range nb {
G[a][b] = score / a.length() / b.length()
G[a][b] = score * BigNorm / (a.length() * b.length())
}
}

Expand All @@ -99,53 +104,62 @@ func (r *Anchorer) makeGraph(paths PathSet) Graph {
// 1 - calculate the link density as links divided by the product of two contigs
// 2 - calculate the confidence as the weight divided by the second largest edge
func (r *Anchorer) makeConfidenceGraph(G Graph) Graph {
twoLargest := map[*Node][]float64{}
twoLargest := map[*Node][]int64{}

for a, nb := range G {
first, second := 0.0, 0.0
first, second := int64(0), int64(0)
for _, score := range nb {
if score > first {
first, second = score, first
} else if score < first && score > second {
second = score
}
}
twoLargest[a] = []float64{first, second}
twoLargest[a] = []int64{first, second}
}
// fmt.Println(G)

confidenceGraph := Graph{}
// Now a second pass to compute confidence
for a, nb := range G {
for b, weight := range nb {
secondLargest := getSecondLargest(twoLargest[a], twoLargest[b])
confidence := weight / secondLargest
if confidence > 1 {
if secondLargest == 0 {
continue
}
confidence := weight * BigNorm / secondLargest
if confidence > BigNorm {
if _, ok := confidenceGraph[a]; ok {
confidenceGraph[a][b] = confidence
} else {
confidenceGraph[a] = map[*Node]float64{b: confidence}
confidenceGraph[a] = map[*Node]int64{b: confidence}
}
// There can be ties in terms of scores
// if len(confidenceGraph[a]) > 1 {
// fmt.Println(a.path, "<=>", b.path, G[a], "\n", confidenceGraph[a],
// twoLargest[a], twoLargest[b], secondLargest)
// for k, v := range confidenceGraph[a] {
// fmt.Println(k, k.path, k.sister, v)
// }
// }
if len(confidenceGraph[a]) > 1 {
// if confidence < BigNorm*101/100 {
fmt.Println(a.path, "<=>", b.path, a.isLNode(), b.isLNode(),
G[a], "\n", confidenceGraph[a],
twoLargest[a], twoLargest[b], secondLargest)
for k, v := range confidenceGraph[a] {
fmt.Println(k, k.path, k.sister, v)
}
}
}
}
}
// fmt.Println(confidenceGraph)
return confidenceGraph
}

// Get the second largest number without sorting
// a, b are both 2-item arrays
func getSecondLargest(a, b []float64) float64 {
func getSecondLargest(a, b []int64) int64 {
A := append(a, b...)
sort.Float64s(A)
sort.Slice(A, func(i, j int) bool {
return A[i] < A[j]
})
// Some edge will appear twice in this list so need to remove it
for i := 2; i >= 0; i-- { // Is precision an issue here?
for i := 2; i >= 0; i-- {
if A[i] < A[3] {
return A[i]
}
Expand Down Expand Up @@ -209,8 +223,9 @@ func (r *Anchorer) generatePathAndCycle(G Graph) PathSet {
path2, _ = dfs(G, a, path2, visited, false)
path1 = append(reversePath(path1), path2...)
}

// fmt.Println("path1", path1)
path = mergePath(path1)
// fmt.Println("path from", a, path)
for _, contig := range path.contigs {
contig.path = path
}
Expand All @@ -232,8 +247,6 @@ func mergePath(path []Edge) *Path {
s.contigs = append(s.contigs, ep.contigs...)
}
s.bisect()
// fmt.Println(path)
// fmt.Println(s)
return s
}

Expand All @@ -251,7 +264,7 @@ func reversePath(path []Edge) []Edge {
// breakCycle breaks a single edge path into two edge paths
// breakage occurs at the weakest link
func breakCycle(path []Edge) []Edge {
minI, minWeight := 0, math.MaxFloat64
minI, minWeight := 0, int64(math.MaxInt64)
for i, edge := range path {
if edge.weight > 1 && edge.weight < minWeight {
minI, minWeight = i, edge.weight
Expand All @@ -277,7 +290,7 @@ func dfs(G Graph, a *Node, path []Edge, visited map[*Node]bool, visitSister bool
}
if nb, ok := G[a]; ok {
var maxb *Node
maxWeight := 0.0 // for tie breaking
maxWeight := int64(0) // for tie breaking
for b, weight := range nb {
if weight > maxWeight {
maxb, maxWeight = b, weight
Expand Down

0 comments on commit 14020a4

Please sign in to comment.