Skip to content

Commit

Permalink
[hic] Add report_active()
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Nov 7, 2017
1 parent cdf113b commit 49d8406
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
50 changes: 48 additions & 2 deletions hic/clm.nim
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import future
import math
import os
import parseutils
import sets
import strutils
import tables
import logging

var logger = newConsoleLogger()
logger.addHandler()


##
Expand All @@ -15,14 +22,14 @@ import tables
## tig00030676- tig00077819+ 7 118651 91877 91877 209149 125906 146462 146462
## tig00030676- tig00077819- 7 108422 157204 157204 137924 142611 75169 75169
##

type
CLMFile* = ref object
name*: string
clmfile*: string
idsfile*: string
tig_to_size*: OrderedTable[string, int]
contacts*: Table[(string, string), int]
active: HashSet[string]


proc initCLMFile*(name: string, clmfile: string): CLMFile =
Expand All @@ -32,6 +39,7 @@ proc initCLMFile*(name: string, clmfile: string): CLMFile =
result.idsfile = clmfile.rsplit('.', maxsplit=1)[0] & ".ids"
result.tig_to_size = initOrderedTable[string, int]()
result.contacts = initTable[(string, string), int]()
result.active = initSet[string]()


##
Expand All @@ -41,14 +49,14 @@ proc initCLMFile*(name: string, clmfile: string): CLMFile =
## tig00035238 46779 recover
## tig00030900 119291
##

proc parse_ids*(this: CLMFile, skiprecover: bool) =
for line in lines this.idsfile:
let
atoms = line.split()
tig = atoms[0]
size = parseInt(atoms[1])
this.tig_to_size[tig] = size
this.active.incl(tig)


proc parse_clm*(this: CLMFile) =
Expand All @@ -72,3 +80,41 @@ proc parse_clm*(this: CLMFile) =
continue

this.contacts[(at, bt)] = links


proc parse*(this: CLMFile) =
this.parse_ids(true)
this.parse_clm()


proc N*(this: CLMFile): int =
len(this.active)


proc active_sizes*(this: CLMFile): seq[int] =
lc[this.tig_to_size[x] | (x <- this.active), int]


proc report_active*(this: CLMFile) =
debug("Active contigs: $# (length=$#)".format(this.N, this.active_sizes.sum()))

##
## Select contigs in the current partition. This is the setup phase of the
## algorithm, and supports two modes:
## - "de novo": This is useful at the start of a new run where no tours are
## available. We select the strong contigs that have significant number
## of links to other contigs in the partition. We build a histogram of
## link density (# links per bp) and remove the contigs that appear to be
## outliers. The orientations are derived from the matrix decomposition
## of the pairwise strandedness matrix O.
## - "hotstart": This is useful when there was a past run, with a given
## tourfile. In this case, the active contig list and orientations are
## derived from the last tour in the file.
##
proc activate*(this: CLMFile, tourfile = "", minsize = 10000) =
var tf = tourfile
if existsFile(tourfile):
tf = ""

if true:
this.report_active()
5 changes: 2 additions & 3 deletions hic_opt.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import tables

proc main() {.discardable.} =
var c = initCLMFile("test", "tests/test.clm")
c.parse_ids(true)
c.parse_clm()
echo c.contacts
c.parse()
c.activate()


when isMainModule:
Expand Down

0 comments on commit 49d8406

Please sign in to comment.