Skip to content

Commit

Permalink
[partition] Do not run clustering when k = 1
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jul 7, 2018
1 parent 8b23329 commit 06b5ab9
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 18 deletions.
13 changes: 10 additions & 3 deletions cmd/allhic.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,16 @@ can be generated with the "extract" sub-command.
Name: "optimize",
Usage: "Order-and-orient tigs in a group",
UsageText: `
allhic optimize counts_RE.txt clmfile [options]
allhic optimize counts_RE.txt clmfile clusters.txt group_number [options]
Optimize function:
Given a set of Hi-C contacts between contigs, as specified in the
clmfile, reconstruct the highest scoring ordering and orientations
for these contigs.
for these contigs. Optimize run on a specific partition in "clusters.txt"
as generated by "partition" sub-command, with the group_number matching the
order appearing in "clusters.txt". Typically, if there are k clusters, we
can start k separate "optimize" commands for parallelism (for example,
on a cluster).
`,
Flags: []cli.Flag{
cli.BoolFlag{
Expand Down Expand Up @@ -200,20 +204,23 @@ for these contigs.
},
},
Action: func(c *cli.Context) error {
if len(c.Args()) < 2 {
if len(c.Args()) < 4 {
cli.ShowSubcommandHelp(c)
return cli.NewExitError("Must specify clmfile", 1)
}

refile := c.Args().Get(0)
clmfile := c.Args().Get(1)
clustersfile := c.Args().Get(2)
group, _ := strconv.Atoi(c.Args().Get(3))
runGA := !c.Bool("skipGA")
startOver := c.Bool("startOver")
seed := c.Int64("seed")
npop := c.Int("npop")
ngen := c.Int("ngen")
mutpb := c.Float64("mutpb")
p := allhic.Optimizer{REfile: refile, Clmfile: clmfile,
Clustersfile: clustersfile, Group: group,
RunGA: runGA, StartOver: startOver,
Seed: seed, NPop: npop, NGen: ngen, MutProb: mutpb}
p.Run()
Expand Down
20 changes: 11 additions & 9 deletions optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ import (

// Optimizer runs the order-and-orientation procedure, given a clmfile
type Optimizer struct {
Clmfile string
REfile string
RunGA bool
StartOver bool
Seed int64
NPop int
NGen int
MutProb float64
CrossProb float64
Clmfile string
REfile string
Clustersfile string
Group int
RunGA bool
StartOver bool
Seed int64
NPop int
NGen int
MutProb float64
CrossProb float64
}

// Run kicks off the Optimizer
Expand Down
31 changes: 25 additions & 6 deletions partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,34 @@ type Partitioner struct {
// Run is the main function body of partition. It calls readRE first, then the
// filtering and clustering steps below, then printClusters, and finally logs
// "Success".
// NOTE(review): makeMatrix, skipRepeats and Cluster are called unconditionally
// here and again inside the else branch; this listing looks like removed and
// added diff lines rendered together — confirm against the actual file.
func (r *Partitioner) Run() {
r.readRE()
r.makeMatrix()
r.skipContigsWithFewREs()
r.skipRepeats()
r.Cluster()
// With a single requested cluster (K == 1) no clustering is run; every
// contig not flagged as skipped goes into one cluster via makeTrivialClusters.
if r.K == 1 {
r.makeTrivialClusters()
} else {
r.makeMatrix()
r.skipRepeats()
r.Cluster()
}
r.printClusters()

log.Notice("Success")
}

// makeTrivialClusters builds a single cluster, keyed 0, that holds the index
// of every contig whose skip flag is not set, and stores it in r.clusters.
func (r *Partitioner) makeTrivialClusters() {
	// Start from an empty, non-nil slice so the cluster entry is never nil,
	// even when every contig is skipped.
	kept := make([]int, 0)
	for idx := range r.contigs {
		if !r.contigs[idx].skip {
			kept = append(kept, idx)
		}
	}
	r.clusters = Clusters{0: kept}
}

// skipContigsWithFewREs skips contigs with fewer than MinREs
// This reads in the `counts_RE.txt` file generated by extract()
func (r *Partitioner) skipContigsWithFewREs() {
Expand Down Expand Up @@ -160,8 +179,8 @@ func (r *Partitioner) readRE() {
for i, contig := range r.contigs {
r.contigToIdx[contig.name] = i
}
log.Noticef("Loading contig RE lengths for normalization from `%s`",
r.Contigsfile)
log.Noticef("Loaded %d contig RE lengths for normalization from `%s`",
len(r.contigs), r.Contigsfile)
}

// parseDist imports the edges of the contig into a slice of DistLine
Expand Down

0 comments on commit 06b5ab9

Please sign in to comment.