Skip to content

Commit

Permalink
[optimize] Add parseClustersFile
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jul 7, 2018
1 parent 06b5ab9 commit efc52af
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 22 deletions.
1 change: 1 addition & 0 deletions assess.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func (r *Assesser) Run() {
r.makeModel(r.Seqid + ".distribution.txt")
r.computePosteriorProb()
r.writePostProb(r.Seqid + ".postprob.txt")
log.Notice("Success")
}

// makeModel computes the norms and bins separately to derive an empirical link size
Expand Down
1 change: 1 addition & 0 deletions build.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func (r *Builder) writeAGP(oo *OO) {
func (r *Builder) Run() {
// Run sets the AGP output path, builds from the input files, and logs completion.
// The AGP file is named after the tour file with ".agp" appended to whatever
// RemoveExt returns (presumably the path minus its extension — confirm in RemoveExt).
r.AGPfile = RemoveExt(r.Tourfile) + ".agp"
// Whatever readFiles returns is handed directly to Build.
r.Build(r.readFiles())
// Emit a final notice so the log shows the run reached its end.
log.Notice("Success")
}

// Build constructs molecule using component FASTA sequence
Expand Down
4 changes: 2 additions & 2 deletions clm.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
type CLM struct {
REfile string
Clmfile string
Tigs []TigF
Tigs []*TigF
Tour Tour
Signs []byte
tigToIdx map[string]int // From name of the tig to the idx of the Tigs array
Expand Down Expand Up @@ -120,7 +120,7 @@ func (r *CLM) ParseIds() {
words := strings.Fields(scanner.Text())
tig := words[0]
size, _ := strconv.Atoi(words[len(words)-1])
r.Tigs = append(r.Tigs, TigF{idx, tig, size, true})
r.Tigs = append(r.Tigs, &TigF{idx, tig, size, false})
r.tigToIdx[tig] = idx
idx++
}
Expand Down
4 changes: 2 additions & 2 deletions cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ func (r *Partitioner) printClusters() {
}
sort.Strings(names)

fmt.Printf("g%d\t%d\t%s\n", j, len(names), strings.Join(names, ","))
fmt.Fprintf(w, "g%d\t%d\t%s\n", j, len(names), strings.Join(names, ","))
fmt.Printf("g%d\t%d\t%s\n", j, len(names), strings.Join(names, " "))
fmt.Fprintf(w, "g%d\t%d\t%s\n", j, len(names), strings.Join(names, " "))
}
w.Flush()

Expand Down
8 changes: 4 additions & 4 deletions cmd/allhic.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ on a cluster).
Usage: "Skip GA step",
},
cli.BoolFlag{
Name: "startOver",
Usage: "Do not resume from existing tour file",
Name: "resume",
Usage: "Resume from existing tour file",
},
cli.Int64Flag{
Name: "seed",
Expand Down Expand Up @@ -214,14 +214,14 @@ on a cluster).
clustersfile := c.Args().Get(2)
group, _ := strconv.Atoi(c.Args().Get(3))
runGA := !c.Bool("skipGA")
startOver := c.Bool("startOver")
resume := c.Bool("resume")
seed := c.Int64("seed")
npop := c.Int("npop")
ngen := c.Int("ngen")
mutpb := c.Float64("mutpb")
p := allhic.Optimizer{REfile: refile, Clmfile: clmfile,
Clustersfile: clustersfile, Group: group,
RunGA: runGA, StartOver: startOver,
RunGA: runGA, Resume: resume,
Seed: seed, NPop: npop, NGen: ngen, MutProb: mutpb}
p.Run()
return nil
Expand Down
1 change: 1 addition & 0 deletions extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ func (r *Extracter) Run() {
r.makeModel(RemoveExt(r.Bamfile) + ".distribution.txt")
r.calcIntraContigs()
r.calcInterContigs()
log.Notice("Success")
}

// makeModel computes the norms and bins separately to derive an empirical link size
Expand Down
56 changes: 43 additions & 13 deletions optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ type Optimizer struct {
Clustersfile string
Group int
RunGA bool
StartOver bool
Resume bool
Seed int64
NPop int
NGen int
Expand All @@ -35,25 +35,25 @@ type Optimizer struct {
// Run kicks off the Optimizer
func (r *Optimizer) Run() {
clm := NewCLM(r.Clmfile, r.REfile)
tourfile := RemoveExt(r.Clmfile) + ".tour"
shuffle := false
tourfile := fmt.Sprintf("%s.g%d.tour", RemoveExt(r.Clmfile), r.Group)

// Load tourfile if it exists
if _, err := os.Stat(tourfile); !r.StartOver && err == nil {
log.Noticef("Found existing tour file")
if _, err := os.Stat(tourfile); r.Resume && err == nil {
log.Noticef("Found existing tour file `%s`", tourfile)
clm.parseTourFile(tourfile)
clm.printTour(os.Stdout, clm.Tour, "INIT")
// Rename the tour file
backupTourFile := tourfile + ".sav"
os.Rename(tourfile, backupTourFile)
log.Noticef("Backup `%s` to `%s`", tourfile, backupTourFile)
} else {
shuffle = true
clm.parseClustersFile(r.Clustersfile, r.Group)
}

shuffle := false // If one wants randomized initialization, set this to true
clm.Activate(shuffle)

// tourfile logs the intermediate configurations
log.Noticef("Optimization history logged to `%s`", tourfile)
fwtour, _ := os.Create(tourfile)
defer fwtour.Close()
clm.printTour(fwtour, clm.Tour, "INIT")
Expand Down Expand Up @@ -112,16 +112,21 @@ func parseTourFile(filename string) []string {
return words
}

// parseTourFile parses tour file
// Only the last line is retained and converted into a Tour
func (r *CLM) parseTourFile(filename string) {
words := parseTourFile(filename)
tigs := []Tig{}
// prepareTour resets per-tig state ahead of loading a tour: every entry of
// r.Tigs is flagged inactive and r.Signs is reallocated with one byte per tig.
func (r *CLM) prepareTour() {
	for _, t := range r.Tigs {
		t.IsActive = false
	}
	r.Signs = make([]byte, len(r.Tigs))
}

// parseTourFile parses tour file
// Only the last line is retained and converted into a Tour
func (r *CLM) parseTourFile(filename string) {
words := parseTourFile(filename)
r.prepareTour()

tigs := []Tig{}
for _, word := range words {
tigName, tigOrientation := word[:len(word)-1], word[len(word)-1]
idx, ok := r.tigToIdx[tigName]
Expand All @@ -133,9 +138,34 @@ func (r *CLM) parseTourFile(filename string) {
Idx: idx,
})
r.Signs[idx] = tigOrientation
r.Tour.Tigs = tigs
r.Tigs[idx].IsActive = true
}
r.Tour.Tigs = tigs
r.printTour(os.Stdout, r.Tour, "INIT")
}

// parseClustersFile parses clusters file and seeds the tour from one group.
//
// The file is read through ReadCSVLines and the record at index `group` is
// selected. The third field of that record is split on single spaces into
// contig names. Every name present in r.tigToIdx is appended to the tour with
// '+' orientation and its tig is marked active; unknown names are logged and
// skipped. The resulting tour is printed to stdout with the "INIT" label.
//
// If `group` does not index a record, or the record has fewer than three
// fields, the problem is logged and the function returns after prepareTour has
// run, leaving r.Tour untouched, rather than panicking on an out-of-range index.
func (r *CLM) parseClustersFile(clustersfile string, group int) {
	recs := ReadCSVLines(clustersfile)
	r.prepareTour()

	// group originates from a command-line argument, so it may be negative or
	// beyond the number of records in the file.
	if group < 0 || group >= len(recs) {
		log.Errorf("Group %d not found in `%s` (%d groups available)",
			group, clustersfile, len(recs))
		return
	}

	rec := recs[group]
	// The contig names live in the third field; guard against short records.
	if len(rec) < 3 {
		log.Errorf("Malformed record for group %d in `%s`: expected at least 3 fields, got %d",
			group, clustersfile, len(rec))
		return
	}

	names := strings.Split(rec[2], " ")
	tigs := make([]Tig, 0, len(names))
	for _, tigName := range names {
		idx, ok := r.tigToIdx[tigName]
		if !ok {
			log.Errorf("Contig %s not found!", tigName)
			continue
		}
		tigs = append(tigs, Tig{
			Idx: idx,
		})
		// A clusters file carries no orientation, so start every contig as '+'.
		r.Signs[idx] = '+'
		r.Tigs[idx].IsActive = true
	}
	r.Tour.Tigs = tigs
	r.printTour(os.Stdout, r.Tour, "INIT")
}

// printTour logs the current tour to file
Expand Down
2 changes: 1 addition & 1 deletion partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (r *Partitioner) readRE() {
}
r.contigs = append(r.contigs, ci)
}
r.contigToIdx = make(map[string]int)
r.contigToIdx = map[string]int{}
for i, contig := range r.contigs {
r.contigToIdx[contig.name] = i
}
Expand Down

0 comments on commit efc52af

Please sign in to comment.