From c36c4f48f9c028e24ecc4d7f3aba1f90b6aa0688 Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Mon, 3 Sep 2018 16:25:08 -0700 Subject: [PATCH] [extract] Add label column to pairs.txt --- extract.go | 9 +++++---- partition.go | 2 ++ prune.go | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/extract.go b/extract.go index 0b7d2e0..6fce4f4 100644 --- a/extract.go +++ b/extract.go @@ -64,6 +64,7 @@ type ContigPair struct { L1, L2 int nObservedLinks int nExpectedLinks float64 + label string // allelic/cross-allelic/ok } // String outputs the string representation of ContigInfo @@ -73,9 +74,9 @@ func (r ContigInfo) String() string { // String outputs the string representation of ContigInfo func (r ContigPair) String() string { - return fmt.Sprintf("%d\t%d\t%s\t%s\t%d\t%d\t%d\t%.1f", + return fmt.Sprintf("%d\t%d\t%s\t%s\t%d\t%d\t%d\t%.1f\t%s", r.ai, r.bi, r.at, r.bt, r.RE1, r.RE2, - r.nObservedLinks, r.nExpectedLinks) + r.nObservedLinks, r.nExpectedLinks, r.label) } // uintLog2 calculates the integer log2 of a number @@ -241,7 +242,7 @@ func (r *Extracter) calcInterContigs() { L1, L2 := ca.length, cb.length cp = &ContigPair{ai: ai, bi: bi, at: at, bt: bt, RE1: ca.recounts, RE2: cb.recounts, - L1: L1, L2: L2} + L1: L1, L2: L2, label: "ok"} cp.nExpectedLinks = sumf(r.findExpectedInterContigLinks(0, L1, L2)) cp.nObservedLinks = len(line.links) contigPairs[pair] = cp @@ -253,7 +254,7 @@ func (r *Extracter) calcInterContigs() { f, _ := os.Create(outfile) w := bufio.NewWriter(f) defer f.Close() - fmt.Fprintf(w, "#X\tY\tContig1\tContig2\tRE1\tRE2\tObservedLinks\tExpectedLinksIfAdjacent\n") + fmt.Fprintf(w, "#X\tY\tContig1\tContig2\tRE1\tRE2\tObservedLinks\tExpectedLinksIfAdjacent\tLabel\n") allPairs := []*ContigPair{} for _, c := range contigPairs { diff --git a/partition.go b/partition.go index b1e4b31..8561b38 100644 --- a/partition.go +++ b/partition.go @@ -237,12 +237,14 @@ func parseDist(pairsFile string) []ContigPair { RE2, _ := strconv.Atoi(rec[5]) 
nObservedLinks, _ := strconv.Atoi(rec[6]) nExpectedLinks, _ := strconv.ParseFloat(rec[7], 64) + label := rec[8] cp := ContigPair{ ai: ai, bi: bi, at: at, bt: bt, RE1: RE1, RE2: RE2, nObservedLinks: nObservedLinks, nExpectedLinks: nExpectedLinks, + label: label, } edges = append(edges, cp) diff --git a/prune.go b/prune.go index 161d228..3694cae 100644 --- a/prune.go +++ b/prune.go @@ -26,6 +26,11 @@ type Pruner struct { type AlleleGroup []string // Run calls the pruning steps +// The pruning algorithm is a heuristic method that removes the following pairs: +// +// 1. Allelic, these are directly the pairs of allelic contigs given in the allele table +// 2. Cross-allelic, these are any contigs that connect to the allelic contigs so we only keep the +// best pair func (r *Pruner) Run() { edges := parseDist(r.PairsFile) alleleGroups := parseAllelesTable(r.AllelesFile)