Skip to content

Commit

Permalink
[allhic] Generate Q matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
tanghaibao committed Jan 6, 2018
1 parent 2403821 commit 93ed0b2
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 24 deletions.
12 changes: 12 additions & 0 deletions allhic/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,15 @@ func Make2DSlice(m, n int) [][]int {
}
return P
}

// Make3DSlice allocates a 3D matrix with shape (m, n, o)
func Make3DSlice(m, n, o int) [][][]int {
P := make([][][]int, m)
for i := 0; i < m; i++ {
P[i] = make([][]int, n)
for j := 0; j < n; j++ {
P[i][j] = make([]int, o)
}
}
return P
}
35 changes: 15 additions & 20 deletions allhic/clm.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,28 @@ type CLMFile struct {
Idsfile string
Tigs []TigF
Tour Tour
signs []byte
tigToIdx map[string]int // From name of the tig to the idx of the Tigs array
contacts map[Pair]Contact // (tigA, tigB) => {strandedness, nlinks, meanDist}
orientedContacts map[OrientedPair]GArray // (tigA, tigB, oriA, oriB) => golden array i.e. exponential histogram
}

// Pair contains two contigs in contact
type Pair struct {
a string
b string
ai int
bi int
}

// OrientedPair contains two contigs and their orientations
type OrientedPair struct {
a string
b string
ai int
bi int
ao byte
bo byte
}

// Contact stores how many links between two contigs
type Contact struct {
ai int
bi int
strandedness int
nlinks int
meanDist int
Expand Down Expand Up @@ -168,17 +167,17 @@ func (r *CLMFile) ParseClm() {
if ao != bo {
strandedness = -1
}
pair := Pair{at, bt}
c := Contact{ai, bi, strandedness, nlinks, meanDist}
pair := Pair{ai, bi}
c := Contact{strandedness, nlinks, meanDist}
if p, ok := r.contacts[pair]; ok {
if p.meanDist > meanDist {
r.contacts[pair] = c
}
} else {
r.contacts[pair] = c
}
r.orientedContacts[OrientedPair{at, bt, ao, bo}] = gdists
r.orientedContacts[OrientedPair{bt, at, rr(bo), rr(ao)}] = gdists
r.orientedContacts[OrientedPair{ai, bi, ao, bo}] = gdists
r.orientedContacts[OrientedPair{bi, ai, rr(bo), rr(ao)}] = gdists
}
// fmt.Println(r.orientedContacts)
}
Expand All @@ -189,11 +188,9 @@ func (r *CLMFile) ParseClm() {
func (r *CLMFile) calculateDensities() []float64 {
N := len(r.Tigs)
densities := make([]int, N)
for _, contact := range r.contacts {
ai := contact.ai
bi := contact.bi
densities[ai] += contact.nlinks
densities[bi] += contact.nlinks
for pair, contact := range r.contacts {
densities[pair.ai] += contact.nlinks
densities[pair.bi] += contact.nlinks
}

logdensities := make([]float64, N)
Expand All @@ -209,8 +206,6 @@ func (r *CLMFile) calculateDensities() []float64 {
func (r *CLMFile) pruneByDensity() {
logdensities := r.calculateDensities()
lb, ub := OutlierCutoff(logdensities)
// fmt.Println(logdensities)

log.Noticef("Log10(link_densities) ~ [%.5f, %.5f]", lb, ub)
for i, tig := range r.Tigs {
if logdensities[i] < lb && tig.Size < MINSIZE*10 {
Expand Down Expand Up @@ -323,9 +318,9 @@ func (r *CLMFile) reportActive() {
func (r *CLMFile) M() [][]int {
N := len(r.Tigs)
P := Make2DSlice(N, N)
for _, contact := range r.contacts {
ai := contact.ai
bi := contact.bi
for pair, contact := range r.contacts {
ai := pair.ai
bi := pair.bi
P[ai][bi] = contact.nlinks
P[bi][ai] = contact.nlinks
}
Expand Down
36 changes: 32 additions & 4 deletions allhic/orientation.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func flipLog(method string, score, scoreFlipped float64, tag string) {
}

// flipAll initializes the orientations based on pairwise O matrix.
func (r *CLMFile) flipAll() {
func (r *CLMFile) flipAll() []byte {
var (
M mat64.Dense
e mat64.EigenSym
Expand All @@ -30,18 +30,46 @@ func (r *CLMFile) flipAll() {
N := len(r.Tigs)
e.Factorize(r.O(), true)
M.EigenvectorsSym(&e)
v := M.ColView(N - 1) // v is the eigenvec corresponding to the largest eigenval
fmt.Printf("%0.4v\n\n", e.Values(nil))
fmt.Printf("%0.2v\n\n", mat64.Formatted(M.ColView(N-1)))
fmt.Printf("%0.2v\n\n", mat64.Formatted(v))

signs := make([]byte, N)
for i := 0; i < N; i++ {
if v.At(i, 0) < 0 {
signs[i] = '-'
} else {
signs[i] = '+'
}
}
fmt.Println(signs)
return signs
}

// O yields a pairwise orientation matrix, where each cell contains the strandedness
// times the number of links between i-th and j-th contig
func (r *CLMFile) O() *mat64.SymDense {
N := len(r.Tigs)
P := mat64.NewSymDense(N, nil)
for _, contact := range r.contacts {
for pair, contact := range r.contacts {
score := float64(contact.strandedness * contact.nlinks)
P.SetSym(contact.ai, contact.bi, score)
P.SetSym(pair.ai, pair.bi, score)
}
return P
}

// Q yields a contact frequency matrix when contigs are already oriented. This is a
// similar matrix as M, but rather than having the number of links in the
// cell, it points to an array that has the actual distances.
func (r *CLMFile) Q() [][][]int {
N := len(r.Tigs)
P := Make3DSlice(N, N, BB)
for pair, gdists := range r.orientedContacts {
ai := pair.ai
bi := pair.bi
if r.signs[ai] == pair.ao && r.signs[bi] == pair.bo {
P[ai][bi] = gdists[:]
}
}
return P
}

0 comments on commit 93ed0b2

Please sign in to comment.