In [1]:
using NBInclude
@nbinclude("create_datastructs.ipynb")

adjlist_to_str (generic function with 1 method)

In [None]:
# helpers for dcj operations and distance


"""
    other_adjacency_end(ge::GeneEnd, adj::Adjacency)

Return the end of `adj` that sits opposite `ge`.

`adj.left` is compared first, so if both ends compare equal to `ge` the right
end is returned.

# Throws
- `ArgumentError`: if neither `adj.left` nor `adj.right` equals `ge`.
"""
function other_adjacency_end(ge::GeneEnd, adj::Adjacency)
    # Guard clauses: leave as soon as the matching side is found.
    adj.left == ge && return adj.right
    adj.right == ge && return adj.left
    throw(ArgumentError("Adjacency doesn't have other end." ))
end

"""
    other_adjacency_end(t::Telomere, adj::Adjacency)

Return the end of `adj` opposite its telomere end.

`t` only selects this method; the lookup is delegated to the `GeneEnd` method
using a newly constructed `GeneEnd(Telomere())`, so the same `ArgumentError`
is raised when `adj` has no end equal to that value.
"""
other_adjacency_end(t::Telomere, adj::Adjacency) =
    other_adjacency_end(GeneEnd(Telomere()), adj)


"""
    find_adj(target_ge, src_gid_to_l, src_adjs)

Return the index into `src_adjs` that `src_gid_to_l` records for `target_ge`.

`src_gid_to_l[gene_id]` is read as a two-slot vector: slot 1 is used when
`target_ge` is a tail and slot 2 when it is a head. A stored value of `0` is
treated as "no location recorded" and rejected.

# Example
Given the adjacencies `{a:h, b:t} ... {b:h, d:t}` and `target_ge = b:t`, the
result is the index of the adjacency holding `b:t`, i.e. `{a:h, b:t}`
(assuming the index table was built consistently with `src_adjs`).

# Throws
- `ArgumentError`: if the gene id of `target_ge` is `0`, or if the stored
  location is `0`.
- `BoundsError`: if the stored location is not a valid index of `src_adjs`.
"""
function find_adj(
    target_ge::GeneEnd,
    src_gid_to_l::DefaultDict{Int, Vector{Int}},
    src_adjs::Vector{Adjacency},
)
    # Slot 2 holds the head location, slot 1 the tail location.
    th_idx = (target_ge.head == true) ? 2 : 1

    gene_id = target_ge.gene.id
    if gene_id == 0
        throw(ArgumentError("Gene ID is 0"))
    end

    src_ge_idx = src_gid_to_l[gene_id][th_idx]
    if src_ge_idx == 0
        throw(ArgumentError("Source Gene End is 0"))
    end

    # The adjacency itself is not needed for the result; only validate that the
    # recorded location actually points inside `src_adjs`.
    checkbounds(src_adjs, src_ge_idx)

    return src_ge_idx
end

In [2]:
"""
defines functions that print out the entire adjacency list for each DCJ operation in order (gene tails are next to an adjacency containing its head & vice versa)
runs in O(n) time so makes the DCJ algo O(N^2)
"""

"defines functions that print out the entire adjacency list for each DCJ operation in order (gene tails are next to an adjacency containing its head & vice versa)\nruns in O(n) time so makes the DCJ algo O(N^2)\n"

In [3]:
"""
    find_next_adj(ge, adj, reordered_adj_list, src_gid_to_l, src_adjs, processed)

Cross the gene of `ge` to the adjacency that holds the gene's opposite end and
return the end on the far side of that adjacency.

The adjacency is located through `src_gid_to_l[ge.gene.id]`: slot 1 is read
when `ge` is a head, slot 2 when it is a tail. If that adjacency's index is not
yet in `processed`, the index is added to `processed` and the adjacency is
appended to `reordered_adj_list`.

If neither end of the located adjacency carries the gene of `ge`, `ge` is
returned unchanged. The `adj` argument is accepted but not used.

# Example
Given `{ge1:h, ge2:t} ... {ge2:h, ge3:t}` and `ge = ge2:t`, the adjacency
`{ge2:h, ge3:t}` is located and `ge3:t` is returned.
"""
function find_next_adj(
    ge::GeneEnd,
    adj::Adjacency,
    reordered_adj_list::Vector{Adjacency},
    src_gid_to_l::DefaultDict{Int, Vector{Int}},
    src_adjs::Vector{Adjacency},
    processed::Set{Int},
)
    # Pick the slot for the opposite extremity of the same gene.
    opp_slot = if ge.head == true
        1
    else
        2
    end

    hop_idx = src_gid_to_l[ge.gene.id][opp_slot]
    hop_adj = src_adjs[hop_idx]

    # Record the adjacency the first time it is visited.
    if !(hop_idx in processed)
        push!(processed, hop_idx)
        push!(reordered_adj_list, hop_adj)
    end

    current_gene = ge.gene
    if hop_adj.left.gene == current_gene
        return other_adjacency_end(hop_adj.left, hop_adj)
    elseif hop_adj.right.gene == current_gene
        return other_adjacency_end(hop_adj.right, hop_adj)
    end

    return ge
end


reorder_adjs (generic function with 1 method)

In [None]:

"""
    reorder_adjs(src_adjs, src_gid_to_l)

Return the adjacencies of `src_adjs` reordered so that consecutive entries are
linked through the head/tail ends of a shared gene. The order of the two ends
inside each adjacency is left as it is.

Adjacencies with a telomere end are used as starting points first; each walk
follows `find_next_adj` until a telomere end is reached. The remaining
adjacencies (two gene ends) are then walked until the walk returns to the gene
on the left end of the starting adjacency. Every adjacency is appended at most
once, tracked by index.

The walk relies on `src_gid_to_l` being consistent with `src_adjs`.
"""
function reorder_adjs(
    src_adjs::Vector{Adjacency},
    src_gid_to_l::DefaultDict{Int, Vector{Int}},
)
    ordered = Vector{Adjacency}()
    seen = Set{Int}()

    # Partition indices: adjacencies touching a telomere vs. gene-gene ones.
    telo_idxs = Vector{Int}()
    gene_idxs = Vector{Int}()
    for (k, a) in enumerate(src_adjs)
        if a.left.gene == Telomere() || a.right.gene == Telomere()
            push!(telo_idxs, k)
        else
            push!(gene_idxs, k)
        end
    end

    # Walks that start at a telomere and stop at the next telomere.
    for k in telo_idxs
        k in seen && continue
        push!(seen, k)

        start = src_adjs[k]
        push!(ordered, start)

        cursor = other_adjacency_end(Telomere(), start)
        while cursor.gene != Telomere()
            cursor = find_next_adj(cursor, start, ordered, src_gid_to_l, src_adjs, seen)
        end
    end

    # Walks over what is left: always take one step, then continue until the
    # walk is back at the gene on the left end of the starting adjacency.
    for k in gene_idxs
        k in seen && continue
        push!(seen, k)

        start = src_adjs[k]
        push!(ordered, start)

        anchor_gene = start.left.gene
        cursor = start.right
        while true
            cursor = find_next_adj(cursor, start, ordered, src_gid_to_l, src_adjs, seen)
            cursor.gene != anchor_gene || break
        end
    end

    return ordered
end

In [4]:
# src = ".a2a1."

# id_counter = Ref{Int}(1)
# id_to_str = Dict{Int, String}()
# str_to_id = Dict{String, Int}()

# src_genome = string_to_genome(src, id_counter, id_to_str, str_to_id, false)

# genome_to_adj_list(src_genome)

LoadError: KeyError: key "a2" not found