In [1]:
using NBInclude
@nbinclude("datastructs.ipynb")

combine_ge (generic function with 1 method)

In [None]:
"""
    Base.show(dict::DefaultDict{Int, Vector{Int}}, id_to_char::Dict{Int, Char})

Print one line per entry of `dict` to standard output, in the form
`<char> <key> => <value>`, where `<char>` is `id_to_char[key]`.

Indexing `id_to_char` with a key it does not contain raises a `KeyError`.

NOTE(review): this method does not follow the usual `show(io, x)` signature; it is a
two-argument printing helper attached to `Base.show`.
"""
function Base.show(dict::DefaultDict{Int, Vector{Int}}, id_to_char::Dict{Int, Char})
    for entry in dict
        key, value = entry
        label = id_to_char[key]  # character associated with this key
        println(label, " $(key) => $(value)")
    end
end

In [2]:
# process adj list 
# create dictionary of gene ID --> index/location of gene's head and tail 

"""
    assign_ge_idx_to_gid_to_locdict(ge, idx, gid_to_loc)

Record the index currently held in `idx` as the location of gene end `ge`.

The entry `gid_to_loc[ge.gene.id]` is a two-slot vector: slot 1 stores the index for a
tail end, slot 2 the index for a head end. Telomere ends are skipped and leave
`gid_to_loc` untouched.

# Arguments
- `ge`: gene end to record.
- `idx`: reference holding the index to store.
- `gid_to_loc`: mapping from gene id to `[tail_idx, head_idx]`, mutated in place.
"""
function assign_ge_idx_to_gid_to_locdict(
    ge::GeneEnd, idx::Ref{Int}, gid_to_loc::DefaultDict{Int, Vector{Int}}
)
    # Telomeres have no gene id to record.
    ge.gene == Telomere() && return nothing

    slot = ge.head ? 2 : 1  # tail = 1, head = 2
    gid_to_loc[ge.gene.id][slot] = idx[]
end

"""
    process_adj_list(adj_list::Vector{Adjacency})

Build a dictionary mapping each gene id to the positions (1-based indices into
`adj_list`) of the adjacencies that contain that gene's tail and head.

# Returns
A `DefaultDict{Int, Vector{Int}}` whose values are two-element vectors
`[tail_idx, head_idx]`; a slot stays `0` if the matching end was never seen.
"""
function process_adj_list(adj_list:: Vector{Adjacency})
    # tail = idx 1, head = 2 in array
    geneid_to_location = DefaultDict{Int, Vector{Int}}(() -> zeros(Int, 2))
    position = Ref{Int}(1)

    for (i, adjacency) in enumerate(adj_list)
        position[] = i
        # Both ends of an adjacency share the adjacency's position.
        for ge in (adjacency.left, adjacency.right)
            assign_ge_idx_to_gid_to_locdict(ge, position, geneid_to_location)
        end
    end

    return geneid_to_location
end
     

process_adj_list (generic function with 1 method)

In [None]:
# finds next ge given ge 
# given ge we're looking for, finds new ge that corresponds to given ge (tail/head) 
# returns new ge in new adjacency

# e.g., given adj set: {a:h, b:t} ... {b:h, d:t} & ge = b:t, 
# finds {b:h, d:t}, returns d:t 
"""
    find_next_adj(ge, adj_set, adj_list, processed_adjs, adj)

Find the first not-yet-processed adjacency in `adj_set` that has an end on the same gene
as `ge`, append it to `adj_list`, mark it in `processed_adjs`, and return the opposite
end of that adjacency.

If no unprocessed adjacency touches `ge.gene`, nothing is appended and the result is
`other_adjacency_end(ge, adj)`, computed on the adjacency passed in as `adj`.

# Arguments
- `ge`: gene end whose gene is being searched for.
- `adj_set`: all adjacencies to search.
- `adj_list`: ordered output list, mutated in place.
- `processed_adjs`: adjacencies already emitted, mutated in place.
- `adj`: fallback adjacency used when the search finds nothing.
"""
function find_next_adj(
    ge::GeneEnd,
    adj_set::Set{Adjacency},
    adj_list::Vector{Adjacency},
    processed_adjs::Set{Adjacency},
    adj::Adjacency,
)
    for candidate in adj_set
        candidate in processed_adjs && continue

        # Matching is on the gene only; the left end is tried before the right end.
        if candidate.left.gene == ge.gene
            matched_end = candidate.left
        elseif candidate.right.gene == ge.gene
            matched_end = candidate.right
        else
            continue
        end

        push!(adj_list, candidate)
        push!(processed_adjs, candidate)
        return other_adjacency_end(matched_end, candidate)
    end

    # Nothing left to visit for this gene: fall back to the caller-supplied adjacency.
    return other_adjacency_end(ge, adj)
end

# O(N^2)
# converts an adj set into an adj list (adjacencies in chromosome order)
# note that the order of the two ends within an adjacency doesn't matter
"""
    convert_adj_set_as_adj_list(og_adj_set::Set{Adjacency})

Return the adjacencies of `og_adj_set` as a `Vector{Adjacency}` ordered by walking each
chromosome from adjacency to adjacency.

Chromosomes that contain a telomere adjacency are walked first, starting from each
unvisited telomere adjacency and stopping when a telomere end is reached. Every
adjacency still unvisited afterwards is treated as part of a circular chromosome and
walked until the gene of the starting adjacency's left end comes up again.

`og_adj_set` itself is not modified; the walk operates on a copy.
"""
function convert_adj_set_as_adj_list(og_adj_set::Set{Adjacency})
    adj_set = copy(og_adj_set)
    ordered = Vector{Adjacency}()
    visited = Set{Adjacency}()

    # Linear chromosomes: collect every adjacency that has a telomere on either side.
    telos = Adjacency[
        a for a in adj_set if a.left.gene == Telomere() || a.right.gene == Telomere()
    ]

    for start in telos
        start in visited && continue

        cursor = non_telo_end(start)
        push!(ordered, start)
        push!(visited, start)

        # Follow the chromosome until the walk reaches a telomere end.
        while cursor.gene != Telomere()
            cursor = find_next_adj(cursor, adj_set, ordered, visited, start)
        end
    end

    # Circular chromosomes: whatever has not been visited yet.
    for start in adj_set
        start in visited && continue

        push!(visited, start)
        push!(ordered, start)

        first_gene = start.left.gene
        # The gene end being looked for: the opposite end of the right-hand gene.
        cursor = GeneEnd(start.right.gene, !start.right.head)

        # Always take at least one step, then stop once the walk is back at first_gene.
        while true
            cursor = find_next_adj(cursor, adj_set, ordered, visited, start)
            cursor.gene != first_gene || break
        end
    end

    return ordered
end

In [3]:
# helpers for dcj operations and distance

#TODO combine with find_next_adj() cuz they do the same thing 
# finds target gene end in source adjacency list 
# returns index and left/right in the source adj list

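# e.g., given src adj list: {a:h, b:t} ... {b:h, d:t} & target ge = b:t,
# looks up the src adjacency recorded for b's tail (here {a:h, b:t}) and
# returns (b:t, index of that adjacency, the adjacency itself)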
"""
    find_tar_ge_in_src_adjs(target_ge, src_gid_to_l, tar_gid_to_l, src_adjs)

Locate, in the source adjacency list, the adjacency recorded for `target_ge` and return
a tuple `(src_ge, src_ge_idx, adj)`.

`src_ge_idx` is read from `src_gid_to_l[target_ge.gene.id]` (slot 2 for a head end,
slot 1 for a tail end) and `adj` is `src_adjs[src_ge_idx]`. If either end of `adj` is a
telomere, that telomere end is returned as `src_ge` (left end checked first).
Otherwise `src_ge` is the left end when its gene id matches the target's, else the
right end.

`tar_gid_to_l` is accepted but not used by this function.
"""
function find_tar_ge_in_src_adjs(
    target_ge::GeneEnd,
    src_gid_to_l::DefaultDict{Int, Vector{Int}},
    tar_gid_to_l::DefaultDict{Int, Vector{Int}},
    src_adjs::Vector{Adjacency},
)
    th_idx = target_ge.head ? 2 : 1  # head = slot 2, tail = slot 1
    gene_id = target_ge.gene.id

    src_ge_idx = src_gid_to_l[gene_id][th_idx]
    adj = src_adjs[src_ge_idx]

    # A telomere end takes precedence over the gene end.
    for side in (adj.left, adj.right)
        side.gene == Telomere() && return (side, src_ge_idx, adj)
    end

    src_ge = adj.left.gene.id == gene_id ? adj.left : adj.right
    return (src_ge, src_ge_idx, adj)
end


"""
    update_adj_set(p, q, u, v, u_idx, v_idx, src_adjs, src_adj_set)

Replace the adjacencies `src_adjs[u_idx]` and `src_adjs[v_idx]` in `src_adj_set` by two
new ones: `{p, q}` and the adjacency formed from the remaining ends,
`(u minus p) U (v minus q)`.

`src_adj_set` is mutated in place; `src_adjs` is only read.

# Returns
The tuple `(pq, excluding_pq)` of the two newly inserted adjacencies.
"""
function update_adj_set(
    p::GeneEnd,
    q::GeneEnd,
    u::GeneEnd,
    v::GeneEnd,
    u_idx::Int,
    v_idx::Int,
    src_adjs::Vector{Adjacency},
    src_adj_set::Set{Adjacency},
)
    pq = combine_ge(p, u_idx, q, v_idx)

    old_u = src_adjs[u_idx]
    old_v = src_adjs[v_idx]
    leftover_u = other_adjacency_end(u, old_u)
    leftover_v = other_adjacency_end(v, old_v)
    excluding_pq = combine_ge(leftover_u, u_idx, leftover_v, v_idx)

    # Remove the old adjacencies first, then insert their replacements.
    for stale in (old_u, old_v)
        delete!(src_adj_set, stale)
    end
    for fresh in (pq, excluding_pq)
        push!(src_adj_set, fresh)
    end

    return pq, excluding_pq
end



update_adj_set (generic function with 1 method)

In [None]:
# more helpers for dcj operations and distance

"""
    other_adjacency_end(ge::GeneEnd, adj::Adjacency)

Return the end of `adj` opposite to `ge`: `adj.right` when `adj.left == ge`, and
`adj.left` in every other case (including when `ge` matches neither end).
"""
function other_adjacency_end(ge::GeneEnd, adj::Adjacency)
    return adj.left == ge ? adj.right : adj.left
end

"""
    non_telo_end(adj::Adjacency)

Return the end of `adj` that is not a telomere.

If the left end's gene is a telomere the right end is returned; otherwise the left end
is returned. When both ends are telomeres the result is therefore the right (telomere)
end, and when neither is a telomere it is the left end.
"""
function non_telo_end(adj::Adjacency)
    # Telomeres are recognised through the `gene` field of a gene end (as in the rest of
    # this file), so test that field directly instead of handing a bare `Telomere()` to
    # `other_adjacency_end`, whose first argument must be a `GeneEnd`.
    return adj.left.gene == Telomere() ? adj.right : adj.left
end

"""
    combine_ge(u::GeneEnd, u_idx::Int, v::GeneEnd, v_idx::Int)

Build an `Adjacency` from the two gene ends, placing first the end with the smaller
index: `Adjacency(u, v)` when `u_idx < v_idx`, otherwise `Adjacency(v, u)` (this
includes the case of equal indices).
"""
function combine_ge(u::GeneEnd, u_idx::Int, v::GeneEnd, v_idx::Int)
    return u_idx < v_idx ? Adjacency(u, v) : Adjacency(v, u)
end