In [1]:
using NBInclude
# @nbinclude("logging.ipynb")
@nbinclude("datastructs.ipynb")
@nbinclude("helpers.ipynb")

find_adj (generic function with 1 method)

In [2]:
"""
    reassign_locs(pq, excluding_pq, u_idx, v_idx, gid_to_l)

Record in `gid_to_l` that the gene ends of `pq` live at index `u_idx` and the gene ends
of `excluding_pq` live at index `v_idx`. Telomere ends are skipped.

Meant to be called for the state `src_adjs[u_idx] = pq` and
`src_adjs[v_idx] = excluding_pq`. The dictionary update itself is delegated to
`assign_ge_idx_to_gid_to_locdict` (defined outside this cell), which receives the index
wrapped in a `Ref{Int}`; one `Ref` is shared by both ends of the same adjacency.

# Arguments
- `pq`, `excluding_pq`: the two adjacencies whose gene-end locations are recorded.
- `u_idx`, `v_idx`: indices associated with `pq` and `excluding_pq` respectively.
- `gid_to_l`: location dictionary handed to the helper (mutated by it, presumably --
  confirm against the helper's definition).
"""
function reassign_locs(pq::Adjacency, excluding_pq::Adjacency, u_idx::Int, v_idx::Int, gid_to_l::DefaultDict{Int, Vector{Int}})
    for (adj, adj_idx) in ((pq, u_idx), (excluding_pq, v_idx))
        idx = Ref{Int}(adj_idx)
        for ge in (adj.left, adj.right)
            # Always test the `.gene` field: comparing the gene end itself against
            # `Telomere()` (as the old check on `pq.right` did) is not a telomere test.
            if ge.gene != Telomere()
                assign_ge_idx_to_gid_to_locdict(ge, idx, gid_to_l)
            end
        end
    end
    return nothing
end

"""
    update_src_adjs(p_pq, excluding_p_pq, u_idx, v_idx, src_adjs)

Store `p_pq` at `u_idx` and `excluding_p_pq` at `v_idx` in `src_adjs`, mutating the
vector in place.

An index of `0` means "no slot yet": the vector is grown by one element and the index
becomes that new last position. Returns the (possibly updated) `(u_idx, v_idx)` pair.
"""
function update_src_adjs(p_pq::Adjacency, excluding_p_pq::Adjacency, u_idx::Int, v_idx::Int, src_adjs::Vector{Adjacency})
    # Reserve a slot for each missing index; the pushed value is only a placeholder
    # because both slots are written unconditionally below.
    if iszero(u_idx)
        push!(src_adjs, p_pq)
        u_idx = lastindex(src_adjs)
    end
    if iszero(v_idx)
        push!(src_adjs, p_pq)
        v_idx = lastindex(src_adjs)
    end

    # Write order matters when u_idx == v_idx: the second assignment wins.
    src_adjs[u_idx] = p_pq
    src_adjs[v_idx] = excluding_p_pq

    return u_idx, v_idx
end

"""
    update_documentation(p, q, u_idx, v_idx, src_adjs, src_gid_to_l)

Build the two adjacencies that replace the source entries at `u_idx` / `v_idx`, write
them into `src_adjs` through `update_src_adjs`, refresh the gene-end locations through
`reassign_locs`, and return the pair `(joined, remainder)`.

- When `q` equals `GeneEnd(Telomere())`: `joined` pairs `p` with a telomere, and
  `remainder` pairs the other end of `src_adjs[u_idx]` with a telomere.
- Otherwise: `joined` is `Adjacency(p, q)`, and `remainder` pairs the other end of
  `src_adjs[u_idx]` (relative to `p`) with the other end of `src_adjs[v_idx]`
  (relative to `q`).
"""
function update_documentation(p::GeneEnd, q::GeneEnd, u_idx::Int, v_idx::Int, src_adjs::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}})
    if q == GeneEnd(Telomere())
        # Replace u by {p} and (u \ {p}).
        joined = Adjacency(p, GeneEnd(Telomere()))
        remainder = Adjacency(other_adjacency_end(p, src_adjs[u_idx]), GeneEnd(Telomere()))
    else
        # Replace u and v by {p, q} and (u \ {p}) U (v \ {q}).
        joined = Adjacency(p, q)
        rest_of_u = other_adjacency_end(p, src_adjs[u_idx])
        rest_of_v = other_adjacency_end(q, src_adjs[v_idx])
        remainder = Adjacency(rest_of_u, rest_of_v)
    end

    # Store the new adjacencies first; the indices may change if a slot was appended.
    new_u, new_v = update_src_adjs(joined, remainder, u_idx, v_idx, src_adjs)
    # Then bring the gene-end location dictionary in line with the stored positions.
    reassign_locs(joined, remainder, new_u, new_v, src_gid_to_l)

    return joined, remainder
end


update_documentation (generic function with 1 method)

In [3]:
"""
    find_dcj_dist_ops(src_adjs, tar_adj_list, src_gid_to_l, tar_gid_to_l, mode)

Transform the source adjacencies toward the target adjacency list, printing each DCJ
operation as `DCJ :: <old> --> <new>` and returning the number of operations performed.

Two passes are made over `tar_adj_list`:
1. Target adjacencies with two non-telomere gene ends. If the two ends are found at
   different source indices, those source entries are replaced via
   `update_documentation`.
2. Target adjacencies that involve a telomere (collected during pass 1). If the
   non-telomere end is currently paired with a non-telomere end in the source, that
   source entry is split via `update_documentation`.

`src_adjs` and `src_gid_to_l` are passed on to `update_documentation`, which updates
them. `tar_gid_to_l` and `mode` are accepted but not read by this function.
"""
function find_dcj_dist_ops(src_adjs::Vector{Adjacency}, tar_adj_list::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}}, tar_gid_to_l::DefaultDict{Int, Vector{Int}}, mode::String)
    n_ops = 0
    telomere_idxs = Vector{Int}()

    # Pass 1: every adjacency {p, q} of the target genome with two real gene ends.
    for (i, adj) in pairs(tar_adj_list)
        p = adj.left
        q = adj.right

        # Telomere-containing adjacencies are deferred to the second pass.
        if p.gene == Telomere() || q.gene == Telomere()
            push!(telomere_idxs, i)
            continue
        end

        # u / v: indices of the source adjacencies containing p / q.
        u_idx = find_adj(p, src_gid_to_l, src_adjs)
        v_idx = find_adj(q, src_gid_to_l, src_adjs)

        # p and q already share a source adjacency: nothing to do.
        u_idx == v_idx && continue

        print("DCJ :: ")
        show(src_adjs[u_idx], true)
        show(src_adjs[v_idx], true)
        print(" --> ")

        # Replace u and v by {p, q} and (u \ {p}) U (v \ {q}).
        pq, excluding_pq = update_documentation(p, q, u_idx, v_idx, src_adjs, src_gid_to_l)

        show(pq, true)
        show(excluding_pq, true)
        println()

        n_ops += 1
    end

    # Pass 2: every telomere adjacency {p} of the target genome.
    for idx in telomere_idxs
        tar_adj = tar_adj_list[idx]
        p_non_telo = other_adjacency_end(GeneEnd(Telomere()), tar_adj)

        # u: index of the source adjacency containing the non-telomere end.
        u_idx = find_adj(p_non_telo, src_gid_to_l, src_adjs)

        # Its partner in the source is already a telomere: nothing to do.
        if other_adjacency_end(p_non_telo, src_adjs[u_idx]).gene == Telomere()
            continue
        end

        print("DCJ :: ")
        show(src_adjs[u_idx], true)
        print(" --> ")

        # Replace u by {p} and (u \ {p}); v_idx = 0 requests a fresh slot.
        new_telo, excluding_p = update_documentation(p_non_telo, GeneEnd(Telomere()), u_idx, 0, src_adjs, src_gid_to_l)

        show(new_telo, true)
        show(excluding_p, true)
        println()

        n_ops += 1
    end

    return n_ops
end



find_dcj_dist_ops (generic function with 1 method)

In [4]:
# src = ".a.,b"
# target="A,.b."
# calculate_distance(src, target)