In [1]:
using NBInclude
@nbinclude("datastructs.ipynb")
@nbinclude("helpers.ipynb")

update_adj_set (generic function with 1 method)

In [2]:
"""
    reassign_locs(u_idx, v_idx, pq, excluding_pq, gid_to_l)

Re-register the gene ends of two rewritten source adjacencies in `gid_to_l`.

The caller is expected to have already stored `pq` at `src_adjs[u_idx]` and
`excluding_pq` at `src_adjs[v_idx]`. Every non-telomere gene end of each
adjacency is handed to `process_adj_list_helper` together with the index of the
adjacency that now contains it. Telomere ends are skipped.

# Arguments
- `u_idx`, `v_idx`: indices in the source adjacency list of `pq` and `excluding_pq`.
- `pq`, `excluding_pq`: the two adjacencies that were just written.
- `gid_to_l`: lookup table passed through to `process_adj_list_helper`
  (presumably gene id => adjacency indices; confirm against helpers.ipynb).

# Returns
`nothing`.
"""
function reassign_locs(u_idx::Int, v_idx::Int, pq::Adjacency, excluding_pq::Adjacency, gid_to_l::DefaultDict{Int, Vector{Int}})
    for (adj_idx, adj) in ((u_idx, pq), (v_idx, excluding_pq))
        # One Ref per adjacency, shared by its left and right end, exactly as the
        # helper was called before (it receives the index by reference).
        idx = Ref{Int}(adj_idx)
        for ge in (adj.left, adj.right)
            # Compare the `.gene` field, not the gene end itself. The previous
            # check on `pq.right` compared a gene end against `Telomere()` and so
            # let a telomere right end through to the helper.
            if ge.gene != Telomere()
                process_adj_list_helper(ge, idx, gid_to_l)
            end
        end
    end
    return nothing
end 

"""
    update_documentation(u_idx, v_idx, pq, excluding_pq, src_adjs, src_gid_to_l)

Write the two adjacencies produced by one rearrangement step back into the
source bookkeeping.

An index of `0` means "no existing slot": the corresponding adjacency is
appended to `src_adjs` and the new last index is used instead. Afterwards
`src_adjs[u_idx]` holds `pq`, `src_adjs[v_idx]` holds `excluding_pq`, and
`reassign_locs` is called with the resolved indices to refresh `src_gid_to_l`.

Returns whatever `reassign_locs` returns.
"""
function update_documentation(u_idx::Int, v_idx::Int, pq::Adjacency, excluding_pq::Adjacency, src_adjs::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}})
    # Resolve the "append" sentinel for each slot, u first, then v.
    if iszero(u_idx)
        push!(src_adjs, pq)
        u_idx = lastindex(src_adjs)
    end
    if iszero(v_idx)
        push!(src_adjs, excluding_pq)
        v_idx = lastindex(src_adjs)
    end

    # Store both adjacencies; when the indices coincide, excluding_pq wins.
    src_adjs[u_idx] = pq
    src_adjs[v_idx] = excluding_pq

    # Refresh the gene-end location table for the two rewritten slots.
    return reassign_locs(u_idx, v_idx, pq, excluding_pq, src_gid_to_l)
end 


update_documentation (generic function with 1 method)

In [4]:
# find dcj operations and distance
#
# Walks the target adjacency list and rewrites the source genome's bookkeeping
# until it matches, counting one operation per rewrite. Returns that count.
#
# Arguments
#   src_adjs        source adjacency list; rewritten in place via update_documentation
#   tar_adj_list    target adjacency list; only read
#   src_gid_to_l    source lookup table handed to find_tar_ge_in_src_adjs and
#                   update_documentation (presumably gene id => adjacency indices;
#                   confirm against datastructs.ipynb)
#   tar_gid_to_l    target lookup table; only passed to find_tar_ge_in_src_adjs
#   src_adj_set     source adjacencies as a Set; mutated with delete!/push!
#   target_adj_set  NOTE(review): never referenced in this body
#
# Two passes:
#   1. every target adjacency with no telomere on either side;
#   2. the target adjacencies that do have a telomere (indices collected in pass 1).
#
# Side effects: prints a step-by-step trace to stdout.
function find_dcj_dist_ops(src_adjs::Vector{Adjacency}, tar_adj_list::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}}, tar_gid_to_l::DefaultDict{Int, Vector{Int}}, src_adj_set::Set{Adjacency}, target_adj_set::Set{Adjacency})
    count = 0  
    telomere_idxs = Vector{Int}()
    # Only used for the trace output. This is an alias of src_adjs (not a copy)
    # until the first rewrite replaces it with a list built from src_adj_set.
    updated_adj_list = src_adjs

    print("indexing through target adj list...")
    
    
    # for each adj {p, q} in target genome 
    for (i, adj) in pairs(tar_adj_list) 
        p = adj.left  
        q = adj.right  
        if p.gene == Telomere() || q.gene == Telomere()  # telomeres handled in next loop
            push!(telomere_idxs, i)  
        else 
            print("\n\n")
            show(updated_adj_list)
            
            print("\nADJ ", i, " :::: ")
            show(adj, false)
            print("\n")

            #  let u be ge of adj from genome A that == p.gene 
            #  let v be ge of adj from genome A that == q.gene 
            #  (each lookup returns a gene end, the index of its adjacency in
            #  src_adjs, and that adjacency; helper is defined outside this cell)
            u, u_idx, u_adj = find_tar_ge_in_src_adjs(p, src_gid_to_l, tar_gid_to_l, src_adjs)  
            v, v_idx, v_adj = find_tar_ge_in_src_adjs(q, src_gid_to_l, tar_gid_to_l, src_adjs)  
            
            #  if u != v, replace u and v in A by {p, q} and (u\{p}) U (v\{q})
            #  (same index for both lookups => nothing to do, no operation counted)
            if u_idx != v_idx  
                print("prev adj list ::: ")
                show(convert_adj_set_as_adj_list(src_adj_set), u_adj, v_adj)
 
                # Case 1: both returned gene ends are telomeres.
                if u.gene == Telomere() && v.gene == Telomere() 
                    u_partner = non_telo_end(u_adj)
                    v_partner = non_telo_end(v_adj)
                    pq = combine_ge(u_partner, u_idx, v_partner, v_idx)
                    two_telos = Adjacency(GeneEnd(Telomere(), false), GeneEnd(Telomere(), false))

                    #  remove adj with u, v in src_adj_set 
                    delete!(src_adj_set, src_adjs[u_idx])
                    delete!(src_adj_set, src_adjs[v_idx])

                    #  add pq and the two-telomere adjacency to src_adj_set
                    #  NOTE(review): the earlier comment here said the two-telomere
                    #  adjacency is not added, but the code does push it - confirm intent
                    push!(src_adj_set, pq)
                    push!(src_adj_set, two_telos)              

                    print("\nupdated adj list ::: ")
                    updated_adj_list = convert_adj_set_as_adj_list(src_adj_set)
                    show(updated_adj_list, pq, two_telos)

                    update_documentation(u_idx, v_idx, pq, two_telos, src_adjs, src_gid_to_l)
                    
                    # println("\n\nupdated src_adjs")
                    # show(src_adjs)
                    # println("\nupdated src_gid_to_location")
                    # show(src_gid_to_l, id_to_char)
                    
                # Case 2: exactly one of the returned gene ends is a telomere.
                elseif u.gene == Telomere() || v.gene == Telomere() 
                    # print("[[ u or v  is a telomere ]]") 
                    pq = combine_ge(p, u_idx, q, v_idx)
                    # The leftover adjacency pairs a fresh telomere with the far
                    # end of whichever source adjacency was not the telomere one.
                    if u.gene == Telomere() 
                        other_ge_v = other_adjacency_end(v, src_adjs[v_idx])
                        one_telo = combine_ge(GeneEnd(Telomere(), false), u_idx, other_ge_v, v_idx)
                    else 
                        other_ge_u = other_adjacency_end(u, src_adjs[u_idx])
                        one_telo = combine_ge(other_ge_u, u_idx, GeneEnd(Telomere(), false), v_idx)
                    end 

                    #  remove adj with u, v in src_adj_set 
                    delete!(src_adj_set, src_adjs[u_idx])
                    delete!(src_adj_set, src_adjs[v_idx])
                    
                    #  add adjusted adjacencies to src_adj_set
                    push!(src_adj_set, pq)
                    push!(src_adj_set, one_telo)
                    
                    #  print("updated adj set ::: ")
                    # show(src_adj_set)
                    print("\nupdated adj list ::: ")
                    updated_adj_list = convert_adj_set_as_adj_list(src_adj_set)
                    show(updated_adj_list, pq, one_telo)

                    update_documentation(u_idx, v_idx, pq, one_telo, src_adjs, src_gid_to_l)
                    
                    # println("\n\nupdated src_adjs")
                    # show(src_adjs)
                    # println("\nupdated src_gid_to_location")
                    # show(src_gid_to_l, id_to_char)

                # Case 3: the set update is delegated to update_adj_set.
                else # both are adj with two gene ends (no telomeres)   
                    pq, excluding_pq = update_adj_set(p, q, u, v, u_idx, v_idx, src_adjs, src_adj_set)
                    
                    # print("updated adj set ::: ")
                    # show(src_adj_set)
                    print("\nupdated adj list ::: ")
                    updated_adj_list = convert_adj_set_as_adj_list(src_adj_set)
                    show(updated_adj_list, pq, excluding_pq)

                    update_documentation(u_idx, v_idx, pq, excluding_pq, src_adjs, src_gid_to_l)
                    
                    # println("\n\nupdated src_adjs")
                    # show(src_adjs)
                    # println("\nupdated src_gid_to_location")
                    # show(src_gid_to_l, id_to_char)
                end
                # every rewrite in pass 1 counts as one operation
                count += 1 
            end   
        end
    end 

    # for each telomere {p} in genome B 
    for idx in telomere_idxs
        print("\n\n")
        show(updated_adj_list)
        
        telo_adj = tar_adj_list[idx]
        # the gene end of this target adjacency that is not the telomere
        non_telo_ge = other_adjacency_end(GeneEnd(Telomere(), false), telo_adj)
        
        print("\nADJ ", idx, " :::: ")
        show(telo_adj, false)
        print("\n")

        #  let u be ge of genome A that contains p 
        u, u_idx, u_adj = find_tar_ge_in_src_adjs(non_telo_ge, src_gid_to_l, tar_gid_to_l, src_adjs)  

        #  if u is an adjacency, then replace u in A by {p} and (u\{p}) 
        if u.gene != Telomere()
            # split the source adjacency into two telomere adjacencies
            p = Adjacency(u, GeneEnd(Telomere(), false))
            excluding_p = Adjacency(other_adjacency_end(u, u_adj), GeneEnd(Telomere(), false))

            #  remove adj with u in src_adj_set 
            delete!(src_adj_set, src_adjs[u_idx])

            #  add p, excluding_p to src_adj_set 
            push!(src_adj_set, p)
            push!(src_adj_set, excluding_p)   

            # print("updated adj set ::: ")
            # show(src_adj_set)
            print("\nupdated adj list ::: ")
            updated_adj_list = convert_adj_set_as_adj_list(src_adj_set)
            show(updated_adj_list, p, excluding_p)

            # v_idx = 0 makes update_documentation append excluding_p to src_adjs
            update_documentation(u_idx, 0, p, excluding_p, src_adjs, src_gid_to_l)
            
            # println("\n\nupdated src_adjs")
            # show(src_adjs)
            # println("\nupdated src_gid_to_location")
            # show(src_gid_to_l, id_to_char)

            count += 1 
        end 
    end 

    return count 
end 

# Example run. Judging by the adjacency lists printed in the cell output, "."
# marks a telomere and a run of genes with no surrounding "." is treated as
# circular - TODO confirm against the parser in helpers.ipynb.
src = "ab.c."
target = ".abc." 

# calculate_distance is defined outside this cell; the recorded output for this
# pair ends with a distance of 1.
calculate_distance(src, target)

SRC ADJ LIST
(a:h,b:t)(b:h,a:t)(.,c:t)(c:h,.)
|
v
TARGET ADJ LIST
(.,a:t)(a:h,b:t)(b:h,c:t)(c:h,.)

*************
indexing through target adj list...

(a:h,b:t)(b:h,a:t)(.,c:t)(c:h,.)
ADJ 2 :::: [32m(a:h,b:t)[39m


(a:h,b:t)(b:h,a:t)(.,c:t)(c:h,.)
ADJ 3 :::: [32m(b:h,c:t)[39m
prev adj list ::: (c:h,.)[34m(.,c:t)[39m[34m(b:h,a:t)[39m(a:h,b:t)
updated adj list ::: (c:h,.)[34m(b:h,c:t)[39m(a:h,b:t)[34m(a:t,.)[39m

(c:h,.)(b:h,c:t)(a:h,b:t)(a:t,.)
ADJ 1 :::: [32m(.,a:t)[39m


(c:h,.)(b:h,c:t)(a:h,b:t)(a:t,.)
ADJ 4 :::: [32m(c:h,.)[39m


1