In [1]:
using NBInclude
# @nbinclude("logging.ipynb")
@nbinclude("datastructs.ipynb")
@nbinclude("helpers.ipynb")

reorder_adjs (generic function with 1 method)

In [2]:
"""
    reassign_locs(u_idx, v_idx, pq, excluding_pq, gid_to_l)

Re-register the gene ends of two adjacencies in the gene-id -> location
dictionary `gid_to_l`, given that the caller has stored

    src_adjs[u_idx] = pq
    src_adjs[v_idx] = excluding_pq

Each non-telomere end of `pq` is passed to `assign_ge_idx_to_gid_to_locdict`
together with a `Ref` holding `u_idx`; each non-telomere end of `excluding_pq`
is passed with a `Ref` holding `v_idx`. Telomere ends are skipped.

Fix relative to the previous version: the guard for `pq.right` compared the
gene end itself with `Telomere()` instead of its `.gene` field (as the other
three guards do), so the telomere check on that end was not equivalent to the
others.
"""
function reassign_locs(u_idx::Int, v_idx::Int, pq::Adjacency, excluding_pq::Adjacency, gid_to_l::DefaultDict{Int, Vector{Int}})
    # One Ref per adjacency, shared by both of its ends (same as before).
    idx = Ref{Int}(u_idx)
    if pq.left.gene != Telomere()
        assign_ge_idx_to_gid_to_locdict(pq.left, idx, gid_to_l)
    end
    # Compare the `.gene` of the end, consistent with the other three checks.
    if pq.right.gene != Telomere()
        assign_ge_idx_to_gid_to_locdict(pq.right, idx, gid_to_l)
    end

    idx = Ref{Int}(v_idx)
    if excluding_pq.left.gene != Telomere()
        assign_ge_idx_to_gid_to_locdict(excluding_pq.left, idx, gid_to_l)
    end
    if excluding_pq.right.gene != Telomere()
        assign_ge_idx_to_gid_to_locdict(excluding_pq.right, idx, gid_to_l)
    end
end

"""
    update_documentation(u_idx, v_idx, pq, excluding_pq, src_adjs, src_gid_to_l)

Store `pq` and `excluding_pq` in `src_adjs` and refresh the gene-end locations
in `src_gid_to_l` through `reassign_locs`.

An index of `0` means "no existing slot": the adjacency is appended to
`src_adjs` and the index of the new last element is used instead. Otherwise
the adjacency overwrites the entry at the given index.
"""
function update_documentation(u_idx::Int, v_idx::Int, pq::Adjacency, excluding_pq::Adjacency, src_adjs::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}})
    # Resolve the "append" sentinel (0) to a real position for each adjacency.
    if iszero(u_idx)
        push!(src_adjs, pq)
        u_idx = lastindex(src_adjs)
    end
    if iszero(v_idx)
        push!(src_adjs, excluding_pq)
        v_idx = lastindex(src_adjs)
    end

    # Write both adjacencies to their (possibly freshly appended) slots.
    src_adjs[u_idx] = pq
    src_adjs[v_idx] = excluding_pq

    # Keep the gene-end location dictionary in sync with the new slots.
    return reassign_locs(u_idx, v_idx, pq, excluding_pq, src_gid_to_l)
end


update_documentation (generic function with 1 method)

In [8]:
"""
    find_dcj_dist_ops(src_adjs, tar_adj_list, src_gid_to_l, tar_gid_to_l,
                      src_adj_set, target_adj_set)

Find the DCJ operations that transform the source adjacencies into the target
adjacencies, printing each operation, and return how many were performed.

The target adjacency list is processed in two passes:

1. For every target adjacency `{p, q}` with two gene ends, locate the source
   adjacencies containing `p` and `q` (via `find_adj`). If they differ, replace
   them with `{p, q}` and the adjacency made of the remaining ends.
2. For every target adjacency that contains a telomere, locate the source
   adjacency containing its non-telomere end. If that end is not already next
   to a telomere in the source, split the source adjacency.

`src_adjs` and `src_gid_to_l` are updated through `update_documentation`;
`src_adj_set` is handed to `update_adj_set` (presumably updated there — confirm
against its definition). `tar_gid_to_l` and `target_adj_set` are accepted but
not used in this function.
"""
function find_dcj_dist_ops(src_adjs::Vector{Adjacency}, tar_adj_list::Vector{Adjacency}, src_gid_to_l::DefaultDict{Int, Vector{Int}}, tar_gid_to_l::DefaultDict{Int, Vector{Int}}, src_adj_set::Set{Adjacency}, target_adj_set::Set{Adjacency})
    count = 0
    telomere_idxs = Vector{Int}()

    println("indexing through target adj list...\n")

    println("processing adj with two gene ends... ")
    # for each adj {p, q} in target genome
    for (i, adj) in pairs(tar_adj_list)
        p = adj.left
        q = adj.right

        # Adjacencies touching a telomere are deferred to the second pass.
        if p.gene == Telomere() || q.gene == Telomere()
            push!(telomere_idxs, i)
            continue
        end

        print("\nADJ ", i, " :::: ")
        show(adj, false)
        print("\n")

        # u = adjacency of genome A containing p
        # v = adjacency of genome A containing q
        u_idx = find_adj(p, src_gid_to_l, src_adjs)
        v_idx = find_adj(q, src_gid_to_l, src_adjs)

        # if u != v, replace u and v in A by {p, q} and (u\{p}) U (v\{q})
        if u_idx != v_idx
            print("DCJ Operation :: ")
            show(src_adjs[u_idx], true)
            show(src_adjs[v_idx], true)
            print(" --> ")

            pq, excluding_pq = update_adj_set(p, q, u_idx, v_idx, src_adjs, src_adj_set)
            update_documentation(u_idx, v_idx, pq, excluding_pq, src_adjs, src_gid_to_l)

            show(pq, true)
            show(excluding_pq, true)
            println()

            count += 1
        end
    end

    println("\nprocessing telomeres... ")

    # for each telomere {p} in genome B
    for idx in telomere_idxs
        tar_adj = tar_adj_list[idx]
        p_non_telo = other_adjacency_end(GeneEnd(Telomere()), tar_adj)

        print("\nADJ ", idx, " :::: ")
        show(tar_adj, false)
        print("\n")

        # u = adjacency of genome A containing the non-telomere end of p
        u_idx = find_adj(p_non_telo, src_gid_to_l, src_adjs)

        # if u is an adjacency (its other end is not a telomere), replace u in A
        # by {p} and (u\{p})
        if other_adjacency_end(p_non_telo, src_adjs[u_idx]).gene != Telomere()
            print("DCJ Operation :: ")
            show(src_adjs[u_idx], true)
            print(" --> ")

            new_adj, excluding_p = update_adj_set(tar_adj, u_idx, src_adjs, src_adj_set)
            # v_idx = 0 asks update_documentation to append `excluding_p`.
            update_documentation(u_idx, 0, new_adj, excluding_p, src_adjs, src_gid_to_l)

            show(new_adj, true)
            show(excluding_p, true)
            println()

            count += 1
        end
    end

    return count
end



find_dcj_dist_ops (generic function with 1 method)

In [4]:
# Permutations that require the maximum number of DCJ operations, i.e. the diameter of an instance.

# Find the largest value of n for which the diameter can be found:
# n --> diameter (randomly sample x permutations)
# then increase n

#     max and min distance maps distance 


#     Mirjalili - Python library of nature-inspired algorithms
#     whale optimization algorithm (bubble-net?) - popular and simple
#     grey wolf optimizer - population-based algorithm
#     similar to
#     differential evolution (not from Mirjalili though; it's older)

In [9]:
# Example run: source and target genomes given as strings; the distance is
# computed by `calculate_distance` (defined in an included notebook).
src, target = ".abcdefg.", ".abcdGFE."

calculate_distance(src, target)

SRC ADJ LIST
(.,a:t)(a:h,b:t)(b:h,c:t)(c:h,d:t)(d:h,e:t)(e:h,f:t)(f:h,g:t)(g:h,.)
|
v
TARGET ADJ LIST
(.,a:t)(a:h,b:t)(b:h,c:t)(c:h,d:t)(d:h,g:h)(g:t,f:h)(f:t,e:h)(e:t,.)

*************
indexing through target adj list...

processing adj with two gene ends... 

ADJ 2 :::: [32m(a:h,b:t)[39m

ADJ 3 :::: [32m(b:h,c:t)[39m

ADJ 4 :::: [32m(c:h,d:t)[39m

ADJ 5 :::: [32m(d:h,g:h)[39m
DCJ Operation :: [34m(d:h,e:t)[39m[34m(g:h,.)[39m --> [34m(d:h,g:h)[39m[34m(e:t,.)[39m

ADJ 6 :::: [32m(g:t,f:h)[39m

ADJ 7 :::: [32m(f:t,e:h)[39m

processing telomeres... 

ADJ 1 :::: [32m(.,a:t)[39m

ADJ 8 :::: [32m(e:t,.)[39m


1