In [1]:
using Plots
using Combinatorics

using NBInclude
@nbinclude("randommap.ipynb")
@nbinclude("testing_RM.ipynb")


In [2]:
# Invert the map documentation: group every map under its DCJ distance.
#
# map_to_dedupstr_dcjdist = map => (deduplicated string, dcj distance)
# returns dcj distance => list of all maps documented with that distance
# (maps appear in the iteration order of the input dictionary)
function convert_maptodcjdist_to_converse(map_to_dedupstr_dcjdist::Dict{Vector{Int}, Tuple{String, Int}})
    maps_by_dist = Dict{Int, Vector{Vector{Int}}}()

    # only the distance (second tuple entry) is needed; the dedup string is ignored
    for (mapping, (_, dist)) in map_to_dedupstr_dcjdist
        # create the bucket on first sight of this distance, then append
        bucket = get!(() -> Vector{Vector{Int}}(), maps_by_dist, dist)
        push!(bucket, mapping)
    end

    return maps_by_dist
end

convert_maptodcjdist_to_converse (generic function with 1 method)

In [3]:
# Record a freshly generated map in all three bookkeeping structures (each is mutated in place).
#
# neighbor                = the new map
# dcjdist                 = its dcj distance
# dedup_genome            = the deduplicated string produced by the map
# map_to_dedupstr_dcjdist = map => (dedup string, dcj distance)
# dcjdist_to_map          = dcj distance => maps with that distance
# sorted_dists            = ascending list of the distances present in dcjdist_to_map
#
# Throws an ArgumentError if `neighbor` has already been documented.
function insert_new_map_into_documentation(neighbor::Vector{Int}, dcjdist::Int, dedup_genome::String, map_to_dedupstr_dcjdist::Dict{Vector{Int}}, dcjdist_to_map::Dict{Int, Vector{Vector{Int}}}, sorted_dists::Vector{Int})
    # a map must never be generated twice
    haskey(map_to_dedupstr_dcjdist, neighbor) &&
        throw(ArgumentError("ERROR: generated a repeat map"))

    map_to_dedupstr_dcjdist[neighbor] = (dedup_genome, dcjdist)

    # append to the bucket of this distance, creating the bucket when it is new
    same_dist_maps = get!(() -> Vector{Vector{Int}}(), dcjdist_to_map, dcjdist)
    push!(same_dist_maps, neighbor)

    # keep the list of distances sorted and free of duplicates
    if !(dcjdist in sorted_dists)
        insert!(sorted_dists, searchsortedfirst(sorted_dists, dcjdist), dcjdist)
    end
end

insert_new_map_into_documentation (generic function with 1 method)

In [4]:
# old neighbor definition (increment one element of the map by 1)
#
# idxs_of_neighbors         = map positions not yet used for a neighbor; one is drawn at random
#                             and removed from the set (the set is mutated)
# map_to_explore            = map whose neighbor is built (left untouched)
# S_dupchar_to_multiplicity = dup char => multiplicity; the i-th key belongs to map position i
#
# returns a copy of the map with the drawn position incremented, wrapping back to 1 once the
# value would exceed multiplicity!
function find_old_neighbor(idxs_of_neighbors::Set{Int}, map_to_explore::Vector{Int}, S_dupchar_to_multiplicity::OrderedDict{Char, Int})
    chosen = rand(idxs_of_neighbors)
    delete!(idxs_of_neighbors, chosen)

    # values at this position live in 1:multiplicity!
    dupchars = collect(keys(S_dupchar_to_multiplicity))
    wrap_after = factorial(S_dupchar_to_multiplicity[dupchars[chosen]])

    bumped = map_to_explore[chosen] + 1

    neighbor = copy(map_to_explore)
    neighbor[chosen] = bumped > wrap_after ? 1 : bumped
    return neighbor
end

# Clamp `val` so that it never exceeds `max_possible`.
#
# val          = requested value
# max_possible = upper bound
#
# returns `val` when it is within the bound, otherwise `max_possible`
# (the two branches used to be swapped, which returned the larger value and so never capped)
function cap_value_by_max(val::Int, max_possible::Int)
    return min(val, max_possible)
end

cap_value_by_max (generic function with 1 method)

In [5]:
# Lexicographic rank (1-based) of a permutation of 1:n.
#
# n    = length of the permutation
# perm = the permutation (not modified; a working copy is used)
#
# returns r such that `perm` is the r-th permutation of 1:n in lexicographic order
function permlexrank(n::Int, perm::Vector{Int})
    work = copy(perm)
    rank = 1

    for pos in 1:n
        pivot = work[pos]
        # every smaller choice at this position skips a block of (n-pos)! permutations
        rank += (pivot - 1) * factorial(n - pos)

        # re-label the remaining entries so they form a permutation of a shorter range
        for later in (pos + 1):n
            if work[later] > pivot
                work[later] -= 1
            end
        end
    end

    return rank
end

# n = 4 
# perm = [4, 3, 2, 1]

# n = 2
# perm = [1, 2]

# permlexrank(n, perm)

permlexrank (generic function with 1 method)

In [6]:
# Derive a "minimum change" neighbor of a permutation rank: expand `rank` into the
# corresponding permutation of 1:mult, swap two distinct randomly chosen positions, and
# convert the result back into a lexicographic rank.
#
# rank    = lexicographic rank of the current ordering of the duplicates
# char    = the duplicated character (only used in the log text)
# mult    = multiplicity of `char`, i.e. the length of the permutation; must be >= 2
# details = log text so far; a description of the swap is appended to it
#
# returns (new_rank, details)
# throws an ArgumentError when mult < 2, because no two distinct positions exist to swap
function convert_lex_rank_to_ints(rank::Int, char::Char, mult::Int, details::String)
    # without this guard the rejection loop below would spin forever for mult < 2
    if mult < 2
        throw(ArgumentError("cannot swap two duplicates of '" * char * "': multiplicity is " * string(mult)))
    end

    # permutation of 1:mult that has the given rank
    indices = collect(1:mult)
    nthperm!(indices, rank)

    details = details * "    order of " * string(mult) * " dups of '" * char * "' " * string(indices) * " (rank " * string(rank) * ")"

    # randomly choose two unique indices/chars
    rand_idx1 = rand(1:mult)
    rand_idx2 = rand(1:mult)
    while rand_idx2 == rand_idx1
        rand_idx2 = rand(1:mult)
    end

    # swap those two
    indices[rand_idx1], indices[rand_idx2] = indices[rand_idx2], indices[rand_idx1]

    new_rank = permlexrank(mult, indices)

    details = details * " --> " * string(indices) * " (rank " * string(new_rank) * ")"   # swapped 2 rand elem

    return new_rank, details
end

convert_lex_rank_to_ints (generic function with 1 method)

In [7]:
# Build one random "minimum change" neighbor of a map: pick a random position of the map and
# replace its rank by the rank obtained from swapping two duplicates (see convert_lex_rank_to_ints).
#
# map_to_explore          = map whose neighbor is built (left untouched)
# dupchar_to_multiplicity = dup char => multiplicity
# mapidx_to_char          = map position => dup char
#
# returns (neighbor, details) where `details` is a log text describing the change
function find_min_change_neighbor(map_to_explore::Vector{Int}, dupchar_to_multiplicity::OrderedDict{Char, Int}, mapidx_to_char::Dict{Int, Char})
    pos = rand(1:length(map_to_explore))
    old_rank = map_to_explore[pos]
    dupchar = mapidx_to_char[pos]
    multiplicity = dupchar_to_multiplicity[dupchar]

    header = "changing map for $(dupchar) (idx $(pos) in map)"
    swapped_rank, details = convert_lex_rank_to_ints(old_rank, dupchar, multiplicity, header)

    neighbor = copy(map_to_explore)
    neighbor[pos] = swapped_rank

    return neighbor, details
end

find_min_change_neighbor (generic function with 1 method)

In [8]:
# Collect up to 'max_neighbors_to_explore' neighbors of 'map_to_explore' that are neither already
# collected in this call nor already documented in 'map_to_dedupstr_dcjdist'.
#
# Two neighbor definitions are supported, selected by 'min_change_neighbor':
#   true  --> "min change" neighbor: one random position of the map gets the rank obtained by
#             swapping two random duplicates (see find_min_change_neighbor)
#   false --> old definition:
#             given string S and two maps m and v, v = neighbor(m) if
#             for a replicated char α at idx i in the maps, v[i] = (m[i] + 1) (mod occ(α,S)!)
#             all other chars (idx e) are mapped the same way v[e] = m[e]
#
# map_to_explore           = map whose neighborhood is searched
# max_neighbors_to_explore = requested number of neighbors (passed through cap_value_by_max first)
# map_to_dedupstr_dcjdist  = documentation of all maps generated so far (only its keys are read)
# dupchar_to_multiplicity  = dup char => multiplicity
# dupchar_to_unique_chars  = not used inside this function
# mapidx_to_char           = map position => dup char (only used by the min change definition)
# mode                     = verbosity; with mode >= 1 each min change neighbor found is printed
#
# NOTE(review): the min change branch returns a Set{Vector{Int}} while the old branch returns a
# Vector (via collect). The caller visible in this file only iterates and takes the length, so
# both work there -- confirm no other caller depends on one specific container type.
function find_neighbors(map_to_explore::Vector{Int}, max_neighbors_to_explore::Int, map_to_dedupstr_dcjdist::Dict{Vector{Int}, Tuple{String, Int}}, dupchar_to_multiplicity::OrderedDict{Char, Int}, dupchar_to_unique_chars::OrderedDict{Char, Vector{Char}}, mapidx_to_char::Dict{Int, Char}, min_change_neighbor::Bool, mode::Int)  
    neighbors = Set{Vector{Int}}()
    
    if min_change_neighbor
        #  cap neighbors to explore 
        # a min change neighbor swaps 2 of the v duplicates of one char --> binomial(v, 2) swaps per char 
        max_possible_minchangeneigh = 0 
        for v in values(dupchar_to_multiplicity)
            max_possible_minchangeneigh += binomial(v, 2)
        end 
        max_neighbors_to_explore = cap_value_by_max(max_neighbors_to_explore, max_possible_minchangeneigh) 
        # distinct candidates that were rejected (already collected or already documented) 
        seen_neighbors = Set{Vector{Int}}()
        
        # start from the map itself; when it is already documented the while loop below rejects it and draws a real neighbor
        neighbor = map_to_explore
        for i in 1:max_neighbors_to_explore
            details = ""
            while (neighbor in neighbors) || (neighbor in keys(map_to_dedupstr_dcjdist))
                # give up once the number of distinct rejected candidates reaches the cap 
                # NOTE(review): this can only trigger if the cap is not larger than the number of
                # candidates that can ever be rejected -- verify cap_value_by_max really clamps downward
                if length(seen_neighbors) == max_neighbors_to_explore
                   return neighbors 
                end 
                if neighbor ∉ seen_neighbors
                    push!(seen_neighbors, neighbor)
                end 

                # always drawn from 'map_to_explore', never from the previously found neighbor
                neighbor, details = find_min_change_neighbor(map_to_explore, dupchar_to_multiplicity, mapidx_to_char)
            end 
            if mode >= 1 
                println("     found ", neighbor, " by ", details)
            end 
            push!(neighbors, neighbor)
        end 

        return neighbors
        
    else # old definition of neighbors given in paper  
        idxs_of_neighbors = Set(range(1, length(map_to_explore)))  # index of the character in a map that's incremented 
    
        # cap neighbors to explore
        max_neighbors_to_explore = cap_value_by_max(max_neighbors_to_explore, length(map_to_explore)) 

        # start from the map itself; when it is already documented the while loop below rejects it and draws a real neighbor
        neighbor = map_to_explore
        for i in 1:max_neighbors_to_explore
            while (neighbor in neighbors) || (neighbor in keys(map_to_dedupstr_dcjdist))
                # every position has been tried (find_old_neighbor removes the position it uses) 
                if length(idxs_of_neighbors) == 0
                    return collect(neighbors)
                end
                neighbor = find_old_neighbor(idxs_of_neighbors, map_to_explore, dupchar_to_multiplicity)
            end 

            push!(neighbors, neighbor)
        end 

        return collect(neighbors)
    end    
end 



find_neighbors (generic function with 1 method)

In [13]:
# local search heuristic
#
# Generates 'rand_maps' random maps for S, then repeatedly takes the not yet explored map with the
# smallest dcj distance and documents up to 'max_neighbors' of its neighbors, until 'total_maps'
# maps exist or no unexplored map is left.
#
# S, P = source and target strings
# total_maps = total number of maps to be created
# rand_maps = number of maps randomly generated
# max_neighbors = max number of neighbors explored in each local search
# mode = verbosity name, translated by setmode (messages are printed when the result is >= 1)
# dupchar_to_unique_chars = passed through to the map generation / deduplication helpers
# min_change_neighbor = true --> "min change" neighbors, false --> old neighbor definition
#
# returns (global_min_dcj, global_min_map, global_min_dedupstr, map_to_dedupstr_dcjdist, og_rm_to_dedupstr_dcjdist)
#   = smallest distance found, a map achieving it, its deduplicated string, the documentation
#     of all generated maps, and a copy of the documentation taken right after the random phase
# throws an ArgumentError when the random phase produced no map at all
function localsearch(S::String, P::String, total_maps::Int, rand_maps::Int, max_neighbors::Int, mode::String, dupchar_to_unique_chars::OrderedDict{Char, Vector{Char}}, min_change_neighbor::Bool)
    m = setmode(mode)

    printstyled("SRC " * S * " --> TARGET " * P * "\n", color=:cyan)
    println("total maps=", total_maps, " || num_rand_maps=", rand_maps, " || max_neighbors=", max_neighbors)

    # create arbitrary map for P, a set of random maps S_M, rank maps using estimator algo
    _, P_dedup, map_to_dedupstr_dcjdist, S_dupchar_to_multiplicity, mapidx_to_char = generate_random_maps_and_calc_distances(S, P, rand_maps, dupchar_to_unique_chars, m)

    og_rm_to_dedupstr_dcjdist = deepcopy(map_to_dedupstr_dcjdist)

    # cap maps explored
    # a map holds one independent rank in 1:mult! per dup char, so the number of distinct maps is
    # the PRODUCT of the factorials (not their sum); BigInt keeps the product from overflowing
    total_possible_maps = big(1)
    for mult in values(S_dupchar_to_multiplicity)
        total_possible_maps *= factorial(big(mult))
    end
    if total_maps > total_possible_maps
        total_maps = Int(total_possible_maps)   # safe: smaller than the Int it replaces
        if m >= 1
            println("WARNING: capping total maps at ", total_maps)
        end
    end
    if rand_maps > total_maps
        rand_maps = total_maps
        if m >= 1
            println("WARNING: capping rand maps at ", rand_maps)
        end
    end
    if max_neighbors > total_maps - rand_maps
        max_neighbors = total_maps - rand_maps
        if m >= 1
            println("WARNING: capping max neighbors at ", max_neighbors)
        end
    end

    num_generated_maps = rand_maps

    dcj_dist_to_map = convert_maptodcjdist_to_converse(map_to_dedupstr_dcjdist)
    sorted_dists = sort(collect(keys(dcj_dist_to_map)))

    if isempty(sorted_dists)
        throw(ArgumentError("no initial maps were generated, local search has nothing to start from"))
    end

    # initialize vars storing best map and corresponding details (returned at the end)
    smallest_dcj_dist = sorted_dists[1]
    global_min_map = dcj_dist_to_map[smallest_dcj_dist][1]
    global_min_dcj = smallest_dcj_dist
    global_min_dedupstr = map_to_dedupstr_dcjdist[global_min_map][1]

    # until 'total_maps' maps are generated
    # ('<' instead of '!=' so the loop also ends if the count ever overshoots the target)
    while num_generated_maps < total_maps
        # every documented map has been explored and none produced a new neighbor
        if isempty(sorted_dists)
            if m >= 1
                println("WARNING: no unexplored maps left, stopping at ", num_generated_maps, "/", total_maps)
            end
            break
        end

        # select best not yet explored map
        smallest_dcj_dist = sorted_dists[1]
        maps = dcj_dist_to_map[smallest_dcj_dist]

        map_smallestd = popfirst!(maps)
        if m >= 1
            println("\ngenerated ", num_generated_maps, "/", total_maps)
            println("exploring neighborhood of ", map_smallestd)
        end
        # drop the distance once its last unexplored map has been taken
        if isempty(maps)
            delete!(dcj_dist_to_map, smallest_dcj_dist)
            popfirst!(sorted_dists)
        end

        # cap neighbors
        if max_neighbors > (total_maps - num_generated_maps)
            max_neighbors = total_maps - num_generated_maps
        end
        # searches up to 'max_neighbors' neighbor maps
        neighbors = find_neighbors(map_smallestd, max_neighbors, map_to_dedupstr_dcjdist, S_dupchar_to_multiplicity, dupchar_to_unique_chars, mapidx_to_char, min_change_neighbor, m)
        # insert neighbors into documentation
        if m >= 1
            println("neighbors explored: ")
        end

        for n in neighbors
            s_dedup = deduplicate_genome(S, S_dupchar_to_multiplicity, n, dupchar_to_unique_chars)
            d = calculate_distance(P_dedup, s_dedup, "none")
            insert_new_map_into_documentation(n, d, s_dedup, map_to_dedupstr_dcjdist, dcj_dist_to_map, sorted_dists)
            if m >= 1
                println("     ", n, " dcj dist=", d)
            end

            if d < global_min_dcj
                global_min_map  = n
                global_min_dcj =  map_to_dedupstr_dcjdist[global_min_map][2]
                global_min_dedupstr = map_to_dedupstr_dcjdist[global_min_map][1]

                if m >= 1
                    print("    !!!!!found a min dcj mapping ", global_min_map, " with distance ", global_min_dcj, "   ")
                    println(P_dedup, " --> ", global_min_dedupstr)
                end
            end
        end

        # no repeat exploration
        num_generated_maps += length(neighbors)
    end

    return global_min_dcj, global_min_map, global_min_dedupstr, map_to_dedupstr_dcjdist, og_rm_to_dedupstr_dcjdist
end


localsearch (generic function with 1 method)

In [15]:
# Demo run of the local search on a generated instance.

# arguments of generate_target_with_xdup (defined in an included notebook)
# presumably n = number of distinct genes and x = number of extra duplicates -- TODO confirm
n = 3
x = 2

target = generate_target_with_xdup(n, x)
src = scramble_target(target)

# fixed instance that can be used instead of the generated one
# src = ".a.,aab,bc"  
# target = "ba,a,b,ca"

# NOTE(review): total_maps is smaller than rand_maps here, so localsearch caps rand_maps to 1 and
# max_neighbors to 0 and its search loop is never entered -- only the random phase runs.
# Raise total_maps above rand_maps to actually exercise the neighbor search.
total_maps = 1
rand_maps = 8
max_neighbors = 2

mode = "info"
# passed in empty; presumably filled by the generation / deduplication helpers -- verify
dupchar_to_unique_chars = OrderedDict{Char, Vector{Char}}()
# true --> "min change" neighbor definition, false --> old increment-by-one definition
min_change_neighbor = true 

global_min_dcj, global_min_map, global_min_dedupstr, map_to_dedupstr_dcjdist, og_rm = localsearch(src, target, total_maps, rand_maps, max_neighbors, mode, dupchar_to_unique_chars, min_change_neighbor)

[36mSRC .cc.,bca --> TARGET acbcc[39m
total maps=1 || num_rand_maps=8 || max_neighbors=2
[35mtarget[39m
dup genes --> multiplicity
OrderedDict('c' => 3)

target map
[1]

[35msource[39m
dup genes --> multiplicity
OrderedDict('c' => 3)

[35mdeduplication[39m
[6] .kd.,bca --> acbdk 4
[5] .kc.,bda --> acbdk 4
[3] .dc.,bka --> acbdk 4
[4] .dk.,bca --> acbdk 4
[1] .cd.,bka --> acbdk 4
[2] .ck.,bda --> acbdk 4



(4, [3], ".dc.,bka", Dict([3] => (".dc.,bka", 4), [1] => (".cd.,bka", 4), [5] => (".kc.,bda", 4), [4] => (".dk.,bca", 4), [6] => (".kd.,bca", 4), [2] => (".ck.,bda", 4)), Dict([3] => (".dc.,bka", 4), [1] => (".cd.,bka", 4), [5] => (".kc.,bda", 4), [4] => (".dk.,bca", 4), [6] => (".kd.,bca", 4), [2] => (".ck.,bda", 4)))