In [1]:
using NBInclude
@nbinclude("randommap.ipynb")

("ba,q,e,ck", 4, ".q.,kae,bc", [4, 2], 6, ".a.,qkb,ec", [1, 1], OrderedDict('a' => ['a', 'q', 'k'], 'b' => ['b', 'e']))

In [2]:
function convert_maptodcjdist_to_converse(map_to_dedupstr_dcjdist::Dict{Vector{Int}, Tuple{String, Int}})
    dcjdist_to_map = Dict{Int, Vector{Vector{Int}}}()

    for (map, tuple) in map_to_dedupstr_dcjdist
        dcjdist = tuple[2]

        if dcjdist in keys(dcjdist_to_map)
            push!(dcjdist_to_map[dcjdist], map)
        else 
            dcjdist_to_map[dcjdist] = [map]
        end 
    end 

    return dcjdist_to_map
end 

convert_maptodcjdist_to_converse (generic function with 1 method)

In [3]:
# given string S and two maps m and v, v = neighbor(m) if 
    # for a replicated char α at idx i in the maps, v[i] = (m[i] + 1) (mod occ(α,S)!)  
    # all other chars (idx e) are mapped the same way v[e] = m[e]
function find_neighbors(map_to_explore::Vector{Int}, max_neighbors_to_explore::Int, map_to_dedupstr_dcjdist::Dict{Vector{Int}, Tuple{String, Int}}, S_dupchar_to_multiplicity::OrderedDict{Char, Int})  
    neighbors = Set{Vector{Int}}()
    
    idxs_of_neighbors = Set(range(1, length(map_to_explore)))  # index of the character in a map that's incremented 

    max_neighbors = length(map_to_explore)
    if max_neighbors_to_explore > max_neighbors 
        max_neighbors_to_explore = max_neighbors  # cap this value 
    end 

    neighbor = map_to_explore
    for i in 1:max_neighbors_to_explore
        while (neighbor in neighbors) || (neighbor in keys(map_to_dedupstr_dcjdist))
            if length(idxs_of_neighbors) == 0
                return collect(neighbors)
            end

            i = rand(idxs_of_neighbors)
            pop!(idxs_of_neighbors, i)

            neighbor = deepcopy(map_to_explore)
            neighbor[i] += 1    
            multiplicity = S_dupchar_to_multiplicity[collect(keys(S_dupchar_to_multiplicity))[i]]
            if neighbor[i] > factorial(multiplicity) 
                neighbor[i] = 1 
            end 
        end 

        if neighbor != map_to_explore 
            push!(neighbors, neighbor)
        end 
    end 
    return collect(neighbors) 
end 


function insert_new_map_into_documentation(neighbor::Vector{Int}, dcjdist::Int, dedup_genome::String, map_to_dedupstr_dcjdist::Dict{Vector{Int}}, dcjdist_to_map::Dict{Int, Vector{Vector{Int}}}, sorted_dists::Vector{Int})
    if neighbor in keys(map_to_dedupstr_dcjdist) 
        throw(ArgumentError("uhhhh generated a repeat map"))
    end

    map_to_dedupstr_dcjdist[neighbor] = (dedup_genome, dcjdist)
    
    if dcjdist in keys(dcjdist_to_map)
        # printstyled(dcjdist_to_map, color=:blue)
        push!(dcjdist_to_map[dcjdist], neighbor)
        # printstyled(dcjdist_to_map, color=:blue)
    else 
        dcjdist_to_map[dcjdist] = [neighbor]
    end 

    if dcjdist ∉ sorted_dists
        idx = searchsortedfirst(sorted_dists, dcjdist)
        insert!(sorted_dists, idx, dcjdist)    
    end 
end 

insert_new_map_into_documentation (generic function with 1 method)

In [59]:
# local search heuristic

# total_maps = total number of maps to be created 
# rand_maps = number of maps randomly generated 
# max_neighbors = max number of neighbors explored in each local search
function localsearch(S::String, P::String, total_maps::Int, rand_maps::Int, max_neighbors::Int, mode::String, dupchar_to_unique_chars::OrderedDict{Char, Vector{Char}})
    printstyled("SRC " * S * " --> TARGET " * P * "\n", color=:cyan)
    m = 0

    global_min_map = Vector{Int}() 
    global_min_dcj = Inf
    global_min_dedupstr = ""
    
    # create arbitrary map for P, a set of random maps S_M, rank maps using estimator algo 
    P_map, P_dedup, map_to_dedupstr_dcjdist, S_dupchar_to_multiplicity = generate_random_maps_and_calc_distances(S, P, rand_maps, dupchar_to_unique_chars, m)
    num_generated_maps = rand_maps
    explored_maps = Set{Vector{Int}}()

    print("random maps: ", keys(map_to_dedupstr_dcjdist), "\n")

    total_possible_maps = 0
    for (_, mult) in S_dupchar_to_multiplicity
        total_possible_maps += factorial(mult)
    end 
    if total_maps > total_possible_maps 
        total_maps = total_possible_maps 
        println("note: capping total maps")
    end 
    
    dcj_dist_to_map = convert_maptodcjdist_to_converse(map_to_dedupstr_dcjdist)
    sorted_dists = sort(collect(keys(dcj_dist_to_map)))
    
    # until 'total_maps' maps are generated
    while total_maps != num_generated_maps
        println("\ngenerated ", num_generated_maps, "/", total_maps)
        # select best not yet explored map 
        smallest_dcj_dist = sorted_dists[1]
        maps = dcj_dist_to_map[smallest_dcj_dist]
        
        map_smallestd = popfirst!(maps)
        println("exploring neighborhood of ", map_smallestd)
        if isempty(maps)
            delete!(dcj_dist_to_map, smallest_dcj_dist)
            popfirst!(sorted_dists)
        end 
        
        if smallest_dcj_dist < global_min_dcj
            global_min_map  = map_smallestd
            global_min_dcj =  map_to_dedupstr_dcjdist[global_min_map][2]
            global_min_dedupstr = map_to_dedupstr_dcjdist[global_min_map][1]
            print("\\\\\\\\found a min dcj mapping ", global_min_map, " with distance ", global_min_dcj, "   ")
            println(P_dedup, " --> ", global_min_dedupstr)
        end 

        # searches up to 'max_neighbors' neighbor maps, adding them to M 
        if max_neighbors > total_maps - num_generated_maps 
            max_neighbors = total_maps - num_generated_maps   # cap it 
        end 
        neighbors = find_neighbors(map_smallestd, max_neighbors, map_to_dedupstr_dcjdist, S_dupchar_to_multiplicity)
        
        # insert neighbors into documentation 
        println("neighbors explored: ")
        for n in neighbors 
            s_dedup = deduplicate_genome(n, S, S_dupchar_to_multiplicity, dupchar_to_unique_chars)
            d = calculate_distance(P_dedup, s_dedup, "none")
            insert_new_map_into_documentation(n, d, s_dedup, map_to_dedupstr_dcjdist, dcj_dist_to_map, sorted_dists)
            println("     ", n, " dcj dist=", d)
            if d < global_min_dcj
                global_min_map  = n
                global_min_dcj =  map_to_dedupstr_dcjdist[global_min_map][2]
                global_min_dedupstr = map_to_dedupstr_dcjdist[global_min_map][1]
                print("\\\\\\\\found a min dcj mapping ", global_min_map, " with distance ", global_min_dcj, "   ")
                println(P_dedup, " --> ", global_min_dedupstr)
            end 
        end 
            
        # track all explored_maps maps (st no repeat exploration) 
        num_generated_maps += length(neighbors)
        push!(explored_maps, map_smallestd)
    end
    # return map that returns the shortest DCJ dist 
    return global_min_dcj, global_min_map, global_min_dedupstr
end 


src = ".a.,aab,bc"  
target = "ba,a,b,ca"

total_maps = 10
rand_maps = 3
max_neighbors = 3

mode = "none"
dupchar_to_unique_chars = OrderedDict{Char, Vector{Char}}()

localsearch(src, target, total_maps, rand_maps, max_neighbors, mode, dupchar_to_unique_chars)

[36mSRC .a.,aab,bc --> TARGET ba,a,b,ca[39m
random maps: [[1, 1], [6, 1], [4, 1]]
note: capping total maps

generated 3/8
exploring neighborhood of [6, 1]
\\\\found a min dcj mapping [6, 1] with distance 4   ba,f,t,ce --> .e.,fab,tc
neighbors explored: 
     [6, 2] dcj dist=6

generated 4/8
exploring neighborhood of [4, 1]
neighbors explored: 
     [4, 2] dcj dist=4
     [5, 1] dcj dist=4

generated 6/8
exploring neighborhood of [4, 2]
neighbors explored: 
     [5, 2] dcj dist=6

generated 7/8
exploring neighborhood of [5, 1]
neighbors explored: 

generated 7/8
exploring neighborhood of [1, 1]
neighbors explored: 
     [2, 1] dcj dist=6


(4, [6, 1], ".e.,fab,tc")