In [5]:
using NBInclude
@nbinclude("../heuristics/randommap.ipynb")
@nbinclude("../heuristics/localsearch.ipynb")

In [6]:
"""
    read_file(genomesize::Int)

Read the REP database file for `genomesize` and return its genome pairs.

The path is built as `db/rep_db<genomesize>.txt`, relative to the current
working directory. The whole file is read as one string and split on the `|`
character; the resulting pieces are returned unmodified (no trimming, empty
pieces are kept).
"""
function read_file(genomesize::Int)
    path = string("db/rep_db", genomesize, ".txt")
    raw = read(path, String)
    return split(raw, "|")
end 


read_file (generic function with 1 method)

In [7]:
"""
    evaluate_one_pair_rm(genome_pair::String, genomesize::Int, dcjs::Vector{Int})

Run `randommap` on one genome pair and append the smallest distance found to `dcjs`.

# Arguments
- `genome_pair`: source and target genomes joined by `;` (first field is the
  source, second the target).
- `genomesize`: used only to set the number of maps requested, `10 * genomesize`.
- `dcjs`: accumulator that is mutated in place.

Returns `dcjs` (the result of `push!`).
"""
function evaluate_one_pair_rm(genome_pair::String, genomesize::Int, dcjs::Vector{Int}) 
    fields = split(genome_pair, ";")
    source_genome = string(fields[1])
    target_genome = string(fields[2])

    map_budget = 10 * genomesize

    # Only the second value returned by `randommap` is used. Each of its values is
    # indexed at position 2, presumably the DCJ distance for that map; confirm
    # against the `randommap` notebook.
    _, results_by_map = randommap(source_genome, target_genome, map_budget, "none")
    distances = map(entry -> entry[2], values(results_by_map))

    push!(dcjs, minimum(distances))
end 


"""
    evaluate_one_rep_rm(genomesize::Int)

Evaluate the random-map heuristic on every genome pair of one REP set.

The pairs are loaded with `read_file(genomesize)`. For each pair a progress
line `<genomesize> <index>` is printed, `evaluate_one_pair_rm` is run and its
wall-clock time is measured with `@elapsed`.

Returns the tuple `(mean(dcjs), mean(times))`: the mean of the per-pair
minimum distances and the mean time per pair in seconds.
"""
function evaluate_one_rep_rm(genomesize::Int)
    pair_list = read_file(genomesize)

    dcjs = Vector{Int}()
    times = Vector{Float64}()

    for (idx, pair) in enumerate(pair_list)
        println(genomesize, " ", idx)

        # `evaluate_one_pair_rm` pushes its result onto `dcjs` itself.
        elapsed = @elapsed evaluate_one_pair_rm(string(pair), genomesize, dcjs)
        push!(times, elapsed)
    end

    return mean(dcjs), mean(times)
end 

# i = 150
# dcj, time = evaluate_one_rep_rm(i)

## write to file 
 
# str = "," * string(dcj) * " " * string(time)
# filename = "output/rm" * ".txt"

# open(filename, "a") do file
#     write(file, str)
# end

evaluate_one_rep_rm (generic function with 1 method)

In [8]:
# Loop through all REP sets (strings of size 50 - 500).
# Caveat: results are only kept in memory, so if a run fails partway through, all data is lost.
# function evaluate_rm()
#     times = []
#     dists = Vector{Float64}() 
#     for i in 50:50:500 
#         avgdcj, time = evaluate_one_rep_rm(i)
#         push!(times, time)
#         push!(dists, avgdcj)
#     end 

#     return dists, times
# end 

# dists, times = evaluate_rm()

In [9]:
"""
    evaluate_one_pair_ls(genome_pair, genomesize, dcjs, mcn, total_maps, rand_maps, max_neighbors)

Run `localsearch` on one genome pair and append its first return value to `dcjs`.

# Arguments
- `genome_pair`: source and target genomes joined by `;`.
- `genomesize`: accepted for signature parity with the random-map variant; not
  used in the body.
- `dcjs`: accumulator that is mutated in place.
- `mcn`, `total_maps`, `rand_maps`, `max_neighbors`: forwarded unchanged to
  `localsearch`.

Returns `dcjs` (the result of `push!`).
"""
function evaluate_one_pair_ls(genome_pair::String, genomesize::Int, dcjs::Vector{Int}, mcn::Bool, total_maps::Int, rand_maps::Int, max_neighbors::Int)
    fields = split(genome_pair, ";")
    source_genome = string(fields[1])
    target_genome = string(fields[2])

    # `localsearch` is unpacked as five values; only the first one is kept.
    best_dcj, _, _, _, _ = localsearch(
        source_genome, target_genome, total_maps, rand_maps, max_neighbors,
        "none", mcn, Vector(),
    )

    push!(dcjs, best_dcj)
end 


"""
    evaluate_one_rep_ls(genomesize::Int, mcn::Bool, genome_pairs::Vector{SubString{String}})

Evaluate local search on every genome pair of one REP set.

The search budget is derived from `genomesize`:
- total maps: `10 * genomesize`
- random maps: 60% of the total, rounded up
- max neighbors: 30% of the random maps, rounded up

For each pair a progress line `<genomesize> <index>` is printed and the
wall-clock time of `evaluate_one_pair_ls` is measured with `@elapsed`.

Returns the tuple `(mean(dcjs), mean(times))`.
"""
function evaluate_one_rep_ls(genomesize::Int, mcn::Bool, genome_pairs::Vector{SubString{String}})
    dcjs = Vector{Int}()
    times = Vector{Float64}()

    total_maps = 10 * genomesize
    rand_maps = ceil(Int, 0.6 * total_maps)
    max_neighbors = ceil(Int, 0.3 * rand_maps)

    for (idx, pair) in enumerate(genome_pairs)
        println(genomesize, " ", idx)

        # `evaluate_one_pair_ls` pushes its result onto `dcjs` itself.
        elapsed = @elapsed evaluate_one_pair_ls(
            string(pair), genomesize, dcjs, mcn, total_maps, rand_maps, max_neighbors
        )
        push!(times, elapsed)
    end

    return mean(dcjs), mean(times)
end 


# mcn = true 
# i = 150

# genome_pairs = read_file(i)
# avgdcj, time = evaluate_one_rep_ls(i, mcn, genome_pairs)


## write to file 

# str = "," * string(avgdcj) * " " * string(time) * "\n"
# filename = "mcn_ls" * ".txt"

# open(filename, "a") do file
#     write(file, str)
# end

evaluate_one_rep_ls (generic function with 1 method)

In [10]:
"""
    evaluate_ls(mcn::Bool, arr::Vector{Int}, filename::String)

Evaluate local search on each REP set whose genome size is listed in `arr`.

For every size the genome pairs are loaded with `read_file`, evaluated with
`evaluate_one_rep_ls`, and one line of the form `<avgdcj> <avgtime>,` is
appended to `filename` right away, so results already computed survive a
failure on a later size.

# Arguments
- `mcn`: forwarded unchanged to `evaluate_one_rep_ls`.
- `arr`: genome sizes to evaluate, in order.
- `filename`: path of the output file; it is opened in append mode.

# Returns
`(dists, times)`: two `Vector{Float64}` holding, per entry of `arr`, the
average distance and the average time per pair.
"""
function evaluate_ls(mcn::Bool, arr::Vector{Int}, filename::String)
    # Collect the results locally. Returning `dists`/`times` without defining
    # them here would resolve to globals, which are undefined on a fresh session.
    dists = Vector{Float64}()
    times = Vector{Float64}()

    for i in arr
        genome_pairs = read_file(i)
        avgdcj, avgtime = evaluate_one_rep_ls(i, mcn, genome_pairs)

        push!(dists, avgdcj)
        push!(times, avgtime)

        str = string(avgdcj) * " " * string(avgtime) * ",\n"

        open(filename, "a") do file
            write(file, str)
        end
    end 

    return dists, times
end 

# Run 1: local search with `mcn` switched off, on sizes 350, 400 and 450.
# Per-size averages are appended to output/old_ls.txt.
filename = string("output/old_ls", ".txt")
arr = [350, 400, 450]
mcn = false
dists, times = evaluate_ls(mcn, arr, filename)


# Run 2: local search with `mcn` switched on, on sizes 50, 100, ..., 450.
# Per-size averages are appended to output/mcn_ls.txt.
filename = string("output/mcn_ls", ".txt")
arr = collect(50:50:450)
mcn = true
dists, times = evaluate_ls(mcn, arr, filename)

350 1
350 2
350 3
350 4
350 5
350 6
350 7
350 8
350 9
350 10
350 11
350 12
350 13
350 14
350 15
350 16
350 17