In [1]:
# permutations that require max number of dcj operations  - diameter of an instance 

# find the largest value of n which you can find the diameter 
# n --> diameter (randomly sample x number)
# increase n  

In [2]:
using Random

using NBInclude
@nbinclude("dcj_algo.ipynb")

In [3]:
# Function to get the first n letters of the alphabet
function first_n_letters(n::Int)
    return collect('a':'z')[1:n]
end

first_n_letters (generic function with 1 method)

In [4]:
# generates genomes/permutations/strings of length n 
# (up to 26 bc alphabet)
function generate_genome_str(n::Int)
    # generate first n letters
    letters = first_n_letters(n)

    # randomize capitalization (gene orientation)
    for i in length(letters) 
        if rand(Bool)
            letters[i] = uppercase(letters[i])
        end
    end 

    letters = shuffle(letters)

    # randomly choose length of chromosomes and circular/linear 
    max_chrom_length = n 
    curr_letters_idx = 1

    genome_str = ""
    while max_chrom_length != 0 
        chrom_length = rand(1:max_chrom_length)
        max_chrom_length -= chrom_length
        chrom = ""

        linear = rand(Bool)

        end_idx = curr_letters_idx + chrom_length - 1
        chrom = letters[curr_letters_idx:end_idx]
        if linear 
            chrom = "." * join(chrom) * "."
        end 
        
        chrom = join(chrom)
        if max_chrom_length != 0 
            genome_str = genome_str * chrom * ","
        else
            genome_str = genome_str * chrom 
        end 
        
        curr_letters_idx += chrom_length 
    end 

    return genome_str
end

# generate_genome_str(2)

generate_genome_str (generic function with 1 method)

In [5]:
# calculate diameter between target & src genome of size n 
# fixed number of src genomes to generate 
function calculate_diameter(n::Int, target::String, num_src_permutations_to_generate::Int)
    max_dist = 0 
    src = generate_genome_str(n)
    max_dist_permutation = src

    for i in 1:num_src_permutations_to_generate
        
        dist = calculate_distance(src, target, "none")
        if max_dist < dist 
            max_dist = dist
            max_dist_permutation = src 
        end 
        src = generate_genome_str(n)
    end 

    return max_dist, max_dist_permutation
end 

# calculate diameters for genomes size 1:n 
# by sampling a fixed number of src genomes to generate 
function calculate_diameters_given_num_of_src_genomes_to_generate(n::Int, x::Int, diameters::Matrix{Float64}, max_dist_genomes::Matrix{String})
    for m in 1:n
        # s = "\nx=" * string(x) * " || " * "m=" * string(m) * "\n"
        # printstyled(s; color = :red)

        # target = generate_genome_str(i) 
        target = join(first_n_letters(m))
        diam, max_dist_src = calculate_diameter(m, target, x)
        
        diameters[x, m] = diam
        max_dist_genomes[x, m] = max_dist_src
    end 
end 


calculate_diameters_given_num_of_src_genomes_to_generate (generic function with 1 method)

In [6]:
# calculates diameters for genomes size 1:n 
# by sampling 1:y src genomes (randomly generated)
function calculate_diameters(y::Int, n::Int)
    # note: matrix[a, b] = diameter of a genome of length 'b" with "a" randomly generated src permutations 
    diameters = zeros(y, n)
    max_dist_genomes = fill("", y, n)

    for x in 1:y 
        # s = "x=" * string(x) * " || " * "n=" * string(n) * "\n"
        # printstyled(s; color = :green)

        calculate_diameters_given_num_of_src_genomes_to_generate(n, x, diameters, max_dist_genomes)

        # println("======================================================================================================================================================")
    end 

    return diameters, max_dist_genomes 
end 

calculate_diameters (generic function with 1 method)

In [8]:
n = 10 
y = 3   # num_src_permutations to generate 

# note: matrix[a, b] = diameter of a genome of length 'b" with "a" randomly generated src permutations 
diameters, max_dist_genomes = calculate_diameters(y::Int, n::Int)
println(diameters)
print(max_dist_genomes)

[0.0 1.0 2.0 2.0 4.0 6.0 4.0 6.0 9.0 8.0; 1.0 2.0 3.0 4.0 4.0 4.0 7.0 8.0 7.0 

9.0; 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 8.0 10.0]
["A" "a,B" "a,C,b" "bdac" ".e.,.c.,ab,.d." "e,.caf.,d,.b." "dbc,.fae.,.g." "dfeghb,.ca." "I,afhedb,.cg." ".ehg.,.bjad.,if,.c."; ".a." ".a.,.b." "a,.b.,c" ".ac.,Db" "dac,.Eb." "fcadbe" ".bGeadf.,c" ".f.,.b.,adgHc,e" "I,.e.,.fh.,ba,.cdg." "hebiJcfagd"; ".A." ".a.,.B." "b,.a.,.C." ".D.,cba" "cbdE,.a." ".acedFb." ".d.,ce,fG,b,.a." ".febacgHd." ".fcbe.,.dgih.,.a." ".bjfhd.,.gace.,i"]