In [1]:
# permutations that require max number of dcj operations  - diameter of an instance 

# find the largest value of n which you can find the diameter 
# n --> diameter (randomly sample x number)
# increase n  

In [2]:
using Random

using NBInclude
@nbinclude("dcj_algo.ipynb")

In [3]:
# Function to get the first n letters of the alphabet
function first_n_letters(n::Int)
    return collect('a':'z')[1:n]
end

first_n_letters (generic function with 1 method)

In [5]:
# generates genomes/permutations/strings of length n 
# (up to 26 bc alphabet)
function generate_genome_str(n::Int)
    # generate first n letters
    letters = first_n_letters(n)

    # randomize capitalization (gene orientation)
    for i in length(letters) 
        if rand(Bool)
            letters[i] = uppercase(letters[i])
        end
    end 

    letters = shuffle(letters)

    # randomly choose length of chromosomes and circular/linear 
    max_chrom_length = n 
    curr_letters_idx = 1

    genome_str = ""
    while max_chrom_length != 0 
        chrom_length = rand(1:max_chrom_length)
        max_chrom_length -= chrom_length
        chrom = ""

        linear = rand(Bool)

        end_idx = curr_letters_idx + chrom_length - 1
        chrom = letters[curr_letters_idx:end_idx]
        if linear 
            chrom = "." * join(chrom) * "."
        end 
        
        chrom = join(chrom)
        if max_chrom_length != 0 
            genome_str = genome_str * chrom * ","
        else
            genome_str = genome_str * chrom 
        end 
        
        curr_letters_idx += chrom_length 
    end 

    return genome_str
end

# generate_genome_str(2)

generate_genome_str (generic function with 1 method)

In [6]:
# calculate diameter between target & src genome of size n 
# fixed number of src genomes to generate 
function calculate_diameter(n::Int, target::String, num_src_permutations_to_generate::Int)
    max_dist = 0 
    src = generate_genome_str(n)
    max_dist_permutation = src

    for i in 1:num_src_permutations_to_generate
        
        dist = calculate_distance(src, target, "none")
        if max_dist < dist 
            max_dist = dist
            max_dist_permutation = src 
        end 
        src = generate_genome_str(n)
    end 

    return max_dist, max_dist_permutation
end 

# calculate diameters for genomes size 1:n 
# by sampling a fixed number of src genomes to generate 
function calculate_diameters_given_num_of_src_genomes_to_generate(n::Int, x::Int, diameters::Matrix{Float64}, max_dist_genomes::Matrix{String})
    for m in 1:n
        # s = "\nx=" * string(x) * " || " * "m=" * string(m) * "\n"
        # printstyled(s; color = :red)

        # target = generate_genome_str(i) 
        target = join(first_n_letters(m))
        diam, max_dist_src = calculate_diameter(m, target, x)
        
        diameters[x, m] = diam
        max_dist_genomes[x, m] = max_dist_src
    end 
end 


calculate_diameters_given_num_of_src_genomes_to_generate (generic function with 1 method)

In [7]:
# calculates diameters for genomes size 1:n 
# by sampling 1:y src genomes (randomly generated)
function calculate_diameters(y::Int, n::Int)
    # note: matrix[a, b] = diameter of a genome of length 'b" with "a" randomly generated src permutations 
    diameters = zeros(y, n)
    max_dist_genomes = fill("", y, n)

    for x in 1:y 
        # s = "x=" * string(x) * " || " * "n=" * string(n) * "\n"
        # printstyled(s; color = :green)

        calculate_diameters_given_num_of_src_genomes_to_generate(n, x, diameters, max_dist_genomes)

        # println("======================================================================================================================================================")
    end 

    return diameters, max_dist_genomes 
end 

calculate_diameters (generic function with 1 method)

In [40]:
function Base.show(max_dist_genomes::Matrix{String})
    println()
    println("max distance genomes :: ")
    for y in 1:size(max_dist_genomes, 1)  # Loop over rows
        s = "y=" * string(y) * "\n"
        printstyled(s; color = :yellow)

        for m in 1:size(max_dist_genomes, 2)  # Loop over columns
            tar = first_n_letters(m)
            src = max_dist_genomes[y, m]
            
            println(string(m) * " " * src * " --> " * join(tar))
        end
    end 
end 


function Base.show(diameters::Matrix{Float64})
    println("diameters :: ")
    for y in 1:size(diameters, 1)  # Loop over rows
        s = "y = " * string(y) * " "
        printstyled(s; color = :yellow)

        println(diameters[y,:])
    end 
end 

In [41]:
n = 26
x = 3   # num_src_permutations to generate 

# note: matrix[a, b] = diameter of a genome of length 'b" with "a" randomly generated src permutations 
diameters, max_dist_genomes = calculate_diameters(x, n)

show(diameters)
show(max_dist_genomes)

diameters :: 
[33my = 1 [39m[0.0, 2.0, 1.0, 3.0, 3.0, 5.0, 6.0, 6.0, 7.0, 8.0, 11.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 16.0, 16.0, 17.0, 19.0, 21.0, 20.0, 21.0, 22.0, 22.0]
[33my = 2 [39m[1.0, 2.0, 3.0, 4.0, 3.0, 4.0, 7.0, 8.0, 7.0, 9.0, 9.0, 11.0, 10.0, 12.0, 15.0, 16.0, 15.0, 17.0, 18.0, 18.0, 19.0, 21.0, 23.0, 21.0, 23.0, 24.0]
[33my = 3 [39m[1.0, 1.0, 3.0, 3.0, 5.0, 6.0, 7.0, 8.0, 8.0, 9.0, 11.0, 11.0, 11.0, 13.0, 15.0, 15.0, 15.0, 18.0, 18.0, 19.0, 18.0, 20.0, 22.0, 23.0, 24.0, 25.0]

max distance genomes :: 
[33my=1[39m
1 a --> a
2 .B.,a --> ab
3 ac,b --> abc
4 .cbda. --> abcd
5 .e.,.dabc. --> abcde
6 ebd,ca,.f. --> abcdef
7 ceag,.fdb. --> abcdefg
8 eadfcbg,.h. --> abcdefgh
9 hgfIecbda --> abcdefghi
10 ifcg,bhjde,a --> abcdefghij
11 bjfiadcg,.e.,kh --> abcdefghijk
12 fea,.gicLdj.,.khb. --> abcdefghijkl
13 ecgabkhm,.iljdf. --> abcdefghijklm
14 .ln.,.fbhmkadci.,.e.,jg --> abcdefghijklmn
15 .midngbOk.,hfja,.ecl. --> abcdefghijklmno
16 mnfig,.khPod.,aljbe,.c. --> abcdefghij