In [None]:
# Funcs 

"""
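Checks that two genome strings satisfy the input conditions:
    - only letters and dots (a dot marks a telomere)
    - even number of telomeres in both genomes (can't have the start of a chromosome without closing it)
    - both genomes contain the same set of genes (compared case-insensitively)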
"""
function check_input(A::String, B::String)
    # Returns `true` when both genomes are valid and throws `ArgumentError`
    # otherwise. Genome A is scanned first so its errors surface before B's.
    genes_A, telomeres_A = _scan_genome(A, "A")
    genes_B, telomeres_B = _scan_genome(B, "B")

    if isodd(telomeres_A) || isodd(telomeres_B)
        # Every linear chromosome needs an opening and a closing telomere.
        throw(ArgumentError("Error: Telomere error"))
    elseif genes_A != genes_B
        throw(ArgumentError("Error: Sets of genes in the two given genomes don't match"))
    end

    return true
end

"""
    _scan_genome(genome, label)

Return `(genes, telomeres)` for `genome`: the set of its gene letters folded to
lowercase, and the number of `.` characters it contains.

Throws `ArgumentError` naming genome `label` if a character is neither a letter
nor a dot.
"""
function _scan_genome(genome::AbstractString, label::AbstractString)
    genes = Set{Char}()
    telomeres = 0
    # Iterate over characters rather than `1:length(genome)`: string indices are
    # byte offsets, so integer indexing fails on non-ASCII letters.
    for c in genome
        if c == '.'
            telomeres += 1
        elseif isletter(c)
            push!(genes, lowercase(c)) # or uppercase, just keep consistent
        else
            throw(ArgumentError("Error: Genome $label has non-letter genes"))
        end
    end
    return genes, telomeres
end

# check_input(".abdc.", "abCd")

In [None]:
# Build a Chromosome from `genes`, registering every non-telomere gene id in
# `id_set`. `id_set` is mutated as the genes are visited; an id that is already
# present raises an ArgumentError.
# NOTE(review): the telomere check compares the return value of one-argument
# `show` with "."; Base's `show(x)` prints and returns `nothing`, so this relies
# on a project-specific `show` method returning a String — confirm.
function Chromosome(genes::Vector{AbstractGene}, id_set::Set{Int})
    for g in genes
        # Telomeres carry no id, so they are skipped.
        show(g) == "." && continue

        if g.id in id_set
            throw(ArgumentError("There are duplicate genes with the same ID in this genome."))
        end
        push!(id_set, g.id)
    end

    return Chromosome(genes)
end


## inside Chromosome
# Scratch fragment: it uses `g`, `chrom` and `chrom_list`, none of which are
# defined in this cell, and `continue` is only valid inside a loop — so it is
# presumably meant to be pasted into a character loop (TODO confirm).
# check if empty chromosome .. 
# When the current character `g` is a dot: append it to the chromosome string,
# store that string in `chrom_list`, reset `chrom` to empty, and skip the rest
# of the iteration.
if g == '.' 
    chrom = chrom * g 
    push!(chrom_list, chrom)
    chrom = ""
    continue 
end 

In [None]:
## tests telomeres and genes 

using Test 

# A freshly constructed telomere must have exactly the type `Telomere`.
function test_telomere_type()
    @test typeof(Telomere()) == Telomere
end

# Two genes that differ only in their third constructor argument (presumably
# the orientation flag — confirm) must still be `isequal`.
function test_forward_reverse_gene()
    first_gene = Gene(0, "d", true)
    second_gene = Gene(0, "d", false)
    @test isequal(first_gene, second_gene)
end

# A telomere is expected to be rendered as a single dot.
# NOTE(review): Base's one-argument `show` prints to stdout and returns
# `nothing`; this check can only pass if the project defines a one-argument
# `show` that returns a String — confirm.
function test_telomere_repr()
    rendered = show(Telomere())
    @test rendered == "."
end

# Genes whose first constructor argument differs (0 vs 1) must not be `isequal`.
function test_reverse_equality()
    gene_zero = Gene(0, "D", true)
    gene_one = Gene(1, "D", true)
    @test !isequal(gene_zero, gene_one)
end

@testset "Gene Tests" begin
    # Run every gene/telomere check inside this test set, in a fixed order.
    for check in (
        test_telomere_type,
        test_forward_reverse_gene,
        test_telomere_repr,
        test_reverse_equality,
    )
        check()
    end
end

In [None]:
using Test

# A freshly constructed telomere must have exactly the type `Telomere`.
function test_telomere_type()
    @test typeof(Telomere()) == Telomere
end

# Two markers that differ only in their third constructor argument (presumably
# the orientation flag — confirm) must compare equal with `==`.
function test_forward_reverse_marker()
    first_marker = Marker(0, "d", true)
    second_marker = Marker(0, "d", false)
    @test first_marker == second_marker
end

# The string form of a telomere must be a single dot.
function test_telomere_repr()
    rendered = string(Telomere())
    @test rendered == "."
end

# Markers whose first constructor argument differs (0 vs 1) must not be equal.
function test_reverse_equality()
    marker_zero = Marker(0, "D", true)
    marker_one = Marker(1, "D", true)
    @test marker_zero != marker_one
end

# Expects a DCJ distance of 0 between the linear chromosomes ".ac." and ".abc.".
# NOTE(review): the source lacks gene 'b' that the target contains; confirm
# that a distance of 0 is really the intended expectation for this pair.
function test_dcj_single_a_dcj()
    source = Genome(Chromosome(".ac."))
    target = Genome(Chromosome(".abc."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 0
end

# Expects a DCJ distance of 2 when two adjacent genes are swapped
# (".acb." versus ".abc.").
function test_dcj_single_swap_dcj()
    source = Genome(Chromosome(".acb."))
    target = Genome(Chromosome(".abc."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 2
end

# Expects a DCJ distance of 6 between ".cbAdEGf." and ".abcdefg.".
function test_dcj_intermediate_dcj()
    source = Genome(Chromosome(".cbAdEGf."))
    target = Genome(Chromosome(".abcdefg."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 6
end

# Expects a DCJ distance of 4 between ".aBdCE." and ".abcde.".
function test_dcj_intermediate_2_dcj()
    source = Genome(Chromosome(".aBdCE."))
    target = Genome(Chromosome(".abcde."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 4
end

# Expects a DCJ distance of 2 between the telomere-free chromosomes "abc" and
# "cba".
function test_dcj_backwards()
    source = Genome(Chromosome("abc"))
    target = Genome(Chromosome("cba"))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 2
end

# Expects a DCJ distance of 1 between ".ab." (with telomeres) and "ab"
# (without telomeres).
function test_dcj_single_split()
    source = Genome(Chromosome(".ab."))
    target = Genome(Chromosome("ab"))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 1
end

# Expects a DCJ distance of 1 between the two-chromosome genome ("ab", ".cd.")
# and the single chromosome ".abcd.".
function test_dcj_circular_excision()
    source = Genome(Chromosome("ab"), Chromosome(".cd."))
    target = Genome(Chromosome(".abcd."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 1
end

# Expects a DCJ distance of 5 for a multi-chromosome example that mixes
# chromosomes written with and without telomeres.
function test_dcj_default_example()
    source = Genome(
        Chromosome("ab"),
        Chromosome(".cd."),
        Chromosome(".e."),
        Chromosome("fg"),
    )
    target = Genome(Chromosome(".acD."), Chromosome("be"), Chromosome(".fg."))
    distance = calculate_distance(source, target; method="dcj")
    @test distance == 5
end

# A chromosome containing the same marker twice must be rejected.
# `ValueError` is a Python exception and is not defined in Julia; the expected
# type is now `ArgumentError`, which the validation code in this file throws.
# NOTE(review): confirm `transform_genome` also throws ArgumentError.
function test_duplicated_markers()
    @test_throws ArgumentError transform_genome(["aa"])
end

# The same marker in upper and lower case counts as a duplicate and must be
# rejected.
# `ValueError` is a Python exception and is not defined in Julia; the expected
# type is now `ArgumentError`, which the validation code in this file throws.
# NOTE(review): confirm `transform_genome` also throws ArgumentError.
function test_duplicated_marker_reversed()
    @test_throws ArgumentError transform_genome(["Aa"])
end

# A marker repeated across two chromosomes ('c' and 'C') must be rejected.
# `ValueError` is a Python exception and is not defined in Julia; the expected
# type is now `ArgumentError`, which the validation code in this file throws.
# NOTE(review): confirm `transform_genome` also throws ArgumentError.
function test_duplicated_marker_between_chromosomes()
    @test_throws ArgumentError transform_genome(["abcde", ".fghiC."])
end

# A chromosome with a telomere on only one end must be rejected, whichever end
# is missing.
# `ValueError` is a Python exception and is not defined in Julia; the expected
# type is now `ArgumentError`, which the validation code in this file throws.
# NOTE(review): confirm `transform_genome` also throws ArgumentError.
function test_missing_telomere()
    @test_throws ArgumentError transform_genome([".abcd"])
    @test_throws ArgumentError transform_genome(["abcd."])
end

# A telomere in the middle of a chromosome string must be rejected.
# `ValueError` is a Python exception and is not defined in Julia; the expected
# type is now `ArgumentError`, which the validation code in this file throws.
# NOTE(review): confirm `transform_genome` also throws ArgumentError.
function test_rogue_telomere()
    @test_throws ArgumentError transform_genome([".abc.de."])
end

# Passing chromosome strings as separate arguments instead of a collection is
# expected to fail.
# NOTE(review): in Julia a call with no matching method raises `MethodError`,
# not `TypeError`; confirm which exception `transform_genome` actually produces.
function test_genome_without_chromosomes()
    first_chromosome = "abcde"
    second_chromosome = ".fg."
    @test_throws TypeError transform_genome(first_chromosome, second_chromosome)
end

# Passing a chromosome as a list of one-letter strings instead of a single
# string is expected to fail.
# NOTE(review): in Julia a call with no matching method raises `MethodError`,
# not `TypeError`; confirm which exception `transform_genome` actually produces.
function test_chromosome_list_not_markers()
    nested_chromosomes = [["a", "b", "c"]]
    @test_throws TypeError transform_genome(nested_chromosomes)
end

@testset "Markers Tests" begin
    # Run every marker/telomere check inside this test set, in a fixed order.
    for check in (
        test_telomere_type,
        test_forward_reverse_marker,
        test_telomere_repr,
        test_reverse_equality,
    )
        check()
    end
end

@testset "Adjacency Graph Linear Tests" begin
    # DCJ distance checks on chromosomes written with telomeres on both ends.
    for check in (
        test_dcj_single_a_dcj,
        test_dcj_single_swap_dcj,
        test_dcj_intermediate_dcj,
        test_dcj_intermediate_2_dcj,
    )
        check()
    end
end

@testset "Adjacency Graph Circular Tests" begin
    # DCJ distance checks that involve chromosomes written without telomeres.
    for check in (
        test_dcj_backwards,
        test_dcj_single_split,
        test_dcj_circular_excision,
        test_dcj_default_example,
    )
        check()
    end
end

@testset "Genome Validation Tests" begin
    # Checks that malformed genome descriptions are rejected.
    for check in (
        test_duplicated_markers,
        test_duplicated_marker_reversed,
        test_duplicated_marker_between_chromosomes,
        test_missing_telomere,
        test_rogue_telomere,
        test_genome_without_chromosomes,
        test_chromosome_list_not_markers,
    )
        check()
    end
end


In [None]:
# Reset the shared gene bookkeeping in place: set the id counter back to 1 and
# empty both lookup dictionaries.
#
# The arguments themselves are mutated. Assigning new objects to the parameter
# names would only rebind the local variables and leave the caller's counter
# and dictionaries untouched.
#
# Returns the (now empty) `char_to_id`, so the result still compares equal to
# an empty `Dict{Char, Int}`.
function reset_documentation(id_counter::Ref{Int}, id_to_char::Dict{Int, Char}, char_to_id::Dict{Char, Int})
    id_counter[] = 1
    empty!(id_to_char)
    empty!(char_to_id)
    return char_to_id
end

In [None]:
using Test

""" TESTING """ 
### note: circular chroms start and end with the same letter (case-sensitive)


# Parsing the one-gene string "a" must produce a genome with one chromosome
# (Telomere, Gene(1, 'a', false), Telomere) and the mappings 1 <-> 'a'.
function test_single_gene_chrom_linear()
    # Fresh, empty state handed to the function under test.
    id_counter = Ref{Int}(1)
    id_to_char = Dict{Int, Char}()
    char_to_id = Dict{Char, Int}()

    # The expected genome uses its own dictionaries. Sharing the input
    # dictionaries with the expected value would pre-populate the parser's
    # state and make the mapping comparison trivially true.
    expected_id_to_char = Dict{Int, Char}(1 => 'a')
    expected_char_to_id = Dict{Char, Int}('a' => 1)
    expected_chrom = Chromosome([Telomere(), Gene(1, 'a', false), Telomere()])
    expected = Genome([expected_chrom], expected_id_to_char, expected_char_to_id)

    @test string_to_genome("a", id_counter, id_to_char, char_to_id, true) == expected
end


@testset "String to Genome Tests" begin
    # Run the string-parsing checks inside this test set.
    for check in (test_single_gene_chrom_linear,)
        check()
    end
end


In [None]:
using NBInclude
@nbinclude("dcj_algo.ipynb")

# id_counter = Ref{Int}(1)
# id_to_char = Dict{Int, Char}()
# char_to_id = Dict{Char, Int}()

# s = ".a."

# genome = string_to_genome(s, id_counter, id_to_char, char_to_id, true)

# # show(genome)

# adj_list, adj_set = genome_to_adj_listset(genome)   

# show(adj_list)

# Scratch inputs for `calculate_distance`. Every `src`/`target` pair below
# overwrites the previous one, so only the last pair reaches the call at the
# end of this section; the earlier pairs are kept as alternative cases to try.

# 2 DCJ op - adj --> adj
src = ".abc."
target = ".acb."

# 1 DCJ op - adj --> telo
src = "abca"
target = ".abc."

# 1 DCJ op - telo --> adj
src = ".ab.cc"
target = ".abc."



# The pair that is actually evaluated.
src = ".abcdefg."
target = ".abcdGFE." 

# Called here with the two raw genome strings.
calculate_distance(src, target)



# Scratch genome strings. Each assignment overwrites `s`, so only the last
# value remains bound once the cell has run. Nothing in the visible part of
# this cell reads `s` afterwards.

# single chrom, single gene
s = ".a."
s = "aa" 

# single chrom, 2 genes
s = ".ab."
s = "aba"

# single chrom, 3 genes
s = ".abc."
s = "abca"

# multiple chrom, multiple genes
s = ".a.bb.c."
s = "aa.b.cc"
s = ".ab.cdc.de.fgf"
s = "aba.cd.ded.fg."