In [2]:
## tests telomeres adn genes 

using Test 

function test_telomere_type()
    a = Telomere()
    @test typeof(a) == Telomere
end

function test_forward_reverse_gene()
    @test isequal(Gene(0, "d", true), Gene(0, "d", false))
end

function test_telomere_repr()
    @test show(Telomere()) == "."
end

function test_reverse_equality()
    @test !isequal(Gene(0, "D", true), Gene(1, "D", true))
end

@testset "Gene Tests" begin
    test_telomere_type()
    test_forward_reverse_gene()
    test_telomere_repr()
    test_reverse_equality()
end

Gene Tests: [91m[1mError During Test[22m[39m at [39m[1mIn[2]:22[22m
  Got exception outside of a @test
  UndefVarError: `Telomere` not defined
  Stacktrace:
    [1] [0m[1mtest_telomere_type[22m[0m[1m([22m[0m[1m)[22m
  [90m    @[39m [35mMain[39m [90m.\[39m[90m[4mIn[2]:6[24m[39m
    [2] [0m[1mmacro expansion[22m
  [90m    @[39m [90m[4mIn[2]:23[24m[39m[90m [inlined][39m
    [3] [0m[1mmacro expansion[22m
  [90m    @[39m [90mC:\Users\mgnli\.julia\juliaup\julia-1.10.3+0.x64.w64.mingw32\share\julia\stdlib\v1.10\Test\src\[39m[90m[4mTest.jl:1577[24m[39m[90m [inlined][39m
    [4] top-level scope
  [90m    @[39m [90m[4mIn[2]:23[24m[39m
    [5] [0m[1meval[22m
  [90m    @[39m [90m.\[39m[90m[4mboot.jl:385[24m[39m[90m [inlined][39m
    [6] [0m[1minclude_string[22m[0m[1m([22m[90mmapexpr[39m::[0mtypeof(REPL.softscope), [90mmod[39m::[0mModule, [90mcode[39m::[0mString, [90mfilename[39m::[0mString[0m[1m)[22m
  [90m 

LoadError: [91mSome tests did not pass: 0 passed, 0 failed, 1 errored, 0 broken.[39m

In [None]:
# Funcs 

"""
checks for correct conditions 
    - only letters and dots
    - even number of telomeres in both genomes (can't have the start of a chromosome without closing it)  
"""
function check_conditions(A::String, B::String)
    set_A = Dict{Char, Int64}
    set_B = Set{Char}()
    tel_count_A = 0 
    tel_count_B = 0 

    for i in 1:length(A)
        if A[i] == '.'
            tel_count_A += 1
        elseif isletter(A[i])
            push!(set_A, lowercase(A[i])) # or uppercase, just keep consistent
        else
            throw(ArgumentError("Error: Genome A has non-letter genes"))
            return false
        end 
    end

    for i in 1:length(B) 
        if B[i] == '.'
            tel_count_B += 1
        elseif  isletter(A[i])
            push!(set_B, lowercase(B[i]))
        else
            throw(ArgumentError("Error: Genome B has non-letter genes"))
            return false
        end 
    end

    if mod(tel_count_A, 2) != 0 || mod(tel_count_B, 2) != 0  
         throw(ArgumentError("Error: Telomere error"))
        return false 
    elseif set_A != set_B
        throw(ArgumentError("Error: Sets of genes in the two given genomes don't match"))
        return false
    end 


    return true 
end 

# correct_conditions(".abdc.", "abCd")

In [1]:
function Chromosome(genes::Vector{AbstractGene}, id_set::Set{Int})
    # print("entered Chromosome constructor for genes********************************************************")
    
    for gene in genes
        telomere = show(gene) == "."
        if !telomere 
            if gene.id ∉ id_set 
                push!(id_set, gene.id)
            else 
                throw(ArgumentError("There are duplicate genes with the same ID in this genome."))
            end
        end 
    end 
    content = genes

    return Chromosome(content)

end 



function Chromosome(::Any) 
    throw(TypeError("Chromosome must be a list of Gene instances with unique IDs or a string of genes."))
end 

LoadError: UndefVarError: `AbstractGene` not defined

In [None]:
### testing chromosome works 

# testing strings 

# id_counter = Ref{Int}(1)
# id_to_char = Dict{Char, Int}()
# char_to_id = Dict{Char, Vector{Int}}()

# Chromosome(".AbcA.", id_counter, id_to_char, char_to_id)

# print("\n\n******************\nid_counter: ", id_counter[], "\n", "id_to_char", id_to_char, "\n","char_to_id: ", char_to_id)


# testing genes 
x = Telomere()
y = Gene(1, "A", true)
z = Gene(2, "B", true)
v = Telomere()

id_set = Set{Int}() 

Chromosome([y, y, z], id_set)  ## why doesn't it find the right constructor? 

# testing rand 
# x = 1 
# Chromosome(x) ## why doesn't it throw error??? 

In [None]:
"""
Gene head or tail end. Used in adjacencies
"""
@with_kw struct GeneEnd
    gene::AbstractGene
    head::Bool=true
end

"""Adjacency Data Structure"""
mutable struct Adjacency 
    left_end_gene::GeneEnd
    right_end_gene::GeneEnd
    label::Vector{Gene}

    Adjacency(left_end_gene::GeneEnd = nothing, right_end_gene::GeneEnd = nothing; label::Vector{Gene} = nothing) = new([left_end_gene, right_end_gene], left_end_gene, right_end_gene, label)
end

function Base.isequal(a::GeneEnd, b::GeneEnd)
    return a.gene == b.gene && a.head == b.head
end

function Base.show(me::GeneEnd)
    if me.head
        print(string(me.gene))
    else
        print(string(me.gene) * "*")
    end
end

function other_adjacency_end(me::GeneEnd, adj::Adjacency)
    if adj.left_end_gene == me
        return adj.right_end_gene
    else
        return adj.left_end_gene
    end
end

In [None]:


# AdjacencyGraph

mutable struct AdjacencyGraph
    commonGenes::Set{AbstractGene}
    adjA::Vector{Adjacency}
    adjB::Vector{Adjacency}
    cycles::Int
    ab_paths::Int
    a_runs::Int
    b_runs::Int
    run_potential::Int
    indel_potential::Int
end

function AdjacencyGraph(A::Genome, B::Genome)
    gene_set_a = Set(Iterators.flatten(A.data))
    gene_set_b = Set(Iterators.flatten(B.data))
    commonGenes = intersect(gene_set_a, gene_set_b)
    if Telomere() in commonGenes
        delete!(commonGenes, Telomere())
    end
    gene_set_a = setdiff(gene_set_a, commonGenes)
    gene_set_b = setdiff(gene_set_b, commonGenes)

    adjA = Adjacency[]
    adjB = Adjacency[]
    adj = [GeneEnd(Telomere()), GeneEnd(Telomere())]
    adjacencies = [adjA, adjB]
    reference_A = Dict{GeneEnd, Int}(GeneEnd(Telomere(), true) => 0)
    reference_B = Dict{GeneEnd, Int}(GeneEnd(Telomere(), true) => 0)
    references = [reference_A, reference_B]

    for (i, genome) in enumerate([A, B])
        adjacency_length = 0
        for chromosome in genome.data
            index = 0
            adj_index = 0
            current_gene = chromosome.data[index]
            chromosome_genes = Set(chromosome.data)
            adjacency_length += length(intersect(commonGenes, chromosome_Genes))
            if chromosome.data[1] isa Telomere
                adjacency_length += 1
            end
            while length(adjacencies[i]) < adjacency_length
                label = AbstractGene[]
                if current_gene isa Telomere
                    adj[1] = GeneEnd(Telomere(), true)
                else
                    adj[1] = GeneEnd(current_gene, !current_gene.reverse)
                end
                next_gene = ifelse(index >= length(chromosome.data) - 1, chromosome.data[1], chromosome.data[index + 1])
                while next_gene ∉ commonGene
                    if next_gene isa Telomere
                        break
                    end
                    push!(label, next_gene)
                    index += 1
                    next_gene = ifelse(index >= length(chromosome.data) - 1, chromosome.data[1], chromosome.data[index + 1])
                end
                if next_gene isa Telomere
                    adj[2] = GeneEnd(Telomere(), true)
                else
                    adj[2] = GeneEnd(next_gene, next_gene.reverse)
                end
                push!(adjacencies[i], Adjacency(adj[1], adj[2], label))
                if adj[1].gene != Telomere()
                    references[i][adj[1]] = length(adjacencies[i])
                end
                if adj[2].gene != Telomere()
                    references[i][adj[2]] = length(adjacencies[i])
                end
                current_gene = next_gene
                index += 1
                adj_index += 1
            end
        end
    end

    to_visit_a_index = Set(1:length(adjA))
    to_visit_b_index = Set(1:length(adjB))
    visited_a_index = Set{Int}()
    visited_b_index = Set{Int}()
    cycles = 0
    ab_paths = 0
    a_runs = 0
    on_a_run = false
    b_runs = 0
    on_b_run = false

    while !isempty(to_visit_a_index)
        current_adj_index = pop!(to_visit_a_index)
        push!(visited_a_index, current_adj_index)
        left_gene = adjA[current_adj_index].left_end_gene
        right_gene = adjA[current_adj_index].right_end_gene
        if !isempty(adjA[current_adj_index].label)
            a_runs += 1
            on_a_run = true
        end
        paths_end_on_a = [true, true]
        for (i, current_gene) in enumerate([left_gene, right_gene])
            a_side = true
            next_adj_index = reference_B[current_gene]
            if next_adj_index in visited_b_index
                continue
            end
            while next_adj_index != nothing
                current_adj_index = next_adj_index
                a_side = !a_side
                adj_side = a_side ? adjA : adjB
                current_adj = adj_side[current_adj_index]
                reference_side = a_side ? reference_B : reference_A
                if a_side
                    push!(visited_a_index, current_adj_index)
                    delete!(to_visit_a_index, current_adj_index)
                else
                    push!(visited_b_index, current_adj_index)
                    delete!(to_visit_b_index, current_adj_index)
                end
                if !isempty(current_adj.label)
                    if !a_side && on_a_run
                        on_b_run = true
                        on_a_run = false
                        b_runs += 1
                    elseif !a_side && !on_b_run
                        on_b_run = true
                        b_runs += 1
                    elseif a_side && on_b_run
                        on_a_run = true
                        on_b_run = false
                        a_runs += 1
                    elseif a_side && !on_a_run
                        on_a_run = true
                        a_runs += 1
                    end
                end
                current_gene = current_gene == current_adj.left_end_gene ? current_adj.right_end_gene : current_adj.left_end_gene
                next_adj_index = reference_side[current_gene]
                if next_adj_index == nothing
                    paths_end_on_a[i] = a_side
                    break
                elseif (a_side && next_adj_index in visited_b_index) || (!a_side && next_adj_index in visited_a_index)
                    cycles += 1
                    break
                end
            end
        end
        if paths_end_on_a[1] != paths_end_on_a[2]
            ab_paths += 1
        end
    end

    run_potential = a_runs + b_runs
    indel_potential = run_potential > 0 ? (run_potential + 1) ÷ 2 + ((run_potential ÷ 2) % 2) : 0

    return AdjacencyGraph(commonGenes, adjA, adjB, cycles, ab_paths, a_runs, b_runs, run_potential, indel_potential)
end



In [None]:
 """The abstraction of a chromosome in DCJ model, a list of telomeres and genes

    Parameters
    ----------
    genes: list of genes or string (req)
        The object to be converted into a chromosome.

    """
mutable struct Chromosome
    genes::Vector{AbstractGene}
    # gene_set::Set{String}
end

# mutable struct ChromosomeStatic
#     string_d::Dict{String, Int}  # gene to ID 
#     uid_counter::Int
#     ChromosomeStatic() = new(Dict{String, Int}(), 0)
# end

# const chromosome_static = ChromosomeStatic()

function Chromosome(genes::Vector{})
    # gene_set = Set{AbstractGene}()
    content = AbstractGene[]
    
    # genes = list of genes 
    if isa(genes, Vector{AbstractGene})
        content = genes
     
    # throw(TypeError("Chromosome must be a list of Gene instances or a string of unique genes."))

    # parse string 
    # else 
    #     genes_str = collect(genes)
    #     if !((genes_str[1] == '.') == (genes_str[end] == '.'))
    #         throw(ArgumentError("Linear Chromosome must start and end with telomeres."))
    #     end
    #     for (i, s) in enumerate(genes_str)
    #         dna = string(s)
    #         rev = isuppercase(s)
    #         telomere = s == '.'
    #         # handle gene 
    #         if s in gene_set && !telomere
    #             throw(ArgumentError("Duplicated genes are not allowed. ($s)"))
    #         elseif s in keys(chromosome_static.string_d)
    #             uid = chromosome_static.string_d[s]
    #             push!(gene_set, s)
    #         else
    #             chromosome_static.string_d[s] = chromosome_static.uid_counter
    #             push!(gene_set, s)
    #             uid = chromosome_static.uid_counter
    #             chromosome_static.uid_counter += 1
    #         end
    #         # handle telomere
    #         if telomere && !(i == 1 || i == length(genes_str))
    #             throw(ArgumentError("Telomere cannot appear in middle of chromosome."))
    #         elseif telomere
    #             push!(content, Telomere())
    #         else
    #             push!(content, Gene(uid, dna, rev))
    #         end
    #     end
    end
    return Chromosome(content, gene_set)
end

function clear_chromosome_static()
    chromosome_static.string_d = Dict{String, Int}()
    chromosome_static.uid_counter = 0
end