# Calculating DCJ distance between two unsigned, circular and/or linear genomes with duplicates

Credit:
- https://github.com/mlliou112/py-dcj (python version)
- ChatGPT for converting to Julia 

In [186]:
using Base.Iterators
using Parameters

abstract type AbstractGene end

In [187]:
"""
Representation for a telomere in DCJ model
"""

@with_kw struct Telomere <: AbstractGene
    reverse::Bool=false
end

function Base.isequal(a::Telomere, b)
    return typeof(a) == typeof(b)
end

function Base.show(t::Telomere)
    return "."
end

# x = Telomere()
# show(x)

In [188]:
"""
Representation for DNA fragment (a gene) in DCJ model.

    Parameters
    ----------
    id: int (required)
        unique id

    dna: str
        What dna fragment does this gene represent?

    reverse: boolean
        Is the DNA fragment reversed?
"""
@with_kw struct Gene <: AbstractGene
    id::Int
    dna::String=""
    reverse::Bool=false
end

function Base.isequal(a::Gene, b::Gene)
    return typeof(a) == typeof(b) && a.id == b.id
end

function Base.show(m::Gene)
    if m.reverse
        print("-" * string(m.id))
    else
        print(string(m.ud))
    end
end

y = Gene(1, "d", true)
show(y)

-1

In [189]:
using Test 

function test_telomere_type()
    a = Telomere()
    @test typeof(a) == Telomere
end

function test_forward_reverse_gene()
    @test isequal(Gene(0, "d", true), Gene(0, "d", false))
end

function test_telomere_repr()
    @test show(Telomere()) == "."
end

function test_reverse_equality()
    @test !isequal(Gene(0, "D", true), Gene(1, "D", true))
end

@testset "Gene Tests" begin
    test_telomere_type()
    test_forward_reverse_gene()
    test_telomere_repr()
    test_reverse_equality()
end

[0m[1mTest Summary: | [22m[32m[1mPass  [22m[39m[36m[1mTotal  [22m[39m[0m[1mTime[22m
Gene Tests    | [32m   4  [39m[36m    4  [39m[0m0.1s


Test.DefaultTestSet("Gene Tests", Any[], 4, false, false, true, 1.718295045623e9, 1.718295045749e9, false, "In[189]")

EVERYTHING WORKS ABOVE THIS LINE*******************************************************************************************************************************************************************

In [194]:
 """The abstraction of a chromosome in DCJ model, a list of telomeres and genes

    Parameters
    ----------
    genes: list of genes or string (req)
        The object to be converted into a circular or linear chromosome.

    """
mutable struct Chromosome
    genes::Vector{AbstractGene}
end

function Chromosome(genes::Vector{AbstractGene}, id_set::Set{Int})
    # print("entered Chromosome constructor for genes********************************************************")
    
    for gene in genes
        telomere = show(gene) == "."
        if !telomere 
            if gene.id ∉ id_set 
                push!(id_set, gene.id)
            else 
                throw(ArgumentError("There are duplicate genes with the same ID in this genome."))
            end
        end 
    end 
    content = genes

    return Chromosome(content)

end 


function Chromosome(genes::String, id_counter::Ref{Int64}, id_to_char::Dict{Char, Int64}, char_to_id::Dict{Char, Vector{Int64}})
    content = AbstractGene[]

    if !((genes[1] == '.') == (genes[end] == '.'))
        throw(ArgumentError("Linear Chromosome must start and end with telomeres."))
    end

    for (i, gene) in pairs(genes)
        dna = string(gene)
        rev = isuppercase(gene)
        telomere = gene == '.'        
        if telomere 
            push!(content, Telomere())
        else  # gene 
            id = id_counter[]

            id_to_char[gene] = id
            if gene in keys(char_to_id)
                push!(char_to_id[gene], id)
            else 
                char_to_id[gene] = [id]
            end 

            push!(content, Gene(id, dna, rev))

            id_counter[] += 1
        end
    end    
    
    print(content)
    return Chromosome(content)
end


function Chromosome(::Any) 
    throw(TypeError("Chromosome must be a list of Gene instances with unique IDs or a string of genes."))
end 


Chromosome

In [191]:
methods(Chromosome)

In [197]:
# testing strings 

# id_counter = Ref{Int}(1)
# id_to_char = Dict{Char, Int}()
# char_to_id = Dict{Char, Vector{Int}}()

# Chromosome(".AbcA.", id_counter, id_to_char, char_to_id)

# print("\n\n******************\nid_counter: ", id_counter[], "\n", "id_to_char", id_to_char, "\n","char_to_id: ", char_to_id)


# testing genes 
x = Telomere()
y = Gene(1, "A", true)
z = Gene(2, "B", true)
v = Telomere()

id_set = Set{Int}() 

Chromosome([y, y, z], id_set)  ## why doesn't it find the right constructor? 

# testing rand 
# x = 1 
# Chromosome(x) ## why doesn't it throw error??? 

LoadError: MethodError: no method matching Chromosome(::Vector{Gene}, ::Set{Int64})

[0mClosest candidates are:
[0m  Chromosome([91m::Vector{AbstractGene}[39m, ::Set{Int64})
[0m[90m   @[39m [35mMain[39m [90m[4mIn[194]:13[24m[39m
[0m  Chromosome(::Any)
[0m[90m   @[39m [35mMain[39m [90m[4mIn[194]:67[24m[39m


In [None]:
# Genome

mutable struct Genome
    data::Vector{Chromosome}
    # Genome(chromosomes::Chromosome...) = new(chromosomes)
end


In [None]:
"""
genomes A and B have the same letters of the same multiplicity; telomeres don't matter 
"""

function calculate_distance(src::String, target::String)
    check_conditions(src, target)

    target_genome = string_to_genome(target)
    # src_genome = string_to_genome(src)

    print(genome_A)

    # Chromosome.clear()
    # genome_b, id_to_str = transform_genome(B)
    # genome_a = transform_genome(A)
    # ag = AdjacencyGraph(genome_a, genome_b)
    # dcj_distance = length(ag.commonGenes) - ag.cycles - ag.ab_paths / 2
end

# function transform_genome(genome)
#     return typeof(genome) == Genome ? genome : Genome([Chromosome(c) for c in genome]...)
# end

# Chromosome.clear() = (Chromosome.string_d = Dict(), Chromosome.id_counter = 0)



println(calculate_distance(".abcd.", ".aCBd."))



In [None]:
function genome_to_string(A)
end


function string_to_genome(s)
     """
    Converts string to type Genome 
    """

    chrom_list = Vector{String}()

    chrom = ""
    new_chrom = true 
    linear = false

    for (i, g) in pairs(s)
        # if creating a new chromosome 
        if new_chrom 
            if chrom == ""
                linear = g == '.'
            else  # chrom isn't empty 
                linear = chrom[1] == '.'

                # check if empty chromosome .. 
                if g == '.' 
                    chrom = chrom * g 
                    push!(chrom_list, chrom)
                    chrom = ""
                    continue 
                end 
            end 
            
            chrom = chrom * g
            new_chrom = false  
        
        # appending more genes to chromosome 
        else  
            if linear  
                chrom = chrom * g 
                if g == '.' || i == length(s)  # find another telomere or end of str
                    push!(chrom_list, chrom) 
                    chrom = ""
                    new_chrom = true 
                end 
            else  # circular
                if g =='.'  # close circular chrom (won't incude telomere in chrom)
                    push!(chrom_list, chrom)
                    chrom = "."
                    new_chrom = true 
                elseif i == length(s)  # reach end of str 
                    chrom = chrom * g
                    push!(chrom_list, chrom)
                    chrom = "."
                    new_chrom = true 
                else 
                    chrom = chrom * g
                end 
            end 
        end 
    end

    print(chrom_list)
    
    return Genome(([Chromosome(c) for c in chrom_list]))
end 

string_to_genome("abc.j...hiiii")