In [1]:
using NBInclude
@nbinclude("datastructs.ipynb")

calculate_distance (generic function with 1 method)

In [2]:
# default dict stores gene ID --> location of gene's head and tail
 
# Print one line per entry of `dict` to stdout, formatted as
# "<name> <gene id> => <locations>", where <name> is looked up in `id_to_str`
# by gene ID. A gene ID missing from `id_to_str` makes the lookup throw.
# NOTE(review): this is a two-argument `Base.show` method that takes no IO stream.
function Base.show(dict::DefaultDict{Int, Vector{Int}}, id_to_str::Dict{Int, String})
    for entry in dict
        gene_id, location = entry
        label = id_to_str[gene_id]
        println(label, " $(gene_id) => $(location)")
    end
end

In [3]:
# process adj list 
# create dictionary of gene ID --> index/location of gene's head and tail 

# Record the adjacency index `idx[]` as the location of gene end `ge`.
#
#   ge          gene end to register; telomere ends are skipped
#   idx         reference holding the index of the adjacency currently being processed
#   gid_to_loc  gene ID => two-slot vector; slot 1 receives a tail, slot 2 a head
#
# Returns `nothing` for a telomere, otherwise the index that was stored.
function assign_ge_idx_to_gid_to_locdict(ge::GeneEnd, idx::Ref{Int}, gid_to_loc::DefaultDict{Int, Vector{Int}})
    # Telomeres carry no gene, so there is nothing to register.
    ge.gene == Telomere() && return nothing

    slot = ge.head == true ? 2 : 1
    gid_to_loc[ge.gene.id][slot] = idx[]
end

# Build the gene ID => location dictionary for an adjacency list.
#
# Each value is a two-element vector: element 1 is the 1-based position in
# `adj_list` of the adjacency holding the gene's tail, element 2 the position of
# the adjacency holding its head. A slot that is never assigned keeps the default 0.
function process_adj_list(adj_list:: Vector{Adjacency})
    locations = DefaultDict{Int, Vector{Int}}(() -> zeros(Int, 2))
    position = Ref{Int}(0)

    for (i, adjacency) in enumerate(adj_list)
        position[] = i
        for gene_end in (adjacency.left, adjacency.right)
            assign_ge_idx_to_gid_to_locdict(gene_end, position, locations)
        end
    end

    return locations
end
     

process_adj_list (generic function with 1 method)

In [1]:
# helpers for dcj operations and distance


# Return the end of `adj` that is opposite to `ge`.
#
#   ge   gene end expected to be one of the two ends of `adj`
#   adj  adjacency to inspect
#
# Returns `adj.right` when `ge` equals `adj.left`, and `adj.left` when `ge` equals
# `adj.right` (the left end is checked first).
# Throws ArgumentError when `ge` matches neither end.
function other_adjacency_end(ge::GeneEnd, adj::Adjacency)
    if adj.left == ge
        return adj.right
    elseif adj.right == ge
        return adj.left
    end

    # `show` conventionally prints and returns `nothing`, which cannot be
    # concatenated into a message; capture the textual form with `sprint` instead.
    ge_text = sprint(show, ge)
    adj_text = return_adj(adj)
    err = "Adjacency " * adj_text * " doesn't have other end from " * ge_text
    throw(ArgumentError(err))
end

# Telomere convenience method: wrap a fresh telomere in a GeneEnd and delegate to the
# GeneEnd method. The argument `t` itself is not inspected.
other_adjacency_end(t::Telomere, adj::Adjacency) = other_adjacency_end(GeneEnd(Telomere()), adj)


# given ge, finds ge in adj list 

# e.g., given the list of adjacencies {a:h, b:t} ... {b:h, d:t} and ge = b:t,
# returns the index of the adjacency that contains b:t, i.e. {a:h, b:t}
# Look up which adjacency of the source genome holds the gene end `target_ge`.
#
#   target_ge     gene end to locate; its gene must expose a nonzero `id`
#   src_gid_to_l  gene ID => two-slot vector of adjacency indices; slot 1 is read
#                 for a tail, slot 2 for a head
#   src_adjs      adjacency list the stored indices point into
#
# Returns the index into `src_adjs` recorded for `target_ge`.
# Throws ArgumentError when the gene ID is 0 or the recorded index is 0, and
# BoundsError when the recorded index does not fall inside `src_adjs`.
function find_adj(target_ge::GeneEnd, src_gid_to_l::DefaultDict{Int, Vector{Int}}, src_adjs::Vector{Adjacency})
    th_idx = (target_ge.head == true) ? 2 : 1

    gene_id = target_ge.gene.id
    if gene_id == 0
        throw(ArgumentError("Gene ID is 0"))
    end

    src_ge_idx = src_gid_to_l[gene_id][th_idx]
    if src_ge_idx == 0
        throw(ArgumentError("Source Gene End is 0"))
    end

    # Make sure the recorded index really points into the adjacency list.
    checkbounds(src_adjs, src_ge_idx)

    return src_ge_idx
end

LoadError: UndefVarError: `GeneEnd` not defined

In [None]:
# helpers for adj list to genome str() 


# Build the text label for the gene carried by `common_ge`.
#
# The label is the gene's `dna` followed by its `dup` value. The `dna` part is
# uppercased when the end of `left_adj` that carries this gene is a head and
# lowercased otherwise. The left end of `left_adj` is consulted when its gene
# equals the common gene; in every other case the right end is consulted.
function common_gene_helper(common_ge::GeneEnd, left_adj::Adjacency)
    shared = common_ge.gene
    suffix = string(shared.dup)

    matching_end = shared == left_adj.left.gene ? left_adj.left : left_adj.right
    label = matching_end.head == true ? uppercase(shared.dna) : lowercase(shared.dna)

    return label * suffix
end

# Find the gene end of `adj1` whose gene links it to `adj2`.
#
# Selection order (first match wins):
#   1. left end of `adj1` is a telomere   -> right end of `adj1`
#   2. right end of `adj1` is a telomere  -> left end of `adj1`
#   3. left gene of `adj1` occurs in `adj2`  -> left end of `adj1`
#   4. right gene of `adj1` occurs in `adj2` -> right end of `adj1`
#   5. otherwise there is no common gene
#
# Returns `(letter, common_ge)`: `letter` is the label produced by
# `common_gene_helper(common_ge, adj1)`, and both entries are `nothing` when no
# common gene end was found.
function common_gene(adj1::Adjacency, adj2::Adjacency)
    common_ge = if adj1.left.gene == Telomere()
        adj1.right
    elseif adj1.right.gene == Telomere()
        adj1.left
    elseif adj1.left.gene == adj2.left.gene || adj1.left.gene == adj2.right.gene
        adj1.left
    elseif adj1.right.gene == adj2.left.gene || adj1.right.gene == adj2.right.gene
        adj1.right
    else   # no common gene between the two adjacencies
        nothing
    end

    # Identity comparison: `==` against `nothing` depends on whatever `==` methods
    # exist for the operand type, `===` does not.
    letter = common_ge === nothing ? nothing : common_gene_helper(common_ge, adj1)

    return letter, common_ge
end
    

In [None]:
# Close a linear chromosome and append it to the genome string.
#
#   chrom       text of the chromosome being closed
#   genome_str  text of the genome built so far
#   last        true when this is the final chromosome (no trailing comma)
#
# The chromosome is terminated with "." (or ".," when more chromosomes follow) and
# appended to `genome_str`. Returns `("", updated genome_str)`; the empty string is
# the reset chromosome buffer.
function end_lin_chrom(chrom::String, genome_str::String, last::Bool)
    terminator = last ? "." : "." * ","
    closed = chrom * terminator
    return "", genome_str * closed
end

# Close a circular chromosome and append it to the genome string.
#
#   firstgene_circ_ge  gene end remembered from the start of the circular chromosome
#   i                  index of the adjacency that closes the chromosome
#   adj_list           adjacency list being converted
#   chrom              text of the chromosome built so far
#   genome_str         text of the genome built so far
#   last               true when this is the final chromosome (no trailing comma)
#
# The closing gene label is obtained from `common_gene` applied to `adj_list[i]` and
# a temporary adjacency pairing a telomere with `firstgene_circ_ge`.
# Returns `(chrom, genome_str)` with the label appended to `chrom` and the completed
# `chrom` appended to `genome_str`. Unlike `end_lin_chrom`, the returned `chrom` is
# not reset to "".
function end_circ_chrom(
    firstgene_circ_ge::GeneEnd,
    i::Int,
    adj_list::Vector{Adjacency},
    chrom::String,
    genome_str::String,
    last::Bool,
)
    closing_adj = adj_list[i]
    anchor_adj = Adjacency(GeneEnd(Telomere()), firstgene_circ_ge)
    end_circ_gene = first(common_gene(closing_adj, anchor_adj))

    tail = last ? end_circ_gene : end_circ_gene * ","
    chrom *= tail

    return chrom, genome_str * chrom
end


In [1]:
# Consume adjacency `i` of `adj_list` and extend the genome string being built.
#
#   i                  index of the adjacency to process
#   adj_list           adjacency list being converted (presumably already ordered so
#                      that consecutive adjacencies share a gene; verify against caller)
#   chrom              text of the chromosome currently open ("" when none is open)
#   genome_str         text of all chromosomes completed so far
#   circ               true while the open chromosome is treated as circular
#   firstgene_circ_ge  gene end remembered when a circular chromosome was opened
#
# Returns the updated `(chrom, genome_str, circ, firstgene_circ_ge)`, meant to be
# passed back in for adjacency `i + 1`.
#
# NOTE(review): when `common_gene` reports no common gene (`nothing`, `nothing`)
# while no chromosome is open or while a linear chromosome is open, the code below
# passes `nothing` to `other_adjacency_end` / string concatenation; no handling for
# that case is visible here. Confirm the input ordering rules this out.
function process_one_adj(
    i::Int,
    adj_list::Vector{Adjacency},
    chrom::String,
    genome_str::String,
    circ::Bool,
    firstgene_circ_ge,
)
    adj = adj_list[i]

    if adj.left.gene == adj.right.gene
        # Same gene on both ends: this adjacency is a complete chromosome by itself.
        if adj.left.gene == Telomere()
            # Telomere on both ends: written as an empty linear chromosome.
            genome_str *= "..,"
        else
            gene = adj.left.gene
            label = gene.dna * string(gene.dup)
            label = adj.left.head == true ? uppercase(label) : lowercase(label)
            genome_str *= label * ","
        end
        chrom = ""
    elseif i == length(adj_list)
        # Last adjacency: close whichever chromosome is still open.
        if circ
            chrom, genome_str = end_circ_chrom(firstgene_circ_ge, i, adj_list, chrom, genome_str, true)
        else
            chrom, genome_str = end_lin_chrom(chrom, genome_str, true)
        end
    else
        cg_char, cg_ge = common_gene(adj, adj_list[i + 1])

        if isempty(chrom)
            # No chromosome is open, so this adjacency starts a new one.
            if adj.left.gene != Telomere() && adj.right.gene != Telomere()
                # No telomere on either end: treat as the start of a circular
                # chromosome and remember the end that must eventually close it.
                firstgene_circ_ge = other_adjacency_end(cg_ge, adj)
                circ = true
            else
                # A telomere is present: linear chromosome, opened with ".".
                chrom *= "."
            end
            chrom *= cg_char
        elseif circ
            if cg_char === nothing
                # No gene shared with the next adjacency: the circle closes here.
                closing = common_gene_helper(firstgene_circ_ge, adj)
                chrom *= closing * ","
                genome_str *= chrom
                chrom = ""
                circ = false
            else
                chrom *= cg_char
            end
        else
            # Open linear chromosome: it ends when the far side of this adjacency
            # is a telomere, otherwise the shared gene is appended.
            if other_adjacency_end(cg_ge, adj) == GeneEnd(Telomere())
                chrom, genome_str = end_lin_chrom(chrom, genome_str, false)
            else
                chrom *= cg_char
            end
        end
    end

    return chrom, genome_str, circ, firstgene_circ_ge
end


# Convert an ordered adjacency list into its genome string.
#
# Walks the list front to back, threading the conversion state
# (open chromosome text, genome text, circular flag, remembered first gene end)
# through `process_one_adj`, and returns the accumulated genome text.
# An empty list yields "".
function ordered_adjlist_to_genomestr(adj_list::Vector{Adjacency})
    # state = (chrom, genome_str, circ, firstgene_circ_ge)
    state = ("", "", false, nothing)

    for i in eachindex(adj_list)
        state = process_one_adj(i, adj_list, state...)
    end

    return state[2]
end

# Convert an adjacency list back into a genome string and check it against a target.
#
#   adj_list     adjacencies of the source genome
#   gid_to_loc   gene ID => location dictionary handed to `reorder_adjs`
#   tar_wcounts  target genome string whose gene set the result must match
#
# The reordered adjacency list is printed between two banner lines. Returns the
# source genome string.
# Throws ArgumentError (after printing the genome string and both gene sets) when
# `find_genes` yields different gene sets for the source and target strings.
function adjlist_to_str(adj_list::Vector{Adjacency}, gid_to_loc::DefaultDict{Int, Vector{Int}}, tar_wcounts::String)
    ordered = reorder_adjs(adj_list, gid_to_loc)

    println("*******")
    show(ordered)
    println("\n**********")

    src_genome_str = ordered_adjlist_to_genomestr(ordered)

    # Sanity check: both genomes must contain the same genes.
    src_geneset = find_genes(src_genome_str)
    tar_geneset = find_genes(tar_wcounts)
    tar_geneset != src_geneset || return src_genome_str

    foreach(println, (src_genome_str, tar_geneset, src_geneset))
    throw(ArgumentError("Source gene set != target gene set"))
end


LoadError: UndefVarError: `Adjacency` not defined