In [1]:
"""
    pattern(text, k)

Return every length-`k` substring (k-mer) of `text`, in order of occurrence.

# Arguments
- `text`: the string to scan.
- `k`: the k-mer length in characters; must be at least 1.

# Returns
A `Vector{String}` with the `length(text) - k + 1` overlapping k-mers, or an
empty vector when `text` has fewer than `k` characters.

# Throws
- `ArgumentError` if `k < 1`.
"""
function pattern(text, k)
    k >= 1 || throw(ArgumentError("k must be at least 1, got $k"))
    # Slice on valid character start positions rather than raw byte offsets, so
    # multi-byte (non-ASCII) input cannot raise a StringIndexError.
    starts = collect(eachindex(text))
    kmers = String[]
    for i in 1:(length(starts) - k + 1)
        push!(kmers, text[starts[i]:starts[i + k - 1]])
    end
    return kmers
end

pattern (generic function with 1 method)

In [2]:
"""
    lexico_order(kmers)

Return a sorted copy of `kmers` in ascending lexicographic order.

The input collection is left unmodified: the function name carries no `!`, so
callers should not see their own array reordered as a side effect.

# Arguments
- `kmers`: a collection of mutually comparable items (e.g. strings).

# Returns
A new collection of the same element type holding the items of `kmers` in
ascending order; duplicates are kept.
"""
function lexico_order(kmers)
    # `sort` copies before ordering; the default `isless` order on strings is
    # the same code-point order that `cmp` yields.
    return sort(kmers)
end

lexico_order (generic function with 1 method)

In [3]:
using DataStructures
"""
    deBruijnKmers(text)

Build the node list and edge map of a de Bruijn graph from a collection of k-mers.

Every k-mer contributes its prefix (all but the last position) and its suffix
(all but the first position); an edge runs from the prefix to the suffix.

# Arguments
- `text`: an iterable of k-mers. Each k-mer is sliced with `1:length-1` and
  `2:length`, so the k-mers are assumed to be 1-indexed sequences of
  single-code-unit characters such as DNA letters.

# Returns
A tuple `(nodes, edges)`:
- `nodes`: the suffix of every k-mer in input order (duplicates kept), followed
  by each distinct prefix that never occurs as a suffix, in order of first
  appearance.
- `edges`: a `Dict` mapping each prefix to the vector of suffixes it points to
  (one entry per contributing k-mer, duplicates kept).
"""
function deBruijnKmers(text)
    # Element types are left open because the k-mer type is not known here.
    prefixes = Any[]
    suffixes = Any[]
    edges = Dict()
    for kmer in text
        head = kmer[1:length(kmer)-1]
        tail = kmer[2:length(kmer)]
        push!(prefixes, head)
        push!(suffixes, tail)
        # Only k-mers whose suffix length matches the first prefix's length
        # contribute an edge.
        if length(tail) == length(prefixes[1])
            if haskey(edges, head)
                push!(edges[head], tail)
            else
                edges[head] = [tail]
            end
        end
    end
    # Prefixes that never appear as a suffix would otherwise be missing from the
    # node list. They are found by set membership on the values themselves, not
    # by slicing the printed form of a Set with a global `k`.
    nodes = copy(suffixes)
    suffix_set = Set(suffixes)
    added = Set()
    for head in prefixes
        if !(head in suffix_set) && !(head in added)
            push!(added, head)
            push!(nodes, head)
        end
    end
    return nodes, edges
end

deBruijnKmers (generic function with 1 method)

In [4]:
"""
    AdjacencyMatrix(ListOfNodes, Edges)

Index the distinct nodes and build a 0/1 adjacency matrix from an edge map.

# Arguments
- `ListOfNodes`: collection of node labels; duplicates are allowed and ignored
  after their first occurrence.
- `Edges`: mapping from a source node to a collection of target nodes.

# Returns
A tuple `(AdjMat, list, edges, Dict1)`:
- `AdjMat`: an `n × n` `Int64` matrix (`n` = number of distinct nodes) with
  `AdjMat[i, j] == 1` when at least one edge goes from node `i` to node `j`;
  repeated edges do not increase the entry.
- `list`: the distinct nodes in order of first appearance; position `i` is the
  node with index `i`.
- `edges`: one label per edge, formed from the source node followed by the last
  element of the target node.
- `Dict1`: mapping from node label to its row/column index.

# Throws
- `KeyError` if an edge endpoint in `Edges` does not appear in `ListOfNodes`.
"""
function AdjacencyMatrix(ListOfNodes, Edges)
    # Give every distinct node a 1-based index in order of first appearance.
    index_of = Dict()
    unique_nodes = Any[]
    for node in ListOfNodes
        if !haskey(index_of, node)
            push!(unique_nodes, node)
            index_of[node] = length(unique_nodes)
        end
    end

    n = length(unique_nodes)
    AdjMat = zeros(Int64, n, n)
    edge_labels = Any[]
    for (source, targets) in pairs(Edges)
        for target in targets
            # Label = source node followed by the last element of the target.
            push!(edge_labels, string(source, target[end]))
            AdjMat[index_of[source], index_of[target]] = 1
        end
    end
    return AdjMat, unique_nodes, edge_labels, index_of
end

AdjacencyMatrix (generic function with 1 method)

In [5]:
using LinearAlgebra
"""
    find_degree(AdjMat)

Compute per-node degree information from an adjacency matrix.

# Returns
A tuple `(indegree, outdegree, degree, degree_Mat)`:
- `indegree`: column sums of `AdjMat`, one per column.
- `outdegree`: row sums of `AdjMat`, one per row.
- `degree`: element-wise sum of `indegree` and `outdegree`.
- `degree_Mat`: `Diagonal` matrix with `degree` on its diagonal.
"""
function find_degree(AdjMat)
    nrows, ncols = size(AdjMat)
    # Column c collects the edges arriving at node c.
    indegree = map(c -> sum(AdjMat[:, c]), 1:ncols)
    # Row r collects the edges leaving node r.
    outdegree = map(r -> sum(AdjMat[r, :]), 1:nrows)
    degree = indegree .+ outdegree
    return indegree, outdegree, degree, Diagonal(degree)
end

find_degree (generic function with 1 method)

In [6]:
# k-mer length used to split the genome.
k = 3;
# Split the genome string into its overlapping k-mers.
kmers = pattern("GGACTCCTCGTGGAGGCCCCCGCCCTTTTTGCCGGGCTGC",k)
# Print the k-mers in order of occurrence.
print("Kmers of the given Genome are \n", kmers)

Kmers of the given Genome are 
Any["GGA", "GAC", "ACT", "CTC", "TCC", "CCT", "CTC", "TCG", "CGT", "GTG", "TGG", "GGA", "GAG", "AGG", "GGC", "GCC", "CCC", "CCC", "CCC", "CCG", "CGC", "GCC", "CCC", "CCT", "CTT", "TTT", "TTT", "TTT", "TTG", "TGC", "GCC", "CCG", "CGG", "GGG", "GGC", "GCT", "CTG", "TGC"]

In [7]:
# Put the k-mers into lexicographic order and print the result.
LexicoOrderedKmers = lexico_order(kmers)
print("Sorted KmerList: \n", LexicoOrderedKmers)

Sorted KmerList: 
Any["ACT", "AGG", "CCC", "CCC", "CCC", "CCC", "CCG", "CCG", "CCT", "CCT", "CGC", "CGG", "CGT", "CTC", "CTC", "CTG", "CTT", "GAC", "GAG", "GCC", "GCC", "GCC", "GCT", "GGA", "GGA", "GGC", "GGC", "GGG", "GTG", "TCC", "TCG", "TGC", "TGC", "TGG", "TTG", "TTT", "TTT", "TTT"]

In [8]:
# Derive the graph nodes and the prefix => suffixes edge map from the sorted
# k-mers, then print both.
ListOfNodes,Edges = deBruijnKmers(LexicoOrderedKmers)
println("\nList of Nodes in debrujinGraphs: \n",ListOfNodes)
println("\nEdges: \n", pairs(Edges))

Set(Any[])
List of Nodes in debrujinGraphs: 
Any["CT", "GG", "CC", "CC", "CC", "CC", "CG", "CG", "CT", "CT", "GC", "GG", "GT", "TC", "TC", "TG", "TT", "AC", "AG", "CC", "CC", "CC", "CT", "GA", "GA", "GC", "GC", "GG", "TG", "CC", "CG", "GC", "GC", "GG", "TG", "TT", "TT", "TT"]

Edges: 
Dict{Any,Any}("CC" => ["CC", "CC", "CC", "CC", "CG", "CG", "CT", "CT"],"GC" => ["CC", "CC", "CC", "CT"],"GG" => ["GA", "GA", "GC", "GC", "GG"],"CG" => ["GC", "GG", "GT"],"TG" => ["GC", "GC", "GG"],"GT" => ["TG"],"GA" => ["AC", "AG"],"TT" => ["TG", "TT", "TT", "TT"],"AC" => ["CT"],"CT" => ["TC", "TC", "TG", "TT"],"AG" => ["GG"],"TC" => ["CC", "CG"])


In [9]:
# Build the adjacency matrix, the distinct node list, the edge labels and the
# node => index map.
AdjMat,List,edges,IndexingNodes = AdjacencyMatrix(ListOfNodes,Edges)
# Display the node => index map as the cell result.
IndexingNodes

Dict{Any,Any} with 12 entries:
  "CC" => 3
  "GC" => 5
  "GG" => 2
  "CG" => 4
  "TG" => 8
  "GT" => 6
  "GA" => 12
  "TT" => 9
  "AC" => 10
  "CT" => 1
  "AG" => 11
  "TC" => 7

In [10]:
# Display the adjacency matrix as the cell result.
AdjMat

12×12 Array{Int64,2}:
 0  0  0  0  0  0  1  1  1  0  0  0
 0  1  0  0  1  0  0  0  0  0  0  1
 1  0  1  1  0  0  0  0  0  0  0  0
 0  1  0  0  1  1  0  0  0  0  0  0
 1  0  1  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  1  0  0  0  0
 0  0  1  1  0  0  0  0  0  0  0  0
 0  1  0  0  1  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  1  1  0  0  0
 1  0  0  0  0  0  0  0  0  0  0  0
 0  1  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  1  1  0

In [11]:
using GraphRecipes, Plots
# Draw the directed graph described by AdjMat, labelling each node with the
# corresponding entry of List and marking edge direction with closed arrowheads.
graphplot(AdjMat,names=List,arrow=arrow(:head, :closed, 1, 1),
    nodeshape=:circle, nodesize=0.2, nodecolor=:red, curvature_scalar=0.05)

┌ Info: Precompiling Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80]
└ @ Base loading.jl:1260
ERROR: LoadError: LoadError: InitError: could not load library "C:\Users\sriva\.julia\artifacts\ff37fcc80050ffc25ef51f5fae04c0d02c55369d\bin/libfdk-aac-1.dll"
The specified module could not be found. 
Stacktrace:
 [1] dlopen(::String, ::UInt32; throw_error::Bool) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.4\Libdl\src\Libdl.jl:109
 [2] dlopen(::String, ::UInt32) at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.4\Libdl\src\Libdl.jl:109
 [3] macro expansion at C:\Users\sriva\.julia\packages\JLLWrappers\KuIwt\src\products\library_generators.jl:61 [inlined]
 [4] __init__() at C:\Users\sriva\.julia\packages\libfdk_aac_jll\6yCaP\src\wrappers\x86_64-w64-mingw32.jl:8
 [5] _include_from_serialized(::String, ::Array{Any,1}) at .\loading.jl:697
 [6] _require_from_serialized(::String) at .\loading.jl:748
 [7] _require(::Base.PkgId) at .\loading.jl:1039
 [8] require(

ErrorException: Failed to precompile Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80] to C:\Users\sriva\.julia\compiled\v1.4\Plots\ld3vC_POkQ1.ji.

In [12]:
# Print the edge labels returned by AdjacencyMatrix.
print(edges)

Any["CCC", "CCC", "CCC", "CCC", "CCG", "CCG", "CCT", "CCT", "GCC", "GCC", "GCC", "GCT", "GGA", "GGA", "GGC", "GGC", "GGG", "CGC", "CGG", "CGT", "TGC", "TGC", "TGG", "GTG", "GAC", "GAG", "TTG", "TTT", "TTT", "TTT", "ACT", "CTC", "CTC", "CTG", "CTT", "AGG", "TCC", "TCG"]

In [13]:
# Compute the in-degree and out-degree vectors, their sum, and the diagonal
# degree matrix; the trailing `;` suppresses the cell output.
Indegree, Outdegree, Degree, Degree_Mat = find_degree(AdjMat);

In [14]:
# Print the in-degree of each node, in node-index order.
println(Indegree)

[3, 4, 3, 2, 3, 1, 1, 3, 2, 1, 1, 1]


In [15]:
# Print the out-degree of each node, in node-index order.
println(Outdegree)

[3, 3, 3, 3, 2, 1, 2, 2, 2, 1, 1, 2]


In [16]:
# Display the diagonal degree matrix as the cell result.
Degree_Mat

12×12 Diagonal{Int64,Array{Int64,1}}:
 6  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  7  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  6  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  5  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  5  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  2  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  3  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  5  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  4  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  2  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  2  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  3