In [None]:
import random 
# a function which takes in a set of patterns and
# returns their adjacency list

def debruin_patterns(patterns):
  """Build the de Bruijn graph of a collection of patterns as an adjacency list.

  Every pattern contributes one edge, going from its prefix (all characters
  but the last) to its suffix (all characters but the first).

  Args:
    patterns: iterable of strings. It is only read; the caller's object is
      never reordered or otherwise modified.

  Returns:
    dict mapping each prefix node to the list of suffix nodes it points to.
    Keys are inserted in lexicographic order, and each adjacency list follows
    the lexicographic order of the patterns. A repeated pattern yields a
    repeated edge. Nodes that only ever occur as a suffix get no key.
  """
  # sorted() returns a new list, so the caller's sequence keeps its order
  ordered = sorted(patterns)

  # one key per distinct prefix, inserted in sorted order
  nodes = {prefix: [] for prefix in sorted(p[:-1] for p in ordered)}

  # one edge per pattern: prefix -> suffix
  for pattern in ordered:
    nodes[pattern[:-1]].append(pattern[1:])

  return nodes

In [None]:
# Python3 function that returns an Eulerian circuit of a given
# directed graph using Hierholzer's algorithm
def EulerianCycle(adj):
    """Walk a directed graph with Hierholzer's algorithm and return the circuit.

    Args:
        adj: dict mapping each node to a list of the nodes it has an edge to.
            Parallel edges are expressed by repeating a neighbour in the list.
            Nodes may be any hashable value. The dict and its lists are left
            untouched; the traversal consumes a private copy.

    Returns:
        None when `adj` is empty. Otherwise a list of nodes in visiting
        order. When the graph has an Eulerian circuit, the list traverses
        every edge exactly once and its first and last entries are the same
        start node (the repeated node is NOT stripped). The start node is
        drawn with random.choice, so repeated calls may return different
        circuits. No check is made that an Eulerian circuit actually exists.
    """
    if not adj:
        return None  # empty graph

    # Private copy: the walk below removes each edge as it is traversed, and
    # the caller's adjacency list must survive the call.
    remaining = {node: list(successors) for node, successors in adj.items()}

    # Nodes that only appear as a target have no outgoing edges; give them an
    # empty list so the lookup inside the loop cannot raise KeyError.
    for successors in adj.values():
        for node in successors:
            remaining.setdefault(node, [])

    # Stack of the vertices on the current trail; any start vertex works for
    # a circuit, so one is chosen at random.
    curr_path = [random.choice(list(remaining))]

    # Vertices are emitted here as the walk backtracks (reverse visiting order).
    circuit = []

    while curr_path:
        curr_v = curr_path[-1]
        if remaining[curr_v]:
            # Follow (and consume) one unused edge leaving the current vertex.
            curr_path.append(remaining[curr_v].pop())
        else:
            # Dead end: every edge out of curr_v is used, so it is final.
            circuit.append(curr_path.pop())

    # Backtracking produced the circuit end-to-start; flip it.
    circuit.reverse()
    return circuit





      
      
    

          

In [None]:
# a program which takes in a path
# and spits out a genome sequence
def PathToGenome(path):
  """Spell the string described by a path of overlapping nodes.

  The result is the first character of every node except the last, followed
  by the last node in full (converted with str).

  Args:
    path: sequence of strings in path order.

  Returns:
    The reconstructed string, or '' when the path is empty.

  Raises:
    IndexError: if any node other than the last is an empty string.
  """
  # An empty path spells nothing (indexing the last node would fail).
  if len(path) == 0:
    return ''

  # join() builds the result in one pass instead of repeated concatenation
  leading = ''.join(node[0] for node in path[:-1])
  return leading + str(path[-1])

In [None]:
# takes in an integer k
# outputs the k-universal circular string

import itertools

def kuniversal(k):
  """Return a k-universal circular binary string.

  All 2**k binary k-mers are turned into a de Bruijn graph
  (debruin_patterns), an Eulerian cycle is walked through it (EulerianCycle)
  and the cycle is spelled out as text (PathToGenome).

  Args:
    k: length of the binary words to cover; must be a positive integer.

  Returns:
    A string of '0'/'1' characters of length 2**k. For k >= 2 the exact
    string depends on the cycle returned by EulerianCycle and may vary
    between calls.

  Raises:
    ValueError: if k is smaller than 1.
  """
  if k < 1:
    raise ValueError("k must be a positive integer")

  if k == 1:
    # With k = 1 the graph nodes are empty strings (k-1 = 0 characters), so
    # the path cannot be spelled from node characters; answer directly.
    return '01'

  patterns = ["".join(bits) for bits in itertools.product('01', repeat=k)]
  db = debruin_patterns(patterns)
  eu_path = EulerianCycle(db)

  # Drop the last k-1 nodes of the cycle: the characters they would add are
  # the ones a circular string already wraps around to at its beginning.
  text = PathToGenome(eu_path[:len(eu_path) - k + 1])

  return text


In [None]:
def compositionk(dnaseq,k):
  """Return every length-k window of dnaseq, sorted lexicographically.

  Args:
    dnaseq: sequence to slice (typically a DNA string).
    k: window length.

  Returns:
    A new sorted list holding one slice per start index
    0 .. len(dnaseq)-k; duplicate windows are kept. The list is empty when
    k is larger than len(dnaseq).
  """
  # number of start positions at which a full window of length k fits
  window_count = len(dnaseq) - k + 1
  return sorted(dnaseq[start:start + k] for start in range(window_count))

In [None]:
# Demo: build a 3-universal circular binary string. The result can differ
# between runs because EulerianCycle starts from a randomly chosen node.
kuniversal(3)

['000', '001', '010', '011', '100', '101', '110', '111']
['11', '11', '10', '01', '10', '00', '00', '01', '11']


'11101000'