## Problem 31: Find a k-Universal Circular String


#### k-Universal Circular String Problem

Find a k-universal circular binary string.

> Given: An integer k.

> Return: A k-universal circular string. (If multiple answers exist, you may return any one.)



<br>

In [136]:
# ---- INPUT -----

# The problem's single input: the word length k of the k-universal string.
k = 9

In [142]:
from itertools import product

alphabet = "01"

# Every length-k word over the alphabet, in the order itertools.product
# yields them (len(alphabet) ** k words in total).
kmers = list(map("".join, product(alphabet, repeat=k)))
# kmers

In [138]:
# Scratch check: the (k-1)-character prefix of the third k-mer.
kmers[2][:-1]

'00000001'

In [161]:
adj = {}

# Each k-mer contributes one edge: its (k-1)-mer prefix -> its (k-1)-mer suffix.
for word in kmers:
    head, tail = word[:-1], word[1:]

    # Create the prefix node on first sight, then record the edge.
    adj.setdefault(head, []).append(tail)

    # The suffix is a node too, even if no edge has left it yet.
    adj.setdefault(tail, [])

# adj

In [160]:
# Picking the endpoints for the Eulerian traversal [Problem 30]

# Every node here has the same in-degree/out-degree difference, so no node is
# singled out as an endpoint: take the first and the last node in insertion order.

nodes = [*adj]

start, end = nodes[0], nodes[-1]

print("\nstart, end =", (start, end))
# nodes


start, end = ('00000000', '11111111')


In [151]:
# Augment the graph with one extra end -> start edge; a fresh successor list is
# built for `end` rather than mutating the existing one.
# NOTE: this cell is not idempotent -- every re-run appends the edge again.
# NOTE(review): each node already has equal in- and out-degree before this edge
# is added; confirm the augmentation is intended (a later cell slices the last
# node off the traversal result).
adj[end] = [*adj.get(end, []), start]

In [162]:
def display_adjacency_list(adj_list):
    """Print a graph as one ``node -> succ1,succ2,...`` line per node.

    Nodes are printed in sorted order and each node's successors are sorted
    and comma-joined. A node with no successors prints as ``node -> ``.

    Args:
        adj_list: dict mapping each node (str) to a list of successor
            nodes (str).
    """
    # Read from the argument, not from a notebook-level graph variable, so the
    # function prints whichever graph it is actually given.
    for node in sorted(adj_list):
        edge_list = ','.join(sorted(adj_list[node]))
        print(f"{node} -> {edge_list}")

# display_adjacency_list(adj)

In [153]:
# Constructing and finding an Eulerian cycle [Problem 30]
def eulerian_cycle(adj, start=None):
    """Walk the graph from ``start``, consuming every reachable edge once.

    Stack-based traversal (the iterative form of Hierholzer's algorithm):
    keep following unused edges from the node on top of the stack; when that
    node has none left, move it from the stack to the output.

    Args:
        adj: dict mapping each node to a list of successor nodes, one list
            entry per edge. It is not modified.
        start: node to begin from. ``None`` selects the first node (in dict
            order) that has at least one outgoing edge.

    Returns:
        The visited nodes in walk order, as a list. When the graph has an
        Eulerian cycle through ``start``, the list begins and ends with
        ``start``. An empty list is returned when ``start`` is ``None`` and
        no node has an outgoing edge.

    No check is made that the graph actually is Eulerian. Edges are taken
    from the END of each successor list, so the list order decides which
    cycle is produced.
    """
    # Private copy of every successor list: edges are consumed by popping.
    remaining = {node: list(successors) for node, successors in adj.items()}

    if start is None:
        # Choose a start node with outgoing edges.
        start = next((node for node in remaining if remaining[node]), None)
        if start is None:
            # Empty graph, or no edges at all: nothing to walk.
            return []

    stack, cycle = [start], []

    while stack:
        node = stack[-1]

        # .get() rather than [...]: a node that only ever appears as a
        # successor has no key of its own and simply has no edges to follow.
        if remaining.get(node):
            # Consume the edge node -> successor and keep walking.
            stack.append(remaining[node].pop())
        else:
            # Dead end: every edge out of this node is used, so emit it.
            cycle.append(stack.pop())

    # Nodes were emitted while backtracking; reverse to get walk order.
    cycle.reverse()

    return cycle


In [154]:
def reconstruct_string_from_genome_path(patterns):
    """Spell the string traced by a path of overlapping patterns.

    The result is the first pattern in full, followed by the last character
    of every subsequent pattern.

    Args:
        patterns: sequence of non-empty strings in path order.

    Returns:
        The reconstructed string; ``""`` when ``patterns`` is empty.
    """
    if not patterns:
        # An empty path spells an empty string instead of raising IndexError.
        return ""

    # Single join instead of growing the string one character at a time.
    return patterns[0] + "".join(pattern[-1] for pattern in patterns[1:])

In [155]:
# Eulerian path: run the traversal from `start`, then drop the last node of the
# result (this removes the final step, i.e. the augmented edge).
cycle = eulerian_cycle(adj, start)
cycle = cycle[:-1]

# Arrow-separated rendering of the node path, kept for inspection.
path_str = "->".join(cycle)
# print(path_str)
# cycle

In [163]:
# reconstruction algorithm from [Problem 25]: spell the string traced by the
# node path (first node in full, then the last character of each later node)
reconstructed = reconstruct_string_from_genome_path(cycle)
# reconstructed

In [159]:
# ----- OUTPUTS -----

# The circular answer needs 2^k characters (e.g. 2^4 = 16 when k = 4), so the
# trailing k-1 characters of the linear reconstruction -- one node's length,
# 3 characters when k = 4 -- are dropped.
k1 = k-1
reconstructed[:-k1]


'00000000111111111011111110011111101011111100011111011011111010011111001011111000011110111011110110011110101011110100011110011011110010011110001011110000011101110011101101011101100011101011011101010011101001011101000011100110011100101011100100011100011011100010011100001011100000011011011010011011001011011000011010110011010101011010100011010010011010001011010000011001100011001010011001001011001000011000101011000100011000010011000001011000000010101010010101000010100100010100010010100000010010010000010001000010'