## String Reconstruction from Read-Pairs Problem

[ba3j](https://rosalind.info/problems/ba3j/)

Reconstruct a string from its paired composition.

**Given:** Integers k and d, followed by a collection of paired k-mers PairedReads.

**Return:** A string Text whose (k, d)-mer composition is equal to PairedReads. (If multiple answers exist, you may return any one.)

In [10]:
from collections import defaultdict


In [11]:
def get_prefix(pair):
    """Return the prefix of a read-pair.

    The prefix is a 2-tuple holding each of the two reads with its last
    symbol removed.
    """
    first_read, second_read = pair[0], pair[1]
    return first_read[:-1], second_read[:-1]

In [12]:
def get_suffix(pair):
    """Return the suffix of a read-pair.

    The suffix is a 2-tuple holding each of the two reads with its first
    symbol removed.
    """
    first_read, second_read = pair[0], pair[1]
    return first_read[1:], second_read[1:]

In [13]:
def get_de_bruijn_graph(pairs):
    """Build the paired de Bruijn graph of a collection of read-pairs.

    Every read-pair contributes exactly one edge, running from its prefix
    node (``get_prefix``) to its suffix node (``get_suffix``).

    Args:
        pairs: Iterable of read-pairs, each indexable as ``pair[0]`` and
            ``pair[1]``.

    Returns:
        A ``(graph, vertices)`` tuple. ``graph`` is a ``defaultdict`` that
        maps each prefix node to the list of suffix nodes it points to, with
        one list entry per read-pair. ``vertices`` is a list of every
        distinct node, in order of first appearance.
    """
    # Successors are kept in a list rather than a set: a read-pair that
    # occurs more than once must produce parallel edges, otherwise the
    # path through the graph would skip the repeated occurrences.
    graph = defaultdict(list)
    # Dict keys serve as an insertion-ordered set, so the vertex list is
    # the same on every run.
    vertices = {}
    for pair in pairs:
        prefix = get_prefix(pair)
        suffix = get_suffix(pair)
        graph[prefix].append(suffix)
        vertices[prefix] = None
        vertices[suffix] = None
    return graph, list(vertices)

In [15]:
def get_reconstruction(path, k, d):
    """Spell the string described by a path of paired (k-1)-mer nodes.

    Args:
        path: Non-empty sequence of ``(first, second)`` string pairs.
        k: Length of each read of a pair; an int or an int-like value
            such as ``"4"``.
        d: Gap between the two reads of a pair; an int or an int-like
            value.

    Returns:
        The first ``k + d`` symbols of the string spelled by the first
        components, followed by the whole string spelled by the second
        components.

    Raises:
        IndexError: If ``path`` is empty.
    """
    # Coerce both once: previously only k was converted (inside the slices),
    # so int-like strings failed later on the ``d + k`` sum.
    k, d = int(k), int(d)
    last = k - 2  # slice start that keeps only the final symbol of a (k-1)-mer
    head, rest = path[0], path[1:]

    pref = head[0] + ''.join(x[last:] for x, _ in rest)
    suff = head[1] + ''.join(y[last:] for _, y in rest)

    # The second string starts k + d symbols after the first one, so only
    # that many leading symbols of the first string are needed.
    return pref[:d + k] + suff

In [16]:
def get_eulerian_path(graph, vertices):
    """Walk every edge of a directed graph once and return the vertex order.

    The walk starts at a vertex with more outgoing than incoming edges. If
    no such vertex exists (every vertex is balanced, i.e. the walk is a
    cycle), it starts at the first vertex that has an outgoing edge.

    Args:
        graph: Mapping from a vertex to a collection of its successors, one
            entry per edge. It is not modified.
        vertices: Collection of the graph's vertices.

    Returns:
        The visited vertices in walk order, or ``[]`` when ``vertices`` is
        empty.
    """
    if not vertices:
        return []

    # Edges are consumed during the walk, so work on a private copy and
    # leave the caller's graph intact.
    remaining = {
        vertex: list(graph[vertex]) for vertex in vertices if vertex in graph
    }

    in_degree = dict.fromkeys(vertices, 0)
    for successors in remaining.values():
        for successor in successors:
            in_degree[successor] = in_degree.get(successor, 0) + 1

    sources = [
        vertex for vertex in vertices
        if in_degree[vertex] < len(remaining.get(vertex, ()))
    ]
    if sources:
        start = sources[-1]
    else:
        # Balanced graph: any vertex that still has an edge can open the cycle.
        with_edges = [vertex for vertex in vertices if remaining.get(vertex)]
        start = with_edges[0] if with_edges else next(iter(vertices))

    # Follow unused edges until stuck, then back up; vertices are emitted
    # in reverse order as the stack unwinds.
    stack, trail = [start], []
    while stack:
        successors = remaining.get(stack[-1])
        if successors:
            stack.append(successors.pop())
        else:
            trail.append(stack.pop())
    trail.reverse()
    return trail
  

In [17]:
# Input layout: the first line holds k and d; each following line holds one
# read-pair written as "READ1|READ2".
file = "rosalind_ba3j.txt"
with open(file, 'r') as f:
    lines = f.readlines()
    k, d = map(int, lines[0].split())
    # Skip blank lines: they would split into [''] and fail later when the
    # second read of the pair is accessed.
    pairs = [line.strip().split('|') for line in lines[1:] if line.strip()]

graph, vertices = get_de_bruijn_graph(pairs)

path = get_eulerian_path(graph, vertices)
# Left as a bare expression so the notebook displays the reconstructed string.
get_reconstruction(path, k, d)

'ATGCGTTTTGCTCACTCTTACACGTGCTGGGCGTTTCTAACGGGAACAGACGACATCCACACATTTCAACCTAGCCCGTCGTGGAGGCTTACGTGGAGCTTAGTGGTTTCAGTTGGAGGCTTACGGAGGCTTACGTGGAGCTTAGTGGTTTCAGGTGGAGCTTAGTGGTTTCAGAGGAGATACTTGGAGGCTTACGTGGAGGCTTACGTGGAGCTTAGTGGTTTCAGGAGGCTTACGTGGAGCTTAGTGGTTTCAGGGGAGCTTAGTGGTTTCAGCACACGGAGGCTTACGTGGAGCTTAGTGGTTTCAGAATAAACAATCCATTTAGAATACCTCATCGAGATCGACGCGAAGTGGTAGTCAAAACCCTACCGGAGGCTTACGTGGAGCTTAGTGGTTTCAGACGTCCCGTACTTGCCCGGAGGCTTACGTGGAGCTTAGTGGTTTCAGATCCTAGGTTATCACAAGGGTTACGTCGCCTCCGGTGACCCCCCTATCACTCTACCCGCGCCTCGACGCGCTCTATACAGCCGCCTCGTCCGTGCTCTTCTTAATGGTTGGCTTGACCTCATGGGAACGCAGCGTATCACTTATCGAGCGATGATCGTTTCGCGAGCGCCCTCCTCGCTAAAAGGATGACGCTTCGGAGGCTTACGTGGAGCTTAGTGGTTTCAGTCGTCGCTTTAGGATCTATCGCCCGCATGGAAGCCTCGGGACAGCTTAGGGGTCACTAGGAGGCTTACGTGGAGCTTAGTGGTTTCAGCCATATCTTTTCAACGGACTAGCCAGACGCGGCGCGGCTCGTATTACGGGGGAATTTACTGGGAGGCTTACGTGGAGCTTAGTGGTTTCAGGCTGACTTACGCCGAGTTTACTAGGGCCACAGTCCCGTTATGGTCAAACACTTGGCTTCCTGGATGTCAAAGAGGCACTTAAAGGATAGCCAAGTGAGGAGCTAATCGGGATTTTGTCCGGGATTATTTAGGGACTTAGTTACTA