# Genome Rearrangements

In [2]:
from __future__ import division
from __future__ import print_function

### Bring to Top Algorithm

In [1]:
def BringToTop(pi):
    """Sort ``pi`` with prefix reversals only and count how many were used.

    Working from the last position toward the front, the largest value of
    the still-unsorted prefix is first flipped to index 0 (unless it is
    already there) and then flipped down into its final slot. Every flip is
    printed as it happens.

    The caller's list is left untouched: each flip builds a new list.

    Returns the total number of prefix reversals performed.
    """
    flips = 0
    bottom = len(pi) - 1
    while bottom > 0:
        largest_at = pi.index(max(pi[:bottom + 1]))
        if largest_at != bottom:
            if largest_at > 0:
                # Reverse the prefix ending at the largest value so that it
                # lands on top of the stack.
                pi = list(reversed(pi[:largest_at + 1])) + pi[largest_at + 1:]
                print("   Moved to top:", pi)
                flips += 1
            # Reverse the whole unsorted prefix, dropping the largest value
            # (now at index 0) into position ``bottom``.
            pi = list(reversed(pi[:bottom + 1])) + pi[bottom + 1:]
            print("Moved to bottom:", pi)
            flips += 1
        bottom -= 1
    return flips

# Demo: run BringToTop on a 5-element permutation; the function prints each
# flip and the final print shows the number of flips it returned.
print(BringToTop([5,2,3,4,1]))

Moved to bottom: [1, 4, 3, 2, 5]
   Moved to top: [4, 1, 3, 2, 5]
Moved to bottom: [2, 3, 1, 4, 5]
   Moved to top: [3, 2, 1, 4, 5]
Moved to bottom: [1, 2, 3, 4, 5]
5


### Greedy Reversal Sort Algorithm

In [3]:
def GreedyReversalSort(pi):
    """Sort ``pi`` greedily by reversals and return how many were applied.

    For each position ``i`` from left to right, the smallest value of the
    unsorted suffix ``pi[i:]`` is located and, if it is not already at
    ``i``, the segment between ``i`` and that value is reversed so the value
    lands at ``i``. Each reversal is printed as ``rho(i, j)`` using 1-based
    positions, followed by the list after the reversal.

    The caller's list is not modified; every reversal builds a new list.

    Returns the number of reversals performed.
    """
    t = 0
    for i in range(len(pi) - 1):
        # Start the search at i: a plain pi.index(value) scans from the
        # front, so a duplicate of the minimum sitting in the already-sorted
        # prefix would yield j < i and be counted as a bogus empty reversal.
        j = pi.index(min(pi[i:]), i)
        if j != i:
            pi = pi[:i] + pi[i:j + 1][::-1] + pi[j + 1:]
            print("rho(%2d,%2d) = %s" % (i+1,j+1,pi))
            t += 1
    return t

# Demo: run GreedyReversalSort on a 10-element permutation; each reversal is
# printed by the function and the final print shows the reversal count.
print(GreedyReversalSort([3,4,2,1,5,6,7,10,9,8]))

rho( 1, 4) = [1, 2, 4, 3, 5, 6, 7, 10, 9, 8]
rho( 3, 4) = [1, 2, 3, 4, 5, 6, 7, 10, 9, 8]
rho( 8,10) = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
3


### Improved Breakpoint Reversal Sort Algorithm

In [4]:
def hasBreakpoints(seq):
    """ returns True unless every element is exactly one more than the
    element before it (sequences shorter than two elements have none) """
    return any(nxt != prev + 1 for prev, nxt in zip(seq, seq[1:]))

def getStrips(seq):
    """ find the contained strips of seq: maximal runs whose consecutive
    elements all step by +1 (increasing) or all step by -1 (decreasing).
    Returns two lists, (increasing, decreasing), of half-open (start, end)
    index pairs. Single-element strips are reported as decreasing.
    "Contained" excludes the first and the last strip of the sequence. """
    steps = [nxt - prev for prev, nxt in zip(seq, seq[1:])]
    ascending = []
    descending = []
    run_start = 0
    for pos, step in enumerate(steps):
        # A strip ends at pos when the step is not +/-1 or no longer matches
        # the direction the current strip started with.
        if abs(step) != 1 or step != steps[run_start]:
            # run_start == 0 is the leading strip, which is never reported.
            if run_start > 0:
                bucket = ascending if steps[run_start] == 1 else descending
                bucket.append((run_start, pos + 1))
            run_start = pos + 1
    # The strip still open when the loop ends is the trailing one; it is
    # deliberately not reported.
    return ascending, descending

def pickReversal(seq, strips):
    """ choose a reversal based on the given decreasing strips.

    Each strip is a half-open (start, end) index pair into seq. For a strip,
    the candidate reversal runs between the strip's end and the position just
    after the value that is one less than the strip's last element.

    Returns (2, (lo, hi)) for the first strip whose candidate removes two
    breakpoints. Otherwise returns (1, (lo, hi)) using the first strip longer
    than one element, or the last strip if every strip has length one.
    strips must not be empty. """

    def candidate(end):
        # Index of the value preceding the strip's last (smallest) element,
        # and the reversal bounds that bring the two together.
        pred = seq.index(seq[end - 1] - 1)
        return pred, (min(pred + 1, end), max(pred + 1, end))

    # First pass: look for a reversal that removes 2 breakpoints.
    for _, end in strips:
        pred, bounds = candidate(end)
        if seq[pred + 1] + 1 == seq[end]:
            return 2, bounds

    # Second pass: settle for removing one, but avoid the length "1" strips
    # when a longer strip exists. If none does, the bounds of the last strip
    # examined are used.
    for begin, end in strips:
        _, bounds = candidate(end)
        if end - begin > 1:
            break
    return 1, bounds

def doReversal(seq,reversal):
    """ return a new list equal to seq with the half-open slice
    seq[lo:hi] reversed, where reversal is the pair (lo, hi); seq itself is
    not modified """
    lo, hi = reversal
    flipped = list(seq[lo:hi])
    flipped.reverse()
    return seq[:lo] + flipped + seq[hi:]

In [5]:
def improvedBreakpointReversalSort(seq, verbose=True):
    """Sort ``seq`` by reversals guided by breakpoints; return the reversal count.

    The sequence is framed with 0 in front and ``max(seq) + 1`` at the end.
    While the framed sequence still has breakpoints, its contained strips are
    computed with ``getStrips``. If a decreasing strip exists,
    ``pickReversal`` chooses the reversal; otherwise the first increasing
    strip is reversed, which removes no breakpoint but creates a decreasing
    strip for the next round.

    seq: list of integers; the caller's list is not modified.
    verbose: when true, print the framed sequence, the strips found and the
        reversal chosen at every step, and the final sorted sequence.

    An empty ``seq`` is treated as already sorted and yields 0.
    """
    # max() raises ValueError on an empty list, so an empty input gets the
    # closing sentinel 1, which makes the framed sequence [0, 1].
    upper = max(seq) + 1 if seq else 1
    seq = [0] + seq + [upper]                                  # Extend sequence
    if verbose:
        print("0: %s" % seq)
    N = 0
    while hasBreakpoints(seq):
        increasing, decreasing = getStrips(seq)
        if decreasing:                                         # pick a reversal that removes a decreasing strip
            removed, reversal = pickReversal(seq, decreasing)
        else:
            removed, reversal = 0, increasing[0]               # No breakpoints can be removed
        if verbose:
            print("Strips:", increasing, decreasing)
            print("%d: %s  rho%s" % (removed, seq, reversal))
        seq = doReversal(seq, reversal)
        N += 1
    if verbose:
        print(seq, "Sorted")
    return N

In [6]:
# Demo: sort a 10-element permutation with step-by-step tracing enabled; the
# final print shows the number of reversals returned.
print(improvedBreakpointReversalSort([3,4,1,2,5,6,7,10,9,8], verbose=True))

0: [0, 3, 4, 1, 2, 5, 6, 7, 10, 9, 8, 11]
Strips: [(1, 3), (3, 5), (5, 8)] [(8, 11)]
2: [0, 3, 4, 1, 2, 5, 6, 7, 10, 9, 8, 11]  rho(8, 11)
Strips: [(1, 3), (3, 5)] []
0: [0, 3, 4, 1, 2, 5, 6, 7, 8, 9, 10, 11]  rho(1, 3)
Strips: [(3, 5)] [(1, 3)]
1: [0, 4, 3, 1, 2, 5, 6, 7, 8, 9, 10, 11]  rho(3, 5)
Strips: [] [(1, 5)]
2: [0, 4, 3, 2, 1, 5, 6, 7, 8, 9, 10, 11]  rho(1, 5)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Sorted
4


### Count Breakpoints

In [3]:
def count_breakpoints(p):
    """Return the number of breakpoints in ``p``.

    ``p`` is framed with 0 in front and ``len(p) + 1`` at the end (intended
    for a permutation of 1..len(p)); a breakpoint is any pair of neighbours
    in the framed sequence whose values differ by more than 1.

    The framing is done on a copy, so the caller's list is left unchanged and
    repeated calls on the same list give the same answer.
    """
    framed = [0] + list(p) + [len(p) + 1]
    return sum(1 for left, right in zip(framed, framed[1:])
               if abs(left - right) > 1)

In [4]:
# Example input: a rearrangement of 1..10 whose breakpoints are counted; the
# bare call on the last line lets the notebook display the result.
seq = [8, 7, 10, 9, 6, 5, 4, 3, 2, 1]
count_breakpoints(seq)

4