In [2]:
def overlap(a, b, min_length=3):
    """ Return length of longest suffix of 'a' matching
        a prefix of 'b' that is at least 'min_length'
        characters long.  If no such overlap exists,
        return 0. """
    start = 0  # start all the way at the left
    while True:
        start = a.find(b[:min_length], start)  # look for b's suffx in a
        if start == -1:  # no more occurrences to right
            return 0
        # found occurrence; check for full suffix/prefix match
        if b.startswith(a[start:]):
            return len(a)-start
        start += 1  # move just past previous match

In [3]:


import itertools

def scs(ss):
    """ Returns shortest common superstring of given
        strings, which must be the same length """
    shortest_sup = None
    for ssperm in itertools.permutations(ss):
        sup = ssperm[0]  # superstring starts as first string
        for i in range(len(ss)-1):
            # overlap adjacent strings A and B in the permutation
            olen = overlap(ssperm[i], ssperm[i+1], min_length=1)
            # add non-overlapping portion of B to superstring
            sup += ssperm[i+1][olen:]
        if shortest_sup is None or len(sup) < len(shortest_sup):
            shortest_sup = sup  # found shorter superstring
    return shortest_sup  # return shortest

In [4]:
input_string = ["CCT", "CTT", "TGC", "TGG", "GAT", "ATT"] 


In [5]:
def scs_all(ss):
    """ Returns shortest common superstring of given
        strings, which must be the same length """
    shortest_sup = None
    for ssperm in itertools.permutations(ss):
        sup = ssperm[0]  # superstring starts as first string
        for i in range(len(ss)-1):
            # overlap adjacent strings A and B in the permutation
            olen = overlap(ssperm[i], ssperm[i+1], min_length=1)
            # add non-overlapping portion of B to superstring
            sup += ssperm[i+1][olen:]
        if shortest_sup is None or len(sup) < len(shortest_sup):
            shortest_sup = sup  # found shorter superstring
    shortest_len = len(shortest_sup)

    scs = set()
    for ssperm in itertools.permutations(ss):
        sup = ssperm[0]
        for i in range(len(ss)-1):
            olen = overlap(ssperm[i], ssperm[i+1], min_length=1)
            sup += ssperm[i+1][olen:]
        if len(sup) <= shortest_len:
            scs.add(sup)
    return scs

In [6]:
input_string = ["CCT", "CTT", "TGC", "TGG", "GAT", "ATT"] 
scs_list = scs_all(input_string)
len(scs_list)

4

In [7]:
#!wget https://d28rh4a8wq0iu5.cloudfront.net/ads1/data/ads1_week4_reads.fq

In [8]:
def readFastq(filename):
    sequences = []
    qualities = []
    with open(filename) as fh:
        while True:
            fh.readline()  # skip name line
            seq = fh.readline().rstrip()  # read base sequence
            fh.readline()  # skip placeholder line
            qual = fh.readline().rstrip() # base quality line
            if len(seq) == 0:
                break
            sequences.append(seq)
            qualities.append(qual)
    return sequences, qualities

In [9]:
filename = 'ads1_week4_reads.fq'
reads, quali = readFastq(filename)


In [10]:
def pick_maximal_overlap(reads, k):
    """ Return a pair of reads from the list with a
        maximal suffix/prefix overlap >= k.  Returns
        overlap length 0 if there are no such overlaps."""
    reada, readb = None, None
    best_olen = 0
    
    kmer_dict = {}
    
    for read in reads:
        for i in range(len(read)-k+1):
            kmer_dict[read[i:i+k]] = set()
            
    for read in reads:
        for i in range(len(read)-k+1):
            kmer_dict[read[i:i+k]].add(read)
   
    for read in reads:
        current_kmer_set = kmer_dict[read[-1*k:]]
        for kmer_read in current_kmer_set:
            if read != kmer_read:
                olen = overlap(read, kmer_read, min_length=k)                
                if olen > best_olen:
                    reada, readb = read, kmer_read
                    best_olen = olen
    return reada, readb, best_olen

In [11]:
def greedy_scs(reads, k):
    """ Greedy shortest-common-superstring merge.
        Repeat until no edges (overlaps of length >= k)
        remain. """
    read_a, read_b, olen = pick_maximal_overlap(reads, k)
    while olen > 0:
        reads.remove(read_a)
        reads.remove(read_b)
        reads.append(read_a + read_b[olen:])
        read_a, read_b, olen = pick_maximal_overlap(reads, k)
    return ''.join(reads)

In [12]:
%%time
ss = greedy_scs(reads, 10)

KeyboardInterrupt: 

In [None]:
len(ss)

15894

In [14]:
def scs_dp(reads, k):
    """ Dynamic programming shortest-common-superstring merge.
        Finds the optimal solution to the shortest common superstring problem. """
    # Create a matrix to store the overlap lengths between pairs of reads
    overlap_matrix = [[0] * len(reads) for _ in range(len(reads))]
    
    # Calculate the overlap lengths between all pairs of reads
    for i in range(len(reads)):
        for j in range(i+1, len(reads)):
            olen = overlap_length(reads[i], reads[j], k)
            overlap_matrix[i][j] = olen
            overlap_matrix[j][i] = olen
    
    # Create a matrix to store the optimal solutions to subproblems
    scs_matrix = [[0] * len(reads) for _ in range(len(reads))]
    
    # Calculate the optimal solutions to all subproblems using dynamic programming
    for i in range(len(reads)):
        for j in range(i+1, len(reads)):
            scs_matrix[i][j] = min(scs_matrix[i][k] + scs_matrix[k][j] - overlap_matrix[i][j] for k in range(i+1, j))
            scs_matrix[j][i] = scs_matrix[i][j]
    
    # Use the optimal solutions to construct the shortest common superstring
    return construct_scs(reads, scs_matrix, 0, len(reads)-1)
    
def construct_scs(reads, scs_matrix, i, j):
    """ Construct the shortest common superstring using the optimal solutions
        stored in the scs_matrix. """
    if i == j:
        # Return the read itself if there is only one read
        return reads[i]
    else:
        # Find the read that should be merged with the current pair
        k = min((scs_matrix[i][k] + scs_matrix[k][j] - overlap_matrix[i][j]) for k in range(i+1, j))
        
        # Merge the reads using the read found in the previous step
        return construct_scs(reads, scs_matrix, i, k) + construct_scs(reads, scs_matrix, k, j)[overlap_matrix[i][k]:]

In [15]:
%%time
ss_s = scs_dp(reads, 10)

NameError: name 'overlap_length' is not defined

In [None]:
len(ss_s)

15894

In [None]:
ss_s.count("A")

4633

In [None]:
ss_s.count("T")

3723

In [18]:
def alphabet_position(text):
    # Create an empty string to store the converted characters
    converted_text = ""
    
    # Iterate through each character in the text
    for char in text:
        # Check if the character is a letter
        if char.isalpha():
            # Convert the character to its corresponding position in the alphabet and add it to the string
            converted_text += str(ord(char.lower()) - ord('a') + 1) + " "
    
    # Return the resulting string of numbers
    return converted_text


In [19]:
text = "apakah president sudah makan"
%time
alphabet_position(text)

CPU times: total: 0 ns
Wall time: 0 ns


'1 16 1 11 1 8 16 18 5 19 9 4 5 14 20 19 21 4 1 8 13 1 11 1 14 '

In [20]:
def alphabet_position1(text):
    alphabet = {  'a' : 1,
                  'b' : 2,
                  'c' : 3,
                  'd' : 4,
                  'e' : 5,
                  'f' : 6,
                  'g' : 7,
                  'h' : 8,
                  'i' : 9,
                  'j' : 10,
                  'k' : 11,
                  'l' : 12,
                  'm' : 13,
                  'n' : 14,
                  'o' : 15,
                  'p' : 16,
                  'q' : 17,
                  'r' : 18,
                  's' : 19,
                  't' : 20,
                  'u' : 21,
                  'v' : 22,
                  'w' : 23,
                  'x' : 24,
                  'y' : 25,
                  'z' : 26, }
    inds = []
    for x in text.lower():
        if x in alphabet:
            inds.append(alphabet[x])
    return ' '.join(([str(x) for x in inds]))

In [21]:
%time
alphabet_position1(text)

CPU times: total: 0 ns
Wall time: 0 ns


'1 16 1 11 1 8 16 18 5 19 9 4 5 14 20 19 21 4 1 8 13 1 11 1 14'

In [1]:
import turtle

def draw_heart():
  turtle.color("red")

  turtle.penup()
  turtle.goto(-100, 0)
  turtle.pendown()

  turtle.left(45)
  turtle.forward(60)
  turtle.right(90)
  turtle.forward(60)
  turtle.right(90)
  turtle.forward(60)
  turtle.right(90)
  turtle.forward(60)
  turtle.right(45)

  turtle.left(90)
  turtle.forward(100)
  turtle.right(90)
  turtle.forward(100)

  turtle.penup()
  turtle.goto(-100, 0)
  turtle.pendown()
  turtle.right(135)
  turtle.forward(141.42)

draw_heart()
turtle.done()


KeyboardInterrupt: 

: 