In [1]:
from itertools import product
def kmers(text, k):
    """Collect the distinct substrings of length ``k`` that occur in ``text``.

    The result is a set: a k-mer that appears several times is reported once
    and no ordering is implied.  The set is empty when ``text`` is shorter
    than ``k``.
    """
    last_start = len(text) - k
    return {text[start:start + k] for start in range(last_start + 1)}

def getProb(km, profile):
    """Return the probability of the string ``km`` under ``profile``.

    ``profile`` maps each symbol to a sequence indexed by position; the
    result is the product of ``profile[symbol][position]`` over the symbols
    of ``km`` taken left to right.  An empty ``km`` yields 1.
    """
    likelihood = 1
    position = 0
    for symbol in km:
        likelihood = likelihood * profile[symbol][position]
        position += 1
    return likelihood

def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of ``text`` with the highest probability under ``profile``.

    Windows are scanned from left to right and only a strictly larger
    probability replaces the current best, so ties are resolved in favour of
    the earliest occurrence.  Because the first window seeds the search, a
    k-mer is returned even when every window has probability 0.

    Parameters
    ----------
    text : str
        Sequence to scan.
    k : int
        Length of the k-mers to consider.
    profile : dict
        Maps each symbol to a per-position probability sequence, as consumed
        by ``getProb``.

    Raises
    ------
    ValueError
        If ``text`` is shorter than ``k`` and therefore holds no k-mer.
    """
    n_windows = len(text) - k + 1
    if n_windows < 1:
        raise ValueError(
            "text of length %d contains no k-mer of length %d" % (len(text), k)
        )
    # Seed with the first window so a result exists even if all scores are 0.
    best_kmer = text[:k]
    best_prob = getProb(best_kmer, profile)
    for start in range(1, n_windows):
        candidate = text[start:start + k]
        p = getProb(candidate, profile)
        if p > best_prob:
            best_prob = p
            best_kmer = candidate
    return best_kmer

In [2]:
def score(motifs):
    """Entropy score of ``motifs`` computed from the pseudocount profile.

    The profile comes from ``get_profile_laplace``; for every column the
    Shannon entropy in bits of the four profile values is computed and the
    column entropies are added up.  Lower means more conserved.

    Note: a later cell in this notebook defines another function that is
    also called ``score``; whichever of the two cells ran last is the one
    the search routines use.
    """
    profile = get_profile_laplace(motifs)
    # One row per profile entry, one column per motif position.
    matrix = np.array(list(profile.values()))
    entropy = 0
    for column in matrix.T:
        column_entropy = 0
        for prob in column:
            column_entropy -= prob*np.log(prob)/np.log(2)
        entropy += column_entropy
    return entropy

In [3]:
def score(motifs):
    """Entropy score of ``motifs`` from raw column frequencies (no pseudocounts).

    The motifs are stacked into a character matrix with one row per motif.
    For each column the Shannon entropy in bits of the observed symbol
    frequencies is computed, and the per-column entropies are summed.
    """
    grid = np.array([list(motif) for motif in motifs])
    n_rows, n_cols = grid.shape
    total_entropy = 0
    for col in range(n_cols):
        column_entropy = 0
        for count in Counter(grid[:, col]).values():
            frac = count/n_rows
            column_entropy -= frac*np.log(frac)/np.log(2)
        total_entropy += column_entropy
    return total_entropy

In [4]:
import numpy as np
from collections import Counter


def get_profile(motifs):
    """Build the plain frequency profile of ``motifs`` (no pseudocounts).

    Returns a dict with keys 'A', 'C', 'G', 'T'.  Each value is a list with
    one entry per motif position: the fraction of motifs showing that symbol
    at that position, or 0 when the symbol does not occur there.
    """
    grid = np.array([list(motif) for motif in motifs])
    n_rows, n_cols = grid.shape
    profile = {'A': [], 'C': [], 'G': [], 'T': []}
    for col in range(n_cols):
        counts = dict(Counter(grid[:, col]))
        for symbol, column_values in profile.items():
            column_values.append(counts[symbol]/n_rows if symbol in counts else 0)
    return profile

In [5]:
def get_motifs(dna, profile):
    """Pick, for every string in ``dna``, its most probable k-mer under ``profile``.

    ``k`` is taken from the length of the first entry of ``profile``.  The
    result is a list with one k-mer per input string, in input order.
    """
    first_row = list(profile.values())[0]
    k = len(first_row)
    return [ProfileMostProbableKmer(line, k, profile) for line in dna]

In [6]:
def get_profile_laplace(motifs, pseudocount=1):
    """Build a profile of ``motifs`` smoothed with pseudocounts (Laplace's rule).

    Parameters
    ----------
    motifs : sequence of str
        Equal-length motifs; one row each.
    pseudocount : number, optional
        Amount added to every symbol count in every column.  The default of
        1 gives ``(count + 1) / (n_motifs + 4)``; 0 gives raw frequencies.

    Returns
    -------
    dict
        Keys 'A', 'C', 'G', 'T'; each value is a list with one smoothed
        probability per motif position.

    Raises
    ------
    ValueError
        If ``motifs`` is empty, since the motif width is then undefined.
    """
    if len(motifs) == 0:
        raise ValueError("get_profile_laplace needs at least one motif")
    motifs_matrix = np.array([list(motif) for motif in motifs])
    height, width = motifs_matrix.shape
    profile = {'A': [], 'C': [], 'G': [], 'T': []}
    # Every symbol of the alphabet receives the pseudocount in each column.
    denominator = height + pseudocount * len(profile)
    for i in range(width):
        counts = Counter(motifs_matrix[:, i])
        for symbol in profile:
            # Counter returns 0 for symbols that do not occur in the column.
            profile[symbol].append((counts[symbol] + pseudocount) / denominator)
    return profile

In [7]:
def RandomizedMotifSearchIter(dna, k, t):
    """Run one randomized motif search from a random starting point.

    A random k-mer is drawn from each of the first ``t`` strings of ``dna``.
    The motifs are then refined repeatedly: build a pseudocount profile from
    the current motifs, take the profile-most-probable k-mer of every string,
    and keep going while the score strictly improves.

    Parameters
    ----------
    dna : sequence of str
        Input strings; each must be at least ``k`` long.
    k : int
        Motif length.
    t : int
        Number of strings from which the initial motifs are drawn.

    Returns
    -------
    tuple
        ``(best_motifs, best_score)`` for this run.
    """
    motifs = []
    for i in range(t):
        line = dna[i]
        # randint's upper bound is exclusive, so "+ 1" is needed for the last
        # valid start position (len(line) - k) to be reachable.  Each string
        # uses its own length rather than that of dna[0].
        start = np.random.randint(0, len(line) - k + 1)
        motifs.append(line[start:start + k])
    BestMotifs = motifs
    best_score = score(BestMotifs)
    while True:
        profile = get_profile_laplace(motifs)
        motifs = get_motifs(dna, profile)
        current_score = score(motifs)
        if current_score < best_score:
            BestMotifs = motifs
            best_score = current_score
        else:
            return BestMotifs, best_score

In [8]:
# Toy data set: one profile -> motifs step worked by hand.
dna = "ATGAGGTC GCCCTAGA AAATAGAT TTGTGCTA".split()
k = 3
t = len(dna)
print(dna)
# Exercise
motifs = ['GTC', 'CCC', 'ATA', 'GCT']
profile = get_profile_laplace(motifs)
print(profile)
motifs = get_motifs(dna, profile)
print(*motifs)

['ATGAGGTC', 'GCCCTAGA', 'AAATAGAT', 'TTGTGCTA']
{'A': [0.25, 0.125, 0.25], 'C': [0.25, 0.375, 0.375], 'G': [0.375, 0.125, 0.125], 'T': [0.125, 0.375, 0.25]}
GTC GCC ATA GCT


In [9]:
def RandomizedMotifSearch(dna, k, t, n_runs=1000):
    """Repeat the randomized motif search and return the best motifs found.

    Parameters
    ----------
    dna, k, t
        Passed unchanged to ``RandomizedMotifSearchIter``.
    n_runs : int, optional
        Number of independent random restarts (default 1000).

    Returns
    -------
    list
        The motifs of the restart with the lowest score; on ties the earliest
        such restart wins.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")
    best_motifs, best_score = RandomizedMotifSearchIter(dna, k, t)
    for _ in range(n_runs - 1):
        motifs, sc = RandomizedMotifSearchIter(dna, k, t)
        # Strict comparison keeps the first restart among equal scores.
        if sc < best_score:
            best_motifs, best_score = motifs, sc
    return best_motifs

In [10]:
k = 8 
t = 5
dna = """CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA
GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG
TAGTACCGAGACCGAAAGAAGTATACAGGCGT
TAGATCAAGTTTCAGGTGCACGTCGGTGAACC
AATCCACCAGCTCCACGTGCAATGTTGGCCTA""".split('\n')

In [13]:
# Run the multi-restart randomized search on the current dna, k and t.
best_motifs = RandomizedMotifSearch(dna, k, t)

In [14]:
# Show the motifs found, one per line.
print(*best_motifs, sep='\n')

GTGTTCAG
GTGTAAGT
GTATACAG
GTGCACGT
GTGCAATG


In [15]:
# Motif length and number of strings for the data set below.
k, t = 15, 20
dna = """ACTTATATCTAGAGTAAAGCCCTGATTCCATTGACGCGATCCCTACCTCCATCATACTCCACAGGTTCTTCAATAGAACATGGGGAAAACTGAGGTACACCAGGTCTAACGGAGATTTCTGGCACTAACTACCCAAAATCGAGTGATTGAACTGACTTATATCTAGAGT
AAAGCCCTGATTCCATTGACGCGATCCCTACCTCCATCATACTCCACAGGTTCTTCAATAGAACATGGGGAAAACTGAGGTACACCAGGTCTAACGGAGATTTCTGGCACTAACTACCCAAAATCCTCTCGATCACCGACGAGTGATTGAACTGACTTATATCTAGAGT
CACTCCCGTCCGTCTGACGCCAGGTGCTCTACCCCGCTGATTGTCTGGTACATAGCAGCCTATAGATCACCGATGCAGAAACACTTCGAGGCAGCCGATTTCGCTTATCACAACGTGACGGAATTTGATAAACCACGTACTCTAATACCGTCACGGGCCCATCAACGAA
ACAAGAACTGGTGGGGAGACTATGACACTCTAGCGGTCGCATAAGGGCCGGAAACCAGGACAAATCGATAAGATGAAGCGGGGATATAAGCCTTATACTGCGACTGGTTCCTTATATTATTTAGCCCCGATTGATCACCGATTAAAATATTCTGCGGTTTTCGAGACGG
TAACCACACCTAAAATTTTTCTTGGTGAGATGGACCCCCGCCGTAAATATCAGGATTAAATGTACGGATACCCATGACCCTCCAGTCATCTACCTTCCCGTGGTGGTCGCTCAGCCTTGTGCAGACCGAACTAGCACCTGTCACATACAATGTTGCCCGCATAGATCGT
ATCCGACAGAGGCAGTGAATAAGGTTTCGTTTCCTCAGAGAGTAGAACTGCGTGTGACCTTGCCTTCACCGACATCCGTTTCCAATTGAGCTTTTCAGGACGTTTAGGTAACTGATTGTCATTGCAATTGTCCGGGGGATTTAGATGGCCGGGTACCTCTCGGACTATA
CCTTGTTGCCACCGATTCGCGAGCAACATCGGAGTGCTCTGATTCACGGCGATGCTCCACGAAGAGGACCGCGGCACGACACGCCCTGTACCTACGTTTCTGGATATCCTCCGGCGAGTTAATAGAGCAATACGACCTGGTCGTCGAGATCGTGTATCTAGCCCTACCT
ATAGGTTAACGAATCAGGAGAGTTAATTTTACCTAGCTAGAGCGGACGGTGCCTGGCTGTATTCGCGTTTGACTTTCGGGCTCGCTGATAACTTGTGATCACCTTTTACGCTTACTGGATCCAACGATGGATCAAAGTTGAGAATTTCTGTGCCTTGGGTGTGAGCTGT
CTGACGAAAGGACGGGCGGTGTACTTAGTTTGGGGTAAAATAGTTGGTATAATTCTGTGCGACAGACATTTGGTCAGGCCATACTGCCATATCGTGATGTAACTATCCACACTACGTCATAGGCCCTTGTGATCAATTAAACGTTCCTCATGCCAGGCTATCTGTTTAA
GGCTTCGCGTTTAAGGCTGGATTAAGTACTCCGCCTTGTGATCTGTGATCCTCCGACCTGTGATCAGCAAGATTGGAACCTAGGTAGGCGGCGGGTCTACGCTGGCCCACAATCGTGAGTCCCCCACTCCGTAGGTTGTGGAATTTATAGACCCGCAAGGGGCACCACT
AGGATGACACCCAGGATGAATCTGGATTAGGAACACCAACCCGACATATTTGTTACCGCTGCAGCATTTCGCTCTTGGACGCGTAACCCGAGATCCGTCTCGCGATCGTCACGGATCGGGATTATGCAGGCAATACCTTGTGATCACTCCGCGCTTGGTTTTGCTAGCG
ACATCTCTAGTCACTTTTATTGAGCAGGTGGGCGGATTCATGATCCGGCTCTGTCGTACGTCCAACCACGGTGACATGTTCGGAGCTGTCGCCGTGGAGCAGAGATACATCGGATCTATCAATTTTACTAAGAGCAACTAGCCACGACAAACTGTGATCACCGATTGGA
AATTTGCGTATCTCTAGGACTCCCTCATACAAATCAAAGCTTGGATGGGTAAGATGCCGCAGCAGCAGGTATCTCATATTGGCTATTAAGAGCCAGGCCCTATGGCCTTAGTATCACCGATCAGACGTCGCATGAGCGGGCCCGTTGTCCTATCTCTTTAGCTGCCGCA
GAAGTAAAGGGGTTCCACTGCGTAGAGCGTGCCCCTCTGGTGTGCCGTACTGTTATGGTGATACAGCTTCCTTATACCCCTCGTAAAGCGGCTAATGGTCCTAATGAATGCCCTTGTGAAATCCGAATCGCTTTACAATTGCGTTCGGCGGAATGCAGTCACCAGTGTT
TACACTACGCGTTATTTACTTTTACTGAGTCCTTGTCGCCACCGAACGAGGATTGTTCATTGTATCCGGAGATTAGGAGTTCGCATCGCTGACACAGCCAGTTCGTAGCAAATACCGCTGGCCCTGGGCACTCCAGATCAGAACTACTAGCCCTAAACTCTATGACACA
TTGGGTCTCGATCCCTCTATGTTAAGCTGTTCCGTGGAGAATCTCCTGGGTTTTATGATTTGAATGACGAGAATTGGGAAGTCGGGATGTTGTGATCACCGCCGTTCGCTTTCATAAATGAACCCCTTTTTTTCAGCAGACGGTGGCCTTTCCCTTTCATCATTATACA
TTTCAAGTTACTACCGCCCTCTAGCGATAGAACTGAGGCAAATCATACACCGTGATCACCGACCCATGGAGTTTGACTCAGATTTACACTTTTAGGGGAACATGTTTGTCGGTCAGAGGTGTCAATTATTAGCAGATATCCCCCAACGCAGCGAGAGAGCACGGAGTGA
GATCCATTACCCTACGATATGTATATAGCGCCCTAGTACGGCTTCTCCCTTGCAGACACGCAGGCGCTGTGCGCTATCGGCTTCCTCGGACATTCCTGGATATAAGTAACGGCGAACTGGCTATCACTACCGCCGCTCCTTAAGCCTTGGTTTCACCGACGATTGTCGT
TAGTAGATTATTACCTGTGGACCGTTAGCTTCAAGACCGAAACGTTGGTGATGCTACTTAAATGTCAAGAGTTGCGAAGTTGGGCGAAGCACATCCGTACTCCCAAGTGGACGATCGATAGATCCATGGAGTTTCCATCCATCTTAATCCGCCCTTTGCATCACCGACG
TACAAGGCACAAACGAGACCTGATCGAACGGTGCACGGTCGAGGCAGCGAGATAAATGTACATTGAGAGCACCTTGTGATTTACGACCTGCATCGAAGGTTTCTTGGCACCCACCTGTCGTCCGCCAGGGCAGAGCCGACATTATATGACGCTGATGTACGAAGCCCCT""".split('\n')
# Multi-restart randomized search on the data set above; one motif per line.
best_motifs = RandomizedMotifSearch(dna, k, t)
print(*best_motifs, sep='\n')

CATGGGGAAAACTGA
CCTCTCGATCACCGA
CCTATAGATCACCGA
CCGATTGATCACCGA
CCTTGTGCAGACCGA
CCTTGCCTTCACCGA
CCTTGTTGCCACCGA
ACTTGTGATCACCTT
CCTTGTGATCAATTA
CCTTGTGATCTGTGA
CCTTGTGATCACTCC
AACTGTGATCACCGA
CCTTAGTATCACCGA
CCTTGTGAAATCCGA
CCTTGTCGCCACCGA
TGTTGTGATCACCGC
CACCGTGATCACCGA
CCTTGGTTTCACCGA
CCTTTGCATCACCGA
CCTTGTGATTTACGA


In [17]:
# Motif length and number of strings for the data set below.
k, t = 15, 20
dna = """AACTGTACTCATGGGGTCGGGTTGCCAACTGCGGATCGAGGCGCTAGAGGGGTCGTCGTCTGTCCCGCAGGCCAGTTAATAGCTCCCTATCTGGTGTCTACCGTGTCAATTAACCGGCCGTGATGACTTGGAGGAGTCCCCCTTTTGGTAGGTCTCAGGAAATCTTTTGCGTTCGAATCGTAGGTAACTGTACTCATGGG
GTCGGGTTGCCAACTGCGGATCGAGGCGCTAGAGGGGTCGTCGTCTGTCCCGCAGGACCAAGACGTGCGCTCCAGTTAATAGCTCCCTATCTGGTGTCTACCGTGTCAATTAACCGGCCGTGATGACTTGGAGGAGTCCCCCTTTTGGTAGGTCTCAGGAAATCTTTTGCGTTCGAATCGTAGGTAACTGTACTCATGGG
ATTGACCATTAGCGGCACCTATGGAGCCGCTAGTCCGGAAGAACAATTCCTATAACTTATAAAATTGTCGCAAACCCTAGGTAGTTACGTGGCACCTGCACGTGCGCTCCAGCCGCCCGTATGCGGATGCCCCGAGGGTGGCCGACCCACGTCGGCGGGTATTTGTAGACACGATTTGAAGAGTCGTACCGCAAATCCCG
CGGCCTTCGGCAGACCCCTACGCCTGCTCTATCGGTTGGAATGGATGAATTTAGATACTGTCTAAATCAGAGCCCTGCCGCTCCCGCAGGCGAACGCACATTCGGCTTCACGCGACTGTCGTATCGGCCAATATCAGGGTAGACTAACGTCACTGTCAGAACTCGTCACTATCGCGATCGCCGCTCTGCAACGCTCTGGA
CCATAGCTGTGCCGTGTTAGAGTGACTCGGTGATAAATTAAAGCATCAACTTGCCATGAGACACGGAAGTCCTTAGAATAATCGGTAACAGGTTCGAGAGGTTTACGGGTGATTACAGGGCATTTACCCTACCGTGCGCTATAGTCTATGCTTAGGCGACTATCATTTACTCTGCGATCGCCGCGTCACTCCTGTGTGGA
TACGTTTACGTGCGCTTTATCTCCCCTGTATCGTAAACCACCATATAATACCGTGGGGGGCTTAGTGTTAATCTAGCTGAGGTGTGCACGTTATGTTAGTTCTTCGTTGGGGAATTAAGGATGAACGATAATAAGAATGAATCTACAGCCGCTAGGAGGGGTGCCCCGTTACGGACTTCTCTTATTCCGGGGTGACATCC
CTTTCGCCGAACAACAGGAGCTTTCACTCACGGCTGCACCCCTACGTGCAGAGTTCCACCGCAAGGCAGTGCCCAGCGATATAAGTCGGATAGGGGGTGGTTGTGGCATTTATAAGATACAGTTACTTCTAACCCTAGTGGGCACGGAACGATACGCTCACATGACGGTGACGGGGCTGAAAAAGTTACGCGGCGGGGCA
TTAGCCTAGTCGATTTCTTTATAATGGCATGATGGTACAGCCCCGGATAACTACGTGCGCTGCTGGCTTATCTGTTCTATCAACGTTCCTGGCGCTCTCAAGTAGCGGAAAAATATGACTGTCACGGATGCCTAGGTGAACAAAGCGCCAGGTGTGTCCCGGACGGTCTCGCTGTGAGAGATTTTGAGGCTCACCCATGC
AATGGGGCCCACCAATTCTGAGCCCACTCGCGTAGTATTTGCTATGAGATCTAGACCAGGATGCCTATGGGCCGCAACCCCTATAAGCGCTCGATCTCACCTTGAAGGACACACACCACGTTGGCCGTACGTTAGCGATAATGTATTAGGGTAATCCTCATCATACTACCGCGACTCTGTATTATGGTCTCTTGCAACTA
GCTGTTCGTCAACTTATGCAAGATCTATAAGACTTGTCTTCACCGGAAAGAAGGATCATAGTGTTTGAAAGTCGATCAGCATATGCGATCAGAGGCTATAAGCAGAGGAGCTTACCTCTTATCATCGCGTTTCCCCTACTGGCACAGGAGCCTACGTGCGCTGTCTATACTACGTTCCGTTTAGGTTGCGGGATTCAACT
CTCCCCTACGTGCGGGTAGTAGTGGCGGTCAGTCCGTGTACAAAATTTCTGTATATAGGGAATGACGACGTAGAGACCTTATTTAGTTTACAGCAACAGGCACCAGTACTCATATTAATTGGTATTATCAACGTTGTCAGCATTACCTTCGTGGCGGTTTGGGAATCGGTACACAATCCGCCAACGTGCTCTAATTTCCT
AGGAGCGCCAGATGCCGACAATCTCTATCATGGCCACGGTGTACTCTGTGCTGCGATACAGCAGCAGCAGCTCTTGAACCCCTATATCGACGCCGAAATGCGAGGTTTGTGCTTGTAACGATTTCTACAGACCCCTCTATGCGCTGAAACCATAACGCGATTTTGCCTCCGCTAACGTAGTTCGAGTCTTAGGATATGTA
CCCCCGGGTTTTTATAGGAGACATTACCTCCAGTGATTTCGCAATTTATCAGGGACACGATTGTTCCGCATGGTATTGCATAATGTACGAACAAATGACGGGATCGCGCCTCAAGGGAGGACCCCGCGGTGCGCTTGCATCATAGGTCATCTGCGCAGACTACGGTGCACTGGCCTCGAACGCTTATGAGGAATAGACAT
CTACGAGAGAGCACTTAGTTCCCGGGTGTACTGTACCAATTTCGTCTCAATTCCTTTAGGGATTCGAACCCCTACGTGAAGTAGACGTGGTAATTGGCGGTCGTCCCAAAGAAAAGAGTTCTTGGCCTTACGAAGGTTATCGGACGATCACCCAACTGCCCCCACCGGTATGTTGACCGCTGATTGTACTGCATCTGACA
TCCCTTCGACCTCTGCACTAACGTGTCCCTTAGCATAAGATTCTACGGCACCAAAACTGGACCGAATGCCACCATAATGCGTCTTTCCAAAACAGTACGAACCAGCATGGAACCCTGCCTCGGGTCTGCGAGAGACCCCTACGTTTACTTTAGCTCTTTAGAATGCAAACGTGACTTAGGCAGCTCTGCGGGTCAGAAAC
TGTCGTGCACGTCCACTGCACATATCGCACTCCTATAACGTACGCTCAACACTACAATGCCACGCCCGGAGATGGACCCATACCGCTCCACAGGGGTATCTGGGCCGGCCGCGATTCTGTTGCATCGTTGGCATGCTAATGTTTAAATGGCACGCCCTACGTGCGCATTTCCGTGGAATTATATCTAAGTCACAATGCAT
CGTACAGCGTCTGCTGCTCCCCGGTGGAGTCGTCAGGCCGACCATCGCCACTGGGAGAGAGAGCTAGATGCTCCGGAATGGTTATCTGCGTCAATGCCCAGGAGAAATTGTTTGGTCCATCGGGCTATTTTGGCGGCCGGACACCCGGCACCCCTACCGACGCTGTCTTAGTCCCCTATCTAAATGTGCAGTATAGATGA
CATCCGCGGTGCGAGCGTGCCCCACGAAACCATATCTGGTGGGATAAGCCCGGGAGGTGGACACGTAGTACCCTGAGATCATCAACAACAGTAGGTGTTAGGCGAGGGATACCCGGTCGTGCGCTTTTGGAGGTCCAACAATTCCACCGGGCGACCAAGGGGAACAGTTAACTAGTATCGTGAAGATTACCATCGAACCG
GTCGGAATTGCTGAACAACATCATTTATGTCGCATACAAAGAAATACCCCTGATTGCGCTACGTGTGCGCACTACTTGTTAAAAGACAACGCGTGGGTCCTCCGAAGCTGGTCAGCTGCCACAAGAGAATCCCCTGCCTGTTCGTCCTAGCTATCGTAGCCATCGTGCGGACAGAAAGACTTCTAACTTTCACCCACTAA
GTATAGCAACCCAAGTTCAGTCGTGAAACAAACTCTCGGAAGAGGTCAAATTGCGAACTTAGAAAGCTCAACCCCATGGTGCGCTAAGATTCAAACATCAAGACGGAGGGAGCCGCTTGGTTGGGCCGCCGGGTCGAATTGCCCGTCAGATTCAGGAGCGTTGGGCTAAGGCCATACGCTCATGCACCCTTACCGGCGAT""".split('\n')
# Multi-restart randomized search on the data set above; one motif per line.
best_motifs = RandomizedMotifSearch(dna, k, t)
print(*best_motifs, sep='\n')

AACCGGCCGTGATGA
ACCAAGACGTGCGCT
ACCTGCACGTGCGCT
ACCCCTACGCCTGCT
ACCCTACCGTGCGCT
ACGTTTACGTGCGCT
ACCCCTACGTGCAGA
ATAACTACGTGCGCT
ACCCCTATAAGCGCT
GAGCCTACGTGCGCT
CCGCCAACGTGCTCT
ACCCCTCTATGCGCT
ACCCCGCGGTGCGCT
ACCCCTACGTGAAGT
ACCCCTACGTTTACT
CGCCCTACGTGCGCA
ACCCCTACCGACGCT
ACCCGGTCGTGCGCT
ACCCCTGATTGCGCT
ACCCCATGGTGCGCT


In [21]:
# Scratch check of np.random.choice with an explicit probability vector:
# draws one of 0, 1, 2 where index 2 is given probability 0.
np.random.choice(3, p = [0.9, 0.1, 0])

0

In [22]:
def GibbsSamplerIter(dna, k, t, N):
    """Run one Gibbs-sampling motif search of ``N`` steps from a random start.

    A random k-mer is drawn from each of the first ``t`` strings.  Each step
    picks one string at random, builds a pseudocount profile from the motifs
    of all the other strings, and re-samples that string's motif with
    probability proportional to each k-mer's probability under the profile.
    The best-scoring motif set seen at any point is remembered.

    Parameters
    ----------
    dna : sequence of str
        Input strings; each must be at least ``k`` long.
    k : int
        Motif length.
    t : int
        Number of strings that take part in the search.
    N : int
        Number of sampling steps.

    Returns
    -------
    tuple
        ``(best_motifs, best_score)`` where ``best_motifs`` is the
        lowest-scoring motif list encountered, the start included.
    """
    motifs = []
    for i in range(t):
        line = dna[i]
        # randint's upper bound is exclusive, so "+ 1" is needed for the last
        # valid start position (len(line) - k) to be reachable.
        start = np.random.randint(0, len(line) - k + 1)
        motifs.append(line[start:start + k])
    # Keep a copy: ``motifs`` is modified in place below, and an alias would
    # make the "best" list follow every change.
    BestMotifs = list(motifs)
    best_score = score(BestMotifs)
    for _ in range(N):
        i = np.random.randint(t)
        # Profile built from every motif except the one being re-sampled.
        profile = get_profile_laplace(motifs[:i] + motifs[i+1:])
        line = dna[i]
        weights = np.array(
            [getProb(line[s:s + k], profile) for s in range(len(line) - k + 1)]
        )
        weights = weights / weights.sum()
        sel_idx = np.random.choice(len(weights), p=weights)
        motifs[i] = line[sel_idx:sel_idx + k]
        current_score = score(motifs)
        if current_score < best_score:
            BestMotifs = list(motifs)
            best_score = current_score
    return BestMotifs, best_score

In [23]:
def GibbsSampler(dna, k, t, N, n_starts=20):
    """Repeat the Gibbs-sampling search and return the best motifs found.

    Parameters
    ----------
    dna, k, t, N
        Passed unchanged to ``GibbsSamplerIter``.
    n_starts : int, optional
        Number of independent random restarts (default 20).

    Returns
    -------
    list
        The motifs of the restart with the lowest score; on ties the earliest
        such restart wins.

    Raises
    ------
    ValueError
        If ``n_starts`` is smaller than 1.
    """
    if n_starts < 1:
        raise ValueError("n_starts must be at least 1")
    best_motifs, best_score = GibbsSamplerIter(dna, k, t, N)
    for _ in range(n_starts - 1):
        motifs, sc = GibbsSamplerIter(dna, k, t, N)
        # Strict comparison keeps the first restart among equal scores.
        if sc < best_score:
            best_motifs, best_score = motifs, sc
    return best_motifs

In [26]:
k = 8 
t = 5
N = 100
dna = """CGCCCCTCTCGGGGGTGTTCAGTAACCGGCCA
GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG
TAGTACCGAGACCGAAAGAAGTATACAGGCGT
TAGATCAAGTTTCAGGTGCACGTCGGTGAACC
AATCCACCAGCTCCACGTGCAATGTTGGCCTA""".split('\n')
# Timed Gibbs-sampler run on the data above; prints one motif per line.
# %time is an IPython line magic, so this cell only runs under IPython/Jupyter.
%time gibs_found = GibbsSampler(dna, k, t, N)
print('\n'.join(gibs_found))

Wall time: 765 ms
AACCGGCC
AAGGTGCC
TACAGGCG
CAGGTGCA
CACGTGCA


In [27]:
# Motif length, number of strings and sampling steps for the data set below.
k, t, N = 15, 20, 2000
dna = """CTCTTGATTAAGGAGATGTAAAACTCTTTCCGGACATTAACTTGTCGATTGGTTCGTTTTATGATTGTTAGCCCATACAACGAGTGCTACTTTCGACGATTACCTGGCAACAATAGACAAGTCAGGGCCGCGGAAGACTGATCCCCTATACAGACCGTTATCATGCTACGAGAACGGTTGTCTAGCAACTCTTAGCTACGTGTGACGTCCACCGGCGTCGAGCCTGGCGACTATTAAATTCGCATGCGCTAAAAGCACCTGTTATAAACGGCTGTCAGCGATGTTCGGCCGATATGCGCATCTTCGTTTCCTCTTGATTAAGGAG
ATGTAAAACTCTTTCCGGACATTAACTTGTCGATTGGTTCGTTTTATGATTGTTAGCCCATACAACGAGTGCTACTTTCGACGATTACCTGGCAACAATAGACAAGTCAGGGCCGCGGAAGACTGATCCCCTATACAGACCGTTATCATGCTACGAGAACGGTTGTCTAGCAACTCTTAGCTACGTGTGACGTCCACCGGCGTCGAGCCTGGCGACTATTAAATTCGCATGCGCTAAAAGCACCTGTTATAAACGGCTGTCAGCGATGTTCGGCCGATATGCGCATCTTCGTAAGCGCACCGGGGTGTTCCTCTTGATTAAGGAG
GAGATGATAGGTTGGCCGGTTCGCCTCGATACGGTCCACGCCTGCTGGAATCTAGCTAGACAATTGCTTAGTGGATTCATTCTCCTCACCCCTGTAATTTACCCTTACCGGGGTGGGGAGGAAATACTCCACGTAGAACACGTTTACGAGCCTAAGGGCCGAGAATCACATAAGGCGTCTAACTATTAAGTGCCTTTGGTATCGATTATTGTGTTTTTCCCCATGCCCGCAGTCCTCCACTTAATAGACTGCTATCAACTATGGTAAATCAATTTCCACGATCGGGCTCTCGAACTTCTGTGTTATCCGATACGTCGCCGAAATC
GCCTAATTGAATTATAAAGTATTTCGTCCGACATATCGCCATGTTGACTGTATGCGCATGGAATTCGCTTCGAGAAGTTCCTCGGGGTGAGGCACGTTTTGAAGAACCCGGAAGCTCCTTCGGTTGAGCCTAAGTTTACTCTATAGGCAATCTCACCATCCGCGTCCACCCAATCGCGTGAGGTAAGATCTAAGTCCGGCTGCAAGTATCCATAAGGCCCCTTGCGGATGGTCACGTCTCTTAGCAAGGAGTCAATGAGATCGGCCCTCCCTACCCTTAGTCTATGTTTTGGCATAAGCATTGGGAATTGTGTAGGATATGTGAG
CGTTTCATCTACATGACATTGCTGCTACGACATGCGTGTCGCCCTCCTGGAGCCCAGTGTTGATCACCGTGGGAACGTTCCTAATAGCTGAAGTGAGGACTGGGAATTCGTTCACTTGACGTCTCACCTGTCGATTTATGCATTTGAAGCTCAATTTGGGGGTAAATTGGAATGAGAGCGAAGAGACGTTTACCTATCCTTCTAATAGGAAACTTCTAGTTGGATGATGAGATAAGTTTTATGGGGTGTATATTGGGCGTCAATGAACCCTCGCCAGTGTAAACACCAATTTCCATTGAGGTTGGGTGGTAGAGTCCGCGGGACA
TAGACTAACCCACACGTAACCAATTGGTTTTTCGGACAGGGTGAAGGGATGTGTGCATCGAAAGTTTTTAGCTACGACTGTAATATCCACTTCACCTCTGTCCACCAGTACAATCCAGGTAATAAATCTCCTCTGGCTGGTGCTTTAAAGGGAGTCTGTTTCACGATCCTTGAACAGGTGCGTCTCACGAGGACGTGTATGAATTTTCATAATAGACGTGTTCCCGAGCCACCAACAGGAGCGTGCCTGATTCGGAAGATGCAAAGCCAATTGCATACCACCTGCACAGGAGGAGGCATGGATCGCAAGTTTACCGGGTGCAAGG
CCTACTTGACAAGCGTAGGCGCGGTACGCAAGTGTTGCGTTCTCCCTCGCAACACCCGTCAGTGCTACGGGGACGGGTTTTACGACTTGACGCTCTTCCGGCCACCTGCATTAACTCGACGGAATGAGCACGGCTCGGTAGGCGATCGAGTATGCGTCATGGGAAAATAGGAATCGGACGCCCCTCGGGCATATTAAGCCTGCGTTCGTGTTGTCCTTACGATATTAGCCTACCAAGTTTCGAGGGGTGCCAAGCTCAAGTGATCCGGAACTTTGCTTTACCACCACCGCCATCCAGGGCATTATACATCGCTCCCTTGTGACCT
TAATACACATCCTCGGACTCCACATGACGATACCACTAAAAAATCAACGACCTTTCGGCCGCATGATAGGTCATGAGGGGGCAGTTTATTCTCGGTTCCTGTTTACCGGGGTATGGTAAATCTGCAGGGTTGCACACCCGATCAGCTTGTAGGCTTTCGTGCTTTCAGATTTCTAACAATACGTTAAAGATTTTTGAGTTAGAGAAAGAGCGTCGAACATACTGTCGTACCAATTTACTCTTTACGATCATTCGCCCGCAGCATTCCGGTGCAATCGATTATTCGCATAGTCATTCCCCTGTTCCGTGGCTATTCTTCGTACCTT
AATGGGATTGCTGAACAAGAAGGCGGCTTAGACTGTCTATGGCTTCCGATCGGACTAACGGCGAATAATAGTAAGATTACGGATCCCTGACAGCTTCAGTCCGCAAACGACACCACAGGCTCCTGTAGTAAAACAGACAGCCACTATAGCGCGATTGTTGGCCCCCCCTTAAGTTGCTCGGGGTGGTCCAACAGTCCCCAGAAGACATACGACGGGATGTATATAATGAAATTCGCCTTCTTTAAGAAGATGCTCTGGCAGTTTCATATAGGGGCCCGCTGTTGAAAATCGGATGAGTGAGGATACATGCGTTTGCGTTCGTGTC
GATACTCCTATCGCGCAGTGACCTCCCTGCGTTCATATTTAGCCCTACTTTGACGAGACAGATAGCTGGGAAAGCCTATTCGACATATATACTGCGATGACTCCGGAACGTAAAAGAGTAAATCGACATATTTAGTGGCTTGGATTTGAGTAGTATCGCAACCTACGCCGATGCGGAAAATTAAACATACCGGGGTGTCCCATATGAGGGGGGCGAAATCTCCGAGGATTGAGTACTCGTGCCCCCGACTTTTTTTCGACTCGCGGCAATGAAAACCGAAGGAGGCACGAAGTGGTACATGTGTACCCCTCTTTGGTTACTCATG
CGCAGGCTCATTCGTTCACGAACACACGGAACTACCCAGCGCGTTGATGCTCCAAAACGAGGCCACGTTCACAGAACCGAAACACCGATAAAAGCGCGCCAACAACCCGACGACGCACAGGGTGAAATGGCACTTACGGCTCTTTCATGATCTTCGACCGAAGGAATGGAGGGGGTCACCTGGCCCGGCCCGGTGAGTGCTTGTATAGGCGTTTGTACTGAGGTACCAGGACCGGGCGCTGCACAAGCTGCCATTCTAGCGTATTCTCATATCCAAATGGCTCGCAAGTTTAGGAGGGTGGGGCTCCCGCCAGGCCGTCATATCC
ACGTTTCGCAGCTGAGGTAAGGAAACCGGGGTGGAATCACCCTCGAAGCTGGTCGCGCCGGCATCTATTGTTGAGCAGGTCATCACAATTCCTCTATTTCTATGATACAACTTCGACGATCCACGGGATATGTAACGCCGGAACACAGGAGTAAATGTGATTGACAGGGGCTCATCCGTCTGCCCAAACGGCATCTACGCAATGACTGCATAGGTTTTGTGTAAAAGAGTTTGTCATCTACCCAACCAGGACAAGTCAGCCCGCGCAAACGGCCCACGCGCACATATCAAGCCCGTCAGGCGCCCGCAGAAACAGATCCTAAGTT
GTGGCTGTGCGTAACCGTCTAAATGTAAAAGCGCACATGAGGTAAGTTTACACAGGTGACCCAAGTGATCCTGATCGAGATGGGTAACCGCATTTCTGTGAGTCGGGACACTGGGTGTTACCAGTTGCCAGAAATTCGGCGGGGAGTGAGTTCGGTCGGTATTTATGACTAGGTCATTGGGCTGCAGCGCTCCGCAACAGTCCATGGTTTATAGTTGGAACAGACCGGGGTGATTCATTAAAAGAACATTCATCTGCTTAGAAAAATAGATTTACGTTCCGTAGAACCGTAAGAAATTACTGGCTAACCCAACATAAAAGCTGAG
TCGAATCCGCCACATGCAAGGCTCAATGTTGACAACTCTTGTGGAGAAGACATTGCAAGACAGCTTGAAGGAGGTCCGCTAGAGCTAGTCTACGCTCCGTGTCAAAGCCTGGAGAACATACGATAATGAGTTAGACCGGGGCCAATTAGTTTACCGGGGATCGCTGAACAACCGGTCCGTGACCATACACTTAGTTGGGTAGCAATACATCTGGCCCGGTCAATTTCATCTAAGGCACCCGATATGAGGACGTGTGCAATACACATATTTTCGGTGCTGTCATGTCCTGTGAGGTTTGCATGGCTGACCGTACTAGTATTAACAG
GGCCTTAATGAATCGCTCTGTCATGCATGCATTGGGATGGGGACCCCTCCGTTAGCTGTGATGGGTCGAGACGTACGATGTACCGCCCCTTTTACCGGGGTGAGCGATTCTCGTGCGAAATGTTCTCCCACTTTGTCCGGCCGTGCGCGCAGCATACTGGGCAGCCTGCGTTCCCCGCCCCCCCACATGACCACGACTGGGTTCGCCATCGTCAGCTTAAAATCCGTATGGTTAGGGAAGATAGCGTCCAAATGGGAAGCATGCACGTAATTCAGACTGAGTCCCTCTGTATTCTGTCTTGGACGTAATGAAACTCTATAAAACT
CCCTAGCAAAAGCCCTCTTCAATCACTTGCAATTGTTCTTTACCCCCTTAACTCAGCTTGACCATCATCAATCCAAACCGAAGCTTCGGCCACATCCAGTATGCCGAACAAAGGCAGTAGATTATGCGATCATTCTGTTCTATAACTTTCTTTCTACCCTCACCGATCCACATATTAGCTGTGATTTCGAGTCATTCTGATTGACTATTCATGACTTCCGGCTAAAGACAGGTACATGAAGTGAGCCGGGGTGATACAGGAGTGGGATGCTTGGGCAGCGTTCAATTGGAGAAATCGGAGAGTTGCTACCATTCCTGCTGTCTGG
GCGTGACGGCTCTATAAGAGAACTACGACCAATAGTACCGGTGGTCCTCAGCCTTAAATATAGTGTAAAGTCGTCCGGGGTGATTCAAATGGGTGTCTTTAAACTTATTTAGAAGTACATTGTGCCTAGGTTTCCGGGACTTGCCATAATTGAGAGTCCCTCATTCTCGGTGAGGAGCGCCGAAGTCCCGTTAATCTGGCGTGTCCCGTGATGCATCATCTAAGTTATCAGTCAGTCGCACGCACTCCTACATGACTGAACCAGTGCGCGCTGAGATGGTACGCGTGCTCACTGTCCAAGGAGACGGACACGTATCAACTGGCGC
GCCTATGGAATTGCAATGGAGTTATGTCCAGTACAGAGGTGAAAGTTTACCGGACAGAGATTACCAACCCCGGGATTAGGGGAGATCGAGCTGCGGGCTCGTGGGCCAAGTATTCAACGAACAAAGCTTAAGTAAAGCAGCGAAACGCCTACCGGTACAACAGGCGGTTCAGGTGACTACCAATAAAGTAAATGTTCGGACGCAGACGTCTAAGCAAGTGACGGCCTAGGAGTTTACGCCCTACAACCCACCAGCCACCAACGGCAAATAAGTCCCTACTGACCGCGGCATTTTGCGCACCGAACTAGCCGTCAACCACATCACG
CGGTCCATGTCTCTAGCGCAAATGGATAGGTTCTGTATATACGGCACCTGGCCCAGCACGTCTTTACACAATAAACAATAACCCGAGTGGTGTTAGGTGAGACTTACTAAGGGACCCGCGCAACAACGGGTCCAAGGTGACGGATTTTAATCGTTGCGTGTCGATATCTCGCAGCATCTAAGACTGAGAATGGCGGGATTCACTCCTCGGACTAGGACATCTTCCCAAAGTTTACCAATGTGAGAGACAGAGGTGCACCACTAGGCACGGATGTATGCGCGAGCAATTGAACAATACGGTCCTGTTATACAGTTCACGTTAACAC
GTCGGTTCAGAGCAACTTTACATAGAGGAACGGAAAGAGCAACATTCTTCCCAAGTTTACCGTCATGGTTTGCGAGTACAGCGGCCGGCACTACTGGCGGAGTGAGCCACATCGTTGGCTGGGACCGAGAAACTGCGAGTCTTTAAACGGACCCGCGCCCCAGACACTAGTGTTTCCTATGCGCGCGCATAAAAAGCCAGTCCCGGTAACTGGAGTTCAGGACCAAGGAGTTTGGACAAGCTTGCTAATCGAAATACCATTTGTGTTGCGATCTTGGAGCGTGCGTAGCGCTTACGGTCGAAACGTACCCCGCAGTATTATACCC""".split('\n')
# Timed Gibbs-sampler run on the data above; prints one motif per line.
# %time is an IPython line magic, so this cell only runs under IPython/Jupyter.
%time gibs_found = GibbsSampler(dna, k, t, N)
print('\n'.join(gibs_found))

Wall time: 1min 12s
ACGTCCACCGGCGTC
AAGCGCACCGGGGTG
ACCCTTACCGGGGTG
AAGTTCCTCGGGGTG
AAGTTTTATGGGGTG
AAGTTTACCGGGTGC
AAGTTTCGAGGGGTG
CTGTTTACCGGGGTA
AAGTTGCTCGGGGTG
AAACATACCGGGGTG
AAGTTTAGGAGGGTG
AAGGAAACCGGGGTG
AAGTTTACACAGGTG
TAGTTTACCGGGGAT
CCTTTTACCGGGGTG
AAGTGAGCCGGGGTG
AAGTCGTCCGGGGTG
AAGTTTACCGGACAG
AAGTTTACCAATGTG
AAGTTTACCGTCATG


In [28]:
# Motif length, number of strings and sampling steps for the data set below.
k, t, N = 15, 20, 2000
dna = """TACGCACGCATCAGGGGAGCTTGCCAAGCACTACCCAGCGCTTAACAAGCGTATCGGCGTGGAACTCTTTTGCATATAGGTGCCCCCTACATACATTCCTCTATAGTACTACTAAACCTAATAAATGATTTGAGGCGACCTCATATAAACTATGTGTTCCATCTACACATACGCTCAGTCGAAGTGTATATCCGTATCTGTCCCGGATCCTAACTAGGGGCCCGGCGAATACAGTGCGCGGCTACAAAATTGATTGATCGTTGTCGAATATGGCTGACTGACACTTATTTTCTTTCATAGCGTTTGTACGCACGCATCAGG
GGAGCTTGCCAAGCACTACCCAGCGCTTAACAAGCGTATCGGCGTGGAACTCTTTTGCATATAGGTGCCCCCTACATACATTCCTCTATAGTACTACTATATAGAGAGGAGTAGAACCTAATAAATGATTTGAGGCGACCTCATATAAACTATGTGTTCCATCTACACATACGCTCAGTCGAAGTGTATATCCGTATCTGTCCCGGATCCTAACTAGGGGCCCGGCGAATACAGTGCGCGGCTACAAAATTGATTGATCGTTGTCGAATATGGCTGACTGACACTTATTTTCTTTCATAGCGTTTGTACGCACGCATCAGG
CAACTTGCTTTTGACAGAGCTCGTACAGCCGCATGCTCGCCAAAATCCTGACGAACATGGCAATGTGCTAACGACCCTTTCGGAACACGTGCGTGTCTTCATTCCCCTCTATCTCAAGCGGGGAAAACACGAAGCCAAATAGGGATAGCCCCATGGTCCCGTGAAATTCTAATACGAGCCGACTATCTAGTGGAATCTTTCACTAATGGTCTGTCTAGGCGGGGCACTTTCGTTGTGGTGAGGTCATACAAGAATACGTGGTTTTTTCGTAGAGCTACTTTTTCGGAAATATGCCCGCGAGTAGTGTAGTCCTGGGTCTCT
TCCCGGACCTTTCTCACGAGTTTCCGGTGTACCACGGAACGCGCGAGCAGTAGTACGAGGAGTAGGGATCTTAACGCCCAAACATGCAGAGCAAATCTGACCTGTGAATAATAGGCGGACTCCTATTTAGGAGCATGACTGTGAGCCCAATAACATTTAAGGGCAAGTGGATTTCTTGCCCAAGATTGATGTTCTGACCCAGTTGCTCGTCCAACTGTTGTGTGACATGGACTATAATTAATTAGGAGGTATTGGGTGCAAACGTAAGGTGGCATCAAACATAAATGACTGTCACCCGACGGAGCGTATGGCAGTTTGTGA
AGGGTACATGACTGTCCTTCATCAATACCTTAACTCTTGGACACACCGGTATCTGGGTGGACTGTGCGCGCTGGCTTACCCTTCTACCGCCACTACGGTAGAGGATTGAGATTTATGCCGACTGGTAGGTCGCGTGGTGAGCAAAGATCGCCCGTACTGTCTGACATTTCGGCCCCCACTGCTTAGAACAGCTTAGCATCTTCACACGGGCTTTCGCCAACCTTTAATCTTCTAGAAAATGCTGCTTGACCCTCCAGGTTGAAATCCTATGCGCGAACGTTTTTGGTTCGAGCCCGCTCATGCTCCAAACTGTACGTAGCG
AACGTCTCGGTCTTAATGATCGCAGAGATGGCGTGTCTCGCTCAGACGTCCAGGAGCGCTCCCCAGGGTCTCGTGGGAGTGGGTTGGTATCAGGTGCTGTTTGCGATAGCAACCCACCAATTTGACTTCCGCTAGTATCTGGCTAACCGTCGGCTGTGGCTCTATGAGACTATATGAGCGGTAGGGGGCCAGGTGGAGCCGCTAAATCGTTAGCCGAAGTCATATGTGAGAGAATGCGATATGATCCCCACCGCGCTCTTAACCCCCGGCAGAGGATATGCGTTGGAGTAGGCTTACGCTCTCATACGAATTCGAGCATGA
TTCTCGCTGACTACCTTCGCGCGGGGATTATAATGTGGTACAAAATCCCTGGAACGTTAGGGAGTACGTTTAGTATGGAATCCGTACCCCTGTGAGACCGTGCGTCATCAGAGTACCACGCAGGCAAAAGCCCGCCCCATCTGAGGTGGAATAGGGTCGGGACCCACTTAGGTAGGGGCCAACGCACAGCCTATTGGGCAGACGATCACGCTAATCATTTACTGGATATTGAGGGCATTGAGCTAAGACTATGCACTGGAGTAGGTAGTAGATAACCTACATATCGTTGTGCTAAAGTCGCAGCATTATCCACACTTCGGT
GAATGCGAAAGGCTTTGGCGCCAGTATGGAGGAGTAGGTCTGCAAGATGAATGCCTATCAACCTTTATAAGACTCTCGCTTCCTCTCAAGTCAAGTCTACCAAGATGAATTGGGACCCGAAAACCACCCACTTTGCCATGGAAACGCGGACAAGCTCGCAGGAGGACCAGATAGTAAGATTATCCCGCACCTTTCAACCCACCACCATATCAGTTCGGGATTAAGACCTAGTCCCGATGCTATGGTTAGGAGTAGCGAACAGGGAGCGGACACAATGAGACCGCGCGCGTAAGCAGCAAATGGGACTTGATGACTGCCTTA
AAACATAGAAAGGAGGGGACAAGAAAACCGATTCCCATGAACCGAGGAGTAGCCAACGCGAACCTAGCCGAAATCCCGTAGTTTGCAGAAGCACATCGGGGAGCTGTTCGAATTGGAATGCCAGAGATCCAGCCATCCATTAATATGCCGACGAGTGAAAGACTATGGTCATGTCCTTCTCGCACGCTAAAATGCCATATACAGAGTTAACTGACAGATTATCGCTGTTATGCCTCCCGTTCTCGGGGGCTACCGCGGTGAGCGTGACTCGGGCGGTTATGTCCGAGCAGGCTTTTACGCACTTAGACATTGACATCCCGC
CAGTAGGGATCATGTCTCTGTTACATCTCGAGCTTCAATGGTGGAAACTAACAGCAGCAGGCGCAGTACGTTATTGCGACTCGAACATTCGATTTTGTCACTGTTTCTGCTCGAGTCCAAGAGTGCAACAAGCATCGTAGCTCTTGGGCGAATATAATTAGTTATGCCGAGGTAAAGTCCAACAGTCGTCTTTAGCGGGTTCGCTCTCTGAAGTCTGAAGCTTCGGAAATCTGGGGGTACATTTCCTTGCCACTCCCCTAAGTCGGACTAGTGGGGGCCAAATAAACCGCTCATTCCTTTAATAAAGCAGCAGATTCGCAA
GTTTAGTCTTTACGTATATGATTAGGAGTAGCTTAAACAGTTGCGAAACAGCCTTCGGCAAGCGTTTGCTCAGAAGTTACAAGTCGGATCAAATCCCCGGAACAACCCAATGACAAGGTCGGTACATAATGTCGATTTGCTGACGGCCATTTCATGAACCCAAGGCGAAGGTACCCAGTTGCCTTCTGCAGCCATTAGGCTATACTGGCATCACCCAGGGCGTCCCAATCGCTTAAGGATTTCTCCCCAATCGACAATAGTAACTCTTTGTCTCTCACCTCGTATTGAAATTCACCGTAATGAACGCACAGCCAATGCCAT
TTGGACCTGCAAGGGTCAACGCTCGGCAGGTAACCGCTTTGGGGGTGCAGGGGATTTCATGTTATCACACGGTACGACGCGCTGAGGGTCAATACCGACAGTGTCGTCGGAGACTAGAATAATATTTGGTCAGTACTGCCAACTGCAAGGGTCTTGCCACAGCGGATCTTTGAAGAGATTTATGGGCTATAGTGTTAGTAGGCTTAGATCAAAGCACTAGCACATGTCCGGGGCGTTCAAAAGGGGTAATAACTGCATACCGCTCAGAGCTGTGCTTAGGTCAAAATGGCTGTTAGAATTAAGCCAGAGCCGAGGAGTAGG
CTTTTTTAAGCCCCCGCCTTACGGTTTCTTGTTTCATCCCGGAAGTCTACATAGGACGTTTGATTGCGGATTGTGGGAAGAAACAAGTAACACGCACATTGCAGTTCCGAGGACTACAATGTCGGTATGCCACTGAGTAGGAAGGTACCCGTTCCCGAACTCCAATCTATCTATACGGGTAGCCTTCCGGTTGGACAGCCCCATCCGGTATTTTCTCACCTACACTGCCTCCAGGCCAGCCATCGTCCCGCTGCCGACCTATTTTTTGTTATGCATTAGAGCGATCTCGCAGTGACCGTACCCTTGTAGAGACATACGATT
GGCGAGTAGTGGGAAATATGTCTAGCTCCGCATCTGCGGCCCAGTGCATCTAAGTCTAAACATCATGGGGGTATGTTTGATGTTCTCTATAACAGTCTCAACCGGTAACCCAAGTTGTACGATGCTGGCATCAGTTCCACAGGAACCGCTTCAAGCTCGCCTAAGGAAAAACCTCAGCTCTTGACTACAATAATGTCGTCGGACAGAAAACTCTCCCCAGACACGGACGCCTCCAAAAGAGAAAGACTTAGGGATCTTGTACCCAGATACCCTCTGCAATATGCCGCTAAGTAGGGAACAGGCCACGCTAGCTACTCTTGC
TTAAAAACATAAAGTGGCAATCTATAGCCATGTGAGCAGATATCGTTCGCGTGCTACGATTCGGCAGTCCTGGTAGATTTGGTACGCGACATTCGCACTTTGGGTACGCCATTACATGGGTTACCCTCGGGCGCCCTATTACCGGTGGCCATATTAATCCAACCCATTCGACCCGCTGCCGAGGAGTACTTTATCGCTTGTGTCCACTTAAAATGATAAGGCGCACGCACGCTTTCTATCTTTGTTCGTCTCGTGGGTGAGTGCTAGTAGACGCCTCGTCCTTGTTCAGCAGCCAGCCAGCTACTAAGTCTTCTTATGTCT
ATCGAGACTTAAAAACAAAAAACATTCGGCACTGATACAGCCGCTGTCACAACCGTTGCTTGACATGACCATCCAAACAGATGACAGCGTTGGTCTGACTTCGCTAAATACCTGTAAAAGGCGTATCCTGACTGTCCACCGTGTCTGTGACTACGCTCTCAGAAGCTGACTGTCCGTATGATAAGAGTGGGACAGGGGTGTTGCAACCAACTTTTGTCGGGTTAAGGGATGAACTCCGTTACAGGCACTGGTCGCTAACTGGCAACTATCACATGATGTGGCTGCACGACCTCATAGTCGCAGTATTGAGAGGAGTAGCCG
TCTGAAAAGGAGTTCTCAGCTGGCCTTGTGTCTCGCTGAAGGCTGTAAGCCCGGGCACCTGGGGATATGTTTGGCGAAACAGGACCTACCGAACTCGTGTAGCTTGAAGGCCGGGTATATGGCCCCCGGATTTGCCCATCGCGAAACCGCCTCTGCTTGCTAATTATAGTCCGGGTAATGATATTTCCAGGAGGTTGGACAATTGGCTACGATCAGGGTATGATGCCGATGGAAGGTCTTATGTGAGCCAAAAGCAATAGAATGTTAGCTATATGCCGAGTCATAGGCGGTACGCACGATTTGAATCGCTACTCAGGTAAC
CCGTCATGCCCGTAAAGAACTTAGAAGTCCGACAGCGGCTTTATTAGTCCACGTAGCGTCACTCCCGTTAACTTGCCCACTTATTGTTTCATCCGCTGTTGATGAGACATGCCGAGGAGTCAAGGGGTAGCCTCTTCAGGTCTCAATAGGGACGAAGCATTTCATCATCCTGCAGTTCCTCAACAACTGCAGAGAGTATCATAGCGCGGAGGATGGGGACAAGGTATTTTATTGCGGACGCCCTCGCTGACATTATACAAGGGGGCGATCTCAGACTAATTTGGGCCTATCTTCAGGTTGCGAGGATCCTCTACAGGCCGA
TTCGAGTTACTTGTACGGAGTCAGGGATGTATCACGCGTCGTGCAGTTACCGCCGCTAAGACTAAATACATGAGGTGGAATATTTCAGACTTTATGGTTCATGGACGACGGATCCATCTTCGCGCTATAACAAGAGGTCAGACCAATTAACAATTCGGGCACAGACACATCGTTTCTCCAGCTTATGCCGAGGAAATGATGATGCCGACACTATCCCGCTACGCGAGGGATGCACCGAGTCTTGTGGCCTCCCTTTGGCGCGGAGTTAAACGCCGAGCGATATGGGCACCTGTTCTCTGAAATCGAGCCGGGGGGCTTCGA
TACGCCGGGGTGGGCCTAGCCAACCTGGATCACTAGAGTTCCATAAGCCGCTACCCCTCATCTCCACATCGTTTAGATGTCCTTCCCCACGCCTGCATCCGACCGATAGTCGACGCACGGTTGTTGTTCGATAAGCTGCGCCATCGCTCTGTTTTGTCAACTAACCATCATCCGGCAGACTAAGCCTCCTACAGTTTGATCGCCTCATGCGGCCCAGATGGGGTCGACGGACGGCTGACGCTTCGGCTAAATGCACGATATGCCGAGGAGGGAGTGGATTGGGATCCCTCAATGGACGTCAGAAGAAGCTAAATGTGTGGC
""".split('\n')
# Timed Gibbs-sampler run on the data above; prints one motif per line.
# %time is an IPython line magic, so this cell only runs under IPython/Jupyter.
# NOTE(review): the closing triple quote of the dna literal above sits on its
# own line, so split('\n') leaves an empty string as the last element of dna.
# GibbsSamplerIter only indexes dna[0..t-1], so that extra entry is not
# touched here, but code that iterates over all of dna (e.g. get_motifs)
# would see it.
%time gibs_found = GibbsSampler(dna, k, t, N)
print('\n'.join(gibs_found))

Wall time: 1min 12s
TACGCTCAGTCGAAG
TATAGAGAGGAGTAG
TATGCCCGCGAGTAG
TAGTACGAGGAGTAG
TATGCCGACTGGTAG
TATGCGTTGGAGTAG
TATGCACTGGAGTAG
TATGGTTAGGAGTAG
TATGCCGACGAGTGA
TATGCCGAGGTAAAG
TATGATTAGGAGTAG
AGAGCCGAGGAGTAG
TATGCCACTGAGTAG
TATGCCGCTAAGTAG
GCTGCCGAGGAGTAC
TATTGAGAGGAGTAG
TATGCCGAGTCATAG
CATGCCGAGGAGTCA
TATGCCGAGGAAATG
TATGCCGAGGAGGGA
