In [46]:
import re
import numpy as np

In [47]:
# Small FASTA-formatted sample: seven records with ">Rosalind_<n>" header
# lines, each followed by one 8-character DNA string. The cells below use it
# to develop the consensus/profile computation step by step.
fasta = """>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
"""

In [48]:
# Record identifiers without the leading ">". The pattern is a raw string so
# that "\d" reaches the regex engine untouched instead of being an invalid
# string escape (a SyntaxWarning on recent Python versions).
names = [name[1:] for name in re.findall(r">Rosalind_\d+", fasta)]

In [49]:
# Split the text on the header tokens; element 0 is whatever precedes the first
# header, so it is dropped. Newlines inside each record are removed so that a
# sequence wrapped over several lines becomes one string. The pattern is a raw
# string so that "\d" is not treated as an (invalid) string escape.
dna_strings = [string.replace("\n", "") for string in re.split(r">Rosalind_\d+", fasta)[1:]]

In [50]:
def oneHot(string, alphabet = 'ACGT'):
  """Encode each character of `string` as a one-hot row over `alphabet`.

  Returns a list with one row per character. Each row is a list of
  len(alphabet) ints containing a single 1 at the character's position in
  `alphabet` and 0 everywhere else. A character that does not occur in
  `alphabet` raises KeyError.
  """
  width = len(alphabet)
  position_of = {symbol: position for position, symbol in enumerate(alphabet)}
  rows = []
  for char in string:
    hot = position_of[char]  # looked up first so an unknown character always raises
    rows.append([int(position == hot) for position in range(width)])
  return rows

In [51]:
# One one-hot encoding per DNA string, stacked into a single array. With
# equal-length strings the axes are (sequence, position, alphabet symbol).
one_hot_array = np.array([oneHot(sequence) for sequence in dna_strings])

In [52]:
# Swap the first two axes so the array is grouped by position first:
# one_hot_profiles[p] holds the one-hot rows of every sequence at position p.
one_hot_profiles = np.transpose(one_hot_array, axes=(1, 0, 2))

In [57]:
# Summing over the sequence axis (axis 1 after the transpose) counts, for each
# position, how often every alphabet symbol occurs. Result: one list of
# per-symbol counts for each position.
freq_mat = one_hot_profiles.sum(axis = 1).tolist()

In [62]:
# For every position pick the index of the largest count; max() returns the
# first maximal index, so ties go to the earliest symbol.
int_enc = [max(range(len(counts)), key=counts.__getitem__) for counts in freq_mat]

In [66]:
alphabet = 'ACGT'
# Translate each winning index back to its symbol. The alphabet string is
# indexed directly rather than through a global dict named `map`, which would
# shadow the builtin map() for every cell executed afterwards.
consensus_list = [alphabet[i] for i in int_enc]
concensus_seq = "".join(consensus_list)
concensus_seq

'ATGCAACT'

In [54]:
# Re-orient the counts to one row per alphabet symbol and one column per
# position. The value is derived from one_hot_array instead of the current
# freq_mat because freq_mat is a plain list at this point (a list has no .T),
# and because transposing freq_mat in place would flip it back on a re-run.
freq_mat = one_hot_array.sum(axis = 0).T.tolist()

In [55]:
# Label each row of the symbol-major count matrix with its base.
{base: counts for base, counts in zip('ACGT', freq_mat)}

{'A': [5, 1, 0, 0, 5, 5, 0, 0],
 'C': [0, 0, 1, 4, 2, 0, 6, 1],
 'G': [1, 1, 6, 3, 0, 1, 0, 0],
 'T': [1, 5, 0, 0, 0, 1, 1, 6]}

In [56]:
# Display the count matrix as it stands after the re-orientation cell above
# the dict display: one row per symbol, one column per position.
freq_mat

[[5, 1, 0, 0, 5, 5, 0, 0],
 [0, 0, 1, 4, 2, 0, 6, 1],
 [1, 1, 6, 3, 0, 1, 0, 0],
 [1, 5, 0, 0, 0, 1, 1, 6]]

In [69]:
def _fasta_sequences(fasta):
  """Return the sequences contained in a FASTA-formatted string, in order.

  Every line starting with ">" opens a new record; the lines that follow are
  concatenated into that record's sequence. Surrounding whitespace (including
  "\r" from CRLF line endings) is stripped, and anything before the first
  header is ignored.
  """
  sequences = []
  chunks = None  # stays None until the first ">" header has been seen
  for raw_line in fasta.splitlines():
    line = raw_line.strip()
    if line.startswith(">"):
      if chunks is not None:
        sequences.append("".join(chunks))
      chunks = []
    elif chunks is not None:
      chunks.append(line)
  if chunks is not None:
    sequences.append("".join(chunks))
  return sequences

def concensus_profile(fasta, alphabet = "ACGT"):
  """Compute the consensus string and profile matrix of FASTA sequences.

  (The misspelled name is kept so existing callers keep working.)

  Parameters:
    fasta: FASTA-formatted text holding equal-length sequences. Any header
      line starting with ">" is accepted, not only ">Rosalind_<n>".
    alphabet: symbols that may occur in the sequences; its order fixes the
      row order of the profile and the tie-breaking of the consensus.

  Returns:
    (consensus, profile) where `consensus` is a string with, for every
    position, the most frequent symbol (ties go to the symbol that comes
    first in `alphabet`), and `profile` is a dict mapping each symbol of
    `alphabet` to a list of its per-position counts.

  Raises:
    ValueError: if no record is found or the sequences differ in length.
    KeyError: if a sequence contains a symbol that is not in `alphabet`.
  """
  sequences = _fasta_sequences(fasta)
  if not sequences:
    raise ValueError("no FASTA records found in input")
  length = len(sequences[0])
  if any(len(sequence) != length for sequence in sequences):
    raise ValueError("all sequences must have the same length")

  row_of = {symbol: row for row, symbol in enumerate(alphabet)}
  counts = [[0] * length for _ in alphabet]
  for sequence in sequences:
    for position, symbol in enumerate(sequence):
      counts[row_of[symbol]][position] += 1

  consensus_symbols = []
  for position in range(length):
    column = [row[position] for row in counts]
    # index(max(...)) returns the first maximum, i.e. the earliest symbol wins ties
    consensus_symbols.append(alphabet[column.index(max(column))])

  return "".join(consensus_symbols), dict(zip(alphabet, counts))

In [71]:
# Run the whole pipeline on the text currently bound to `fasta` and unpack the
# returned pair: the consensus string and the per-symbol profile dict.
seq, profile = concensus_profile(fasta)

In [72]:
# Consensus string first, then one space-separated line of counts per profile row.
print(seq)
for counts in profile.values():
  print(" ".join(str(count) for count in counts))

ATGCAACT
5 1 0 0 5 5 0 0
0 0 1 4 2 0 6 1
1 1 6 3 0 1 0 0
1 5 0 0 0 1 1 6


In [73]:
# Display the profile as (symbol, counts) pairs.
profile.items()

dict_items([('A', [5, 1, 0, 0, 5, 5, 0, 0]), ('C', [0, 0, 1, 4, 2, 0, 6, 1]), ('G', [1, 1, 6, 3, 0, 1, 0, 0]), ('T', [1, 5, 0, 0, 0, 1, 1, 6])])

In [74]:
fasta = """>Rosalind_7575
GTTCGCAAAGCTTATCTAACCGCTACCACATCAATTTCAAGGGAGGCAACCTATACGGTT
ACGGGTTTTGGCACCCGGCGGGAGCGCTCCAGTTCCATAATCCTTGAACAGACTTAGAGG
CTCGTGCTCTGGTGTGAATCCAATGCCGCATGATTTGAATGGAAACGTGCATTTCTAAAT
TGGGAATGTTTATCCCCCCTCGTGCCAGTCTTCTTTAACTTAGGAGAGACTTGGGTGGGA
AAGAAATGGCCAAACTAAAATTGAGGACTTTAGAAGCTCATAGACCTCAGGATGCGATAA
TGGCAGTCTAGGTAAGTTCCTAAACCTAGTGATACGGTGCCTTTCCACCAATATTTACCA
GAGCGAAATAGAACTAATTCTAAGTTTGAGTGTGACCGTGTGAGATCGTGTTCCAAGATG
TAGAACGCTACGAACTATCAACGATCTATGCTACCCAAATGGCTTACGGTAACAAGGGCC
ATCGTCTCTTTTTTGCCCAAGACACCTCCTTCAGATCGGGCACAAGCAGGAAGGGCTGCC
ATGTAGAACCGTCTGTCGACAGTCTTACAGTGGGGCCAAGAACATAGGCCCGCCCGGATT
GGACTTTGTTATGAAACGTCCCCACTAGTGAAATCCAAGGCTCTGTCTCTCAGCACTCAT
CTGCTCAACTGGCTAAGGGGCAGGTTATCTCTGAGATTTGGGGCGGCCTAACATCATGGA
TTTCGGTGGAACACGTGGGAAGCGTAGCCCCGCCGGGCGTTATAGCGCAGTCGCGGAGCA
TGCGCTTCACACCTGGGTTAGCTGATGTTGATCGTAGATTCCTAAACACAAAATGCTCGT
CTAGAACTGGGAGGTAAAGTTGAGCGTCACATCGAGAGGCGTCGCGGACTAGCATGCGCA
CTGACGTAGTGTGCCCTACAGTAAAGTTCAACAAAGCAGAATCTTACACGGATACTGCGC
CGGTACAAGCGGAGTTGGTCGGCAATACTACTCTAGACG
>Rosalind_1186
GTCAACCGGTCGCAAATGTGCCCACTTTCAATCGCTCCATCGACACCCGAGAACAGTCAC
CGCGGATAGTCGCGCCTTATTTCCCTGGCCCAACTAAGGGCCCGGTACCCCGGTTTAGTG
TGCGATGGGTGTTACTGATGGCCGGCTAAGCACACAGAAAGGCTTGGCACCACGCCAGTG
CTTTTAACGACCACCTTGCACATCGCAAAGCGACGTAAAGGCTTTCTACGTAGGAACAGT
CTAATTATGGTGTGTCTCGGCACTAACGGTGGGCACGACAGGCACTCCGTCCCGTGCCGA
ATAGGAGCCCGGTAATTTGATAAGAGGCGCGCAAAATGGGCCACGTCGAAGTAAAAATCA
AACATGTTAAGAGGTGTATCACCACTTCAAAAACCCAGTTTTTAGGGAGGATAGACCTAG
TATTTAAAGTGCAGCTACTCCTGTTAACAGCGCGCACAGAGTAAATGTGCTTATTTATTA
ATGTATGATCCTCTGGGGTACGGTAAAATGAGTATGGATGGAGTAGCGCTAAAGGGCCGA
GTGAAAACAAAGATAGGCTATACCTCTTCCGAAACTAACGTTCCTTTTGAGTGGGAGACT
AGGCCGATAGCACAGATTCGTATAATCGTAGTAACCTACATTAGGGCAGTTTGTCTATTC
TAGCCGCAATTGAAACGTAGGTGCTTATTCGGTGGTTGCCGCGTCTCCCCCTAGTGTCGT
TGATCGTAATCATAGGACCGTCGTTGTGCTTACACCGGCTCCCTGCCGTCGAAGGAGTGT
GGCATCTCAACAGCACGGTACTGCTCTAGTAAGGATCACGCGTCTTCCCTCAACATACGC
ACCGCCCAGAAGACTAATAAAGCTGTATCATCCATAGTGCCCCGAGATGTGCGATGTTTT
GCCAGGGGCTTTTGACCCCGGGGGCAGGCGAGGCATGCACCATAGGAGGTGGCGGGACAC
CTGGATTGGGACTAAATCAAGGAATTTTACATGATTGGG
>Rosalind_9488
GCCTGTAAAATGCCTTTGCAACTTGGTGGTATCTTCAAGCACCGTTAGAGTTGGGCTTTC
GAAGCAGAAGCCGCCTATCGGAACCACCCCACTGTGGTGGTCATATAGTCGACTGCCGAT
CTGAGGCTCTACTTCCCTCAGTAGCTTCCTGACCGTTCTTTTGCTCCTATGTCATGTTAC
CAAATACTCCCCTGAGGGGACGGACTACTCTCATGATTATTCAAGCTTGATTAGTCCGCG
TTTTGCGGGGCCGACATATTGTGGTTCCCATTAACTTTCTGTACTCGTTGCAGTCCTATG
TCTCGCCTAATAGAGCTAATTAACTATCGGTCTGGGATAGTCCGGGACCATGCCCGTCCT
AGCCATTCCTTTACGATTCCGTGCGCAAGGTGCCGTGTGAACCTACGCACCCGATCTTGC
CGGAGCACTGACGGTGATTCTAGGACGATTCTGTGTTAGGACGTAAATCACGCATATTGT
TCAAGGCCGGTGTAAACTACATCTATGACCTCTTTATCTAACGATGTCAATAGACCTGTG
AGTATCGGCGACGTACCCGGCTCCGGGCCGTTTTACATGAACGGCTGGAGAGTAGTAATT
CGCTCCTGCTATGTAAGTATTAATTGGGGAATTCTGACGAACCAGGACAGAAATCCTAGG
TTGGCCCTGTGGACGAGGGCGCCTCGTCATACTGCAAACGATAAGGTCGTGTGCTCTCCA
TTGATTCTACGGGTTAGGGATCGAAAGCGTGCATTTTCTATAGCTGATCGACGGCCGTGC
CTTTGAGTTTTGGAGGCTATAGGTACAGCGCCAGGATTAACGGGGACGATGAAGCTGGAC
CGAAAAACTGACAGATAGTAAGCACTACGGGGCCTGCTGTAGCCATACCTTCGCGCGTAC
CATCGCGTACCAATGTCGACAATATGTGTTTGAGACTTCTAAGTCAGAGGTTGCAAATGG
TTCTTGAGGGATTACCATTTTGAACGCAGTACCAGCGGC
>Rosalind_5577
CATACAGAAGCATATCGGCGATGTATTAGTGAGCGTACGCCATCGAGAACTCGATGCAAC
GCTATGCTCAGCAAACAGGTCCTCTAACCTCTCTTTAACTCCTTAGAGACGCGCGACGTT
ACTCACGTTATGATTGGAAGCGACTACCGACAAGTATCACTTACTCCTCCTGAACTTCGA
GAAGCCACTGTTGCAAGTTTGCACACTCACCCGAAAACGCTCTACGCTACTGTCCCTCGA
TCCACTGGGTTCAAATTCTACGCTGGTTCTCTACGCCCGTTCCTTTGGTCCCAGATTTTA
ATCCACGCGCTATGTTGGTTTAATCGGACGGACAATACTGATACGCCCGAATCTTCATCG
GCCTCCAAACTACAGTGAGAGTGCCTAAAATATGTGCATACACCGTCCGGCCTATGTCAG
GCTGGATTACTCCTCCCGTCGTGTGAAGACTGTAGGGGTGGAACGTATTGCTAGAAGACA
TACCCTTCATGAAGCGGTGATCCTGGTGAGACTTGTGGGTCTGGGGAATAATGATGGGTA
CTCGGTAGTTTCAACATAACCACCAGGGTTTCGGTGTTTAATTCTTCTGAAGGTAGGTCT
TACACGATTCGTACCGATAAAAGTTTCTGTGTTGATGCTGATGCCTCAACATGTCGCTAG
ATATGGTTGTTGACTCTACCCATACAGTGCGTAATCTCAAGGTGTTGTTCGGACAACTTC
CGCACTGGCATTTTACGAATACTCGGGTGTCTCTACTGTACGACCATATTTTTTTGGATC
TTAGGGAGCTCTACAATATTACCGTATCCGAAGCTAACAGTGCAGGGGACGATGAGACTT
GCCGAAACGTTCACAGCATTAGTCAACCGAGGCTATAAGGATTACTCCCACTACCACTTC
TTATTCTTCAGGTTGGGACGCCTGCAAAGTCTCATGTCGCATAATGGCAAATCGCCGGTC
CTCAAACATAGAGTGTTAGCTCCTTGCGCCTGTCAAAAG
>Rosalind_5132
TCAGATGTATGCTCGCTAAGTGCCTGGTGACGACTACTTGGAGCCCCATACCCGGCGTCT
GACGTGCCGTTAGATACACGTTTAGGCAAGTCCCCACGGACTCGAAGCCGACCCCAAGTA
CATGTTATCTATCTAGCGATGGATAGTCCACAATTCCCTCCGAGTTGCTGTGCGCGCGCA
TACTGGAAAAAGACGAGCCTCCGGGCCACGCCCTGCATAGAGTTTTATATTAAAGGGTAA
CTAGAAGGTAAAACAGCCTGCCGGTGAGGTAGTAAAACGGGTGCATAATCCGTTACTCAC
GACACCGCAAGACTACCAACCGATAGGCGGATGTAACAGGTTGCGGGCTGAATCATTCGG
TATCGGTTGAGCTTTAACCGCAGCGTAGTCTCAATGTAATTAGTGCTCCGATTACCATGA
ATAATCGTAAGTGGTCGCCCAGGCTTAACGAATGCCCTATTAAGTCGGCAAATAAGTTAA
CAATGTTAATGTACCCTTTTATCCATGGAAACCCGATCGAAGCCTGGAGTAAGCTCGACA
CCTGATAGCTTAGGAGCGCGGAGCTTTAAACCAAATGGGCCTACGATATTGGGAAAATTG
TGTTTGTTGACACACACTGATGTATATTTTGCCTTGGCTACTCGATTACGACGTAATCCA
GAACCACCTTGTGTTGTACGCAGTTTCTTCTAGAGTCGCATGGGAAACAGTACTCGGTCC
ACTTCGCACACCTAAGAAGTAAGCGATTTACATCAAATGGTGACAGGACGTAGCACTGAG
AGCATCAACGATAAAGGGCAATGTTAGCTCAGGTCTTTTGAGCTACCGGGACGCACATTA
TTCATTTGGCCTAAGGCAGATGACTGAGCTAGTTACTAGTCCTCACGTCAACAATACAAC
CGGCGGCCAGAGTCGCGGAAGAGGTTACCGTTGAGCGACCCCTATTAAGTACAGAGAACT
GGTCCATCGGGTGGGTCGTAGGAGTTGGGAAAGGTGCGA
>Rosalind_8863
CAGGATTCTTAAAAGTATTTTCAAGAATTAAATGTGATGTGTCCTTGTATGTCAACCAGA
CTGATGCAAGGTGGCTCATATGAGACTTCCTCTTCACAGCAGGTATGGGCATGCGTTATA
CTACTAACTGGGCCCGGGTGGGCCCCCAGCATTGGGGCAGAATAAATCTACCATCACCTG
CCAGTTCTGGAGTCAGGCAAGAAACGCGATCAATGATTTCACCCTGTTAGCCTGATCTAA
TGAAAATCGGGCTACCTATTTGGTCAGCAGCACGGCCACGAGGACTTGCGGGTGGAACCT
CGGAGTCTTAATCAAAAGTGTAATTACATTTTCGACTGGCCCGAGTGTTCACAACCCCCG
CTACAGGCTACGGCAGAAGGTAGGGTGACTAGTTAGAAGCGACAGGCATTGTTAGATGGT
CGTAGTTATTGGGTGGTATTCTATCGCGAACAGAAAGAACCCCCCCGCATACCAATGTGA
TCGGACTAGGAACTATACACGTGGTTCTGATATAAGCCACTCCATTAGGTCGCGGTAAAT
CATACGTGTTGGATAGAGACATTAATTAGATAAAGGAGCAAATCCGCGAACGTGAAGTCC
ACACGATAAATCCGCTCACATACAAACCCTGCCCCACGCTTCTGAGCACTCGACGTGCCC
CAAGTTATTTCCGCATTCCGGCGGAGGGTACGTTTGACTCCATATAACAATTTGTCTCCA
CCTCTACATAAGGGTATTAAGGATTTGAAGTCTTGAGTTATCAACTCGAAACCTGTGTAA
GTTATCCCGTATCCCAGGTTACACTAAAGACCGCAAAGAAGTCTTGTACTACGTCGGTCG
TCATGCTCATAATAAATCTGGGGTTACGTCCCTGGAGTGTGTGTGGGCGATGAACTAATC
GAGACGGCGGCGACACTTCCACATACATCTAAGATGAGAGGGCATCCCTCGGTAGCCCCA
TTAACGCCCCCAAGCTATGGTATACTCTGGCCTGGAAGC
>Rosalind_1474
TTACAGAATCGGGTTGGCTTATCGGGGCCTTATCAAACCGGGCGGTGTCCGACACTCCCG
AGGGACGTGGCCTCGGTATCGGATCGACAAGAGTCGTTGGAAATTCCTATTGAAGGGCAC
CGGGCAAGAGAAGCTTGATTCATGTTACGGTATGCCATGACATGCCATGGGGTCTGGTCG
CGCCTTGAAGATTCGGCACGTACGACAAACAGACTTTACCCGCCCGATTAGAGACAGCTC
GAACAGTTAGCGACACTGTCGATTCAAGAACCCTCCAGCGAGAGCTCTGAGTGAAAAGCC
ACGTGCACTCAGGTCTACTGCTCCAACTGGGCCCATCTTCCCATTCTGATCCTTTTATTC
TCTGAAGTTCATCTCGATCAGTACGAACATCACTCTGCACAATAGATCCACAGTGTCTAC
AATAATGGGCTAGCAGGGGTTCGAGTGAGAGGTTCAAGTGTCCAAATAGGATGATCAAAG
GGCCAGAGCTGGTACGGTGACTTATACACTAATCGGACCCGTCCGACGATTTCGTGCTGA
TGGCATGGACTCCGGCCTCAACACTGAGTTAGTGAGATCGATTGAATGATGAACCGAATA
GTAAGCATGGCAATAACCCATCTCGTGTAGTAGACAGCAGCAAGCGTTCCTTAGTCAGGA
TAGCTCCATAAACGGGCTTAGGCTCGGTTCCCTGGGTGGCGTGCAGACAAGCAAACGAAT
TGTCATAAGTCGAGTACGAGCTCCGCCTGGGGTATCCACTCGACACGTGAATGTTCCATC
GTAAGAACCCGACTAGGTCAGCTCCTTCCTAGAGGATATCGTGCGTATTAGTCCTGCAGT
CTCGAGCGTCACACGGGTCGCGGACACGCCGTGCACTTGGACTGTCTTAGTGTTTTAATT
GGTGAAAACCGGTATGCCCCCAGTCTACATCGATTGGCACGTTCGAATCGCACAAATTAC
ACCTGAATAAACTGGTCGAACCATACACACCCGGGAAAG
>Rosalind_1022
ATCGTAAGCCGTGCCAGAGGTGTATACTAGGTTCAGTCTAGGATAATATTGTGCCAGTGC
CGTGAGTGCCTTTCTCCCCAGGTGCGACCCCTTCAATTCAAGTTAACGTATATGCCACGG
GCAAGTGTCAGAAATTATAGAACTCCTTGCAAGGGGGGCATACTCAAGTGGCTGGTCTGA
GCTATCGGCCTAGGAATTTGACACGGTCCCTACCGTGTAGCCCTAATCCGGCCAGGATCG
AGCACCACCAGGAATCTATCCAAGCGAGTATTTGCACTACTCTTGCACTCGTCGTTAAAC
TTTGACTTTTAATCTACGCCAGATGGGGCTCCTGCGATCTTTCATGGACCCATTGGATAT
TATCATTCTCCATTCCTCTGGTCAGCCTCAATGGGACACGCATGGAAAAAGCCGTACTCC
AAGGTTTTGAGTACCAGAACGAACTCAATCCGTTAAGATTCATTATTAGGAATTCCGACC
CTTCTCCATTGATGCTATACCAGTTATTCATGTGAACTAGCCCAAGGTTCCCCCTATCGT
GTCGTTATTATCATTAATCCCAGTGTTCCATCATCCGCTGGTACCGTATCACCGATCCCA
CAGATACATTAGTGCGGCCACTCTCACCATTAACGTACAGTATATCATGGTAAGAGGCTC
AGACATATGCTTGCCGGTCATTCTCGGATAGGGCACGCGCAAAGTGCAGGTTTGTGGGCG
TAGACGCCACAATCCAGGCGCATGTTAGATGCAGTGCACATCCGCAGCGAGAAGCTAAAC
GCATTGTTGGTTAGATTTAAGGATGATTTAACCTGAACCAACGGCAATTTTCGTGTCACC
TATGCGGCTACTCGAGTCGGGAATTCAGTAAGATTAACGGATGGCGTCTTCTTCTGATCA
TGTTACTTATAACAGAGGTACAACTGTTTTGGACGACATCCAACATAGGTCAATGCCCCT
TGAAACCTCTAGAAAGAGTAATACGGTATCGTGCTGTTA
>Rosalind_7080
GAATCGATAATTCAGTGTACATATACAGGGGCGGTGCCCGGGAGCACGGGTTTCTTGACA
CAGATAGAGGGCGCTACCGCGGTCACAGTGGGTAAGGATGATTAAAATAATATCCTGGTA
CCATAGATTCCTAGCCCTTAGCAGTCCAGGCGACGCTATCTGCGGCCTTAAAATCTCGCG
TACTGAATCTATTGCTTGTTATCCATTGTCTCGAGGAGCTACAGAGGATACTCCAGAAGA
AGGGACAATTTAGCCCGGCACTAAGTGGTTCAGTCGTAGGTAGTCAAAGGGAATAGATGC
TGTCGTTTCTGGAAATTTCTGACCGAGCTTGACGGATTCTGAGCGCATTTTTTTGGGAAC
GTACACAGATAGAAAGTACCTGCTTATCCACACCGTCCTCACGCATATCAGGGAAGTGGG
CTGTTTCTAGTAGAGCGCCCGGGCGTGCCAAACTCAGCTGCCCACGATAGATAAAGTTTT
AATCACGGTTCAACTCGGCCCCAGGCTAGTGTATTAACTTCCTTAGTCGAATGGTTGTAC
GTTGCCGGCACACTGAGGAGAGACAGACTACATAATATCGAAGGCCAATGTCTGTGTCTC
AGTAGACATAGTACACGGAACTCAGTCGCTGGAGAAAGATATAACACAGCTAGTCACGCG
CGGCTCTCCTGTACTGTGCGGTGTCGACGGGTGCATGACGTCACCAATATTCGCTTCCCT
CATCCAAGGGACCGTCCGTATGAGTTGACTTTCCGGGTAATAGAGGCTTGTCGACTCGAC
CCGACCATTCACTTTAGGCTTCTGCTCGGCGGGACACCGGACTCATGGTATTTCAGGAAC
GCTTAACGGTCTCTTGCTATCTCTAGTACACCCTTCAGGTAGAAGAGTGTCGTAACCTAT
CTCCATTCCCTTGCAATGGGCGTCCGGGCATCCTACTTTGGCTATGCACCAATGCGCATC
AGGAAACTGTTACGGGCTCGCGAGTAGGAGCCTTACAAT
>Rosalind_3183
TTTCGAAAGAGGATGGATGCTTGGGTATGACTGGCAGAGTCCGCCCACGTCTCTTATCGA
GGGCTTAAGGGCGTCCCGCCAAGGTGTAAATTCCGCCTGGCCCGACGCATCGGTTTACCT
CAGTAGTTCACCCCATGCCGAGTGCCATACGCCCGTATGACGTATGTCCTCACTCCGGAT
CGCTTTCGCACTAAGGATCTTATGCTGGTCATCACAGCGGAGTTTATATTAAGTCGCGCA
TCGCGTAGCCAAAGATATAGCGGTTCGTCTCATTATCGTGTATCTGAGAACATAGACCCT
CCAGCCTCATGCGACATGATCGGACTCGCACGGCATGTTTTTTCGCTTCGCGAGTTTATC
TACCGTCCCTCCTCACGGAGACGGTTTTCCGATTCGCAATCCCTCTAGCCTTGCTCGTTA
GGGGACTAAGTAGCGTTGACGACAGAGCGCTGAGTGACGTGTCAACCGACCGGTGCCAGA
CCGGCACAGATGAACCCTGAGGTAGTGTTTGTTCAGAAGTAATATAATGACAGTCGTAGT
AATTACTTCGAGATTGGCAGATAATAAGCAACGATGCATTATGACTTCGCCTCCGTAGTC
CCCTCACTGGCCGGTGAGCACTGCCACACGCTATATGATAGGATCGTAAGTAGGGCGGAA
TAACACATAGTTATCTAGAGTAATACTGGGACGACCGGAATGCTCCATACCGTTTGCTAT
AGATTCGTGATAATTGGGTATCCGTGTATACGCGCCTCGATCAAAAGTAGAGGGTTTAGA
GCACTGTGACCTAGCTCGGTGTGAGAAATACTGTCATTTCCGCTTCGGAAGCTCCTGCAA
GCTTGTCGAGTATTCACGAGGATACCTGTCATGACTAACAGGCTTCGTTCCGTATAAGCT
AGTATCACCCAGAGGCCATAAAACACTCGTATCCGTCCACTAAGCAAATACTTAAGGCCT
GAACCCGCAGTTGTTGACATGACCTTCGGCACAACTCTT"""

In [75]:
# Recompute consensus and profile for the text now bound to `fasta`; this
# rebinds `seq` and `profile`.
seq, profile = concensus_profile(fasta)

In [76]:
# Consensus string first, then one space-separated line of counts per profile row.
print(seq)
for counts in profile.values():
  print(" ".join(str(count) for count in counts))

GTACAAAAAAGGCAGCGAAGATCTGGATGAATGCTAACGGGGACCACAACGTCAACGACCCGGGTGCAGGGCGCCCCACCGGACCGACCCCCTCCAATGGACCTAAAGACGAGCGTAGTACCAGAGATCTGGACCGGATGGGAGCCCCGACAAGGCGCAATGAATCCTTCCACTCTCGAGCACTTAAGCAATTCAGGCCTCAACCCAGACCCATGAATAGACTTTGTTAATAGGAGCGGATGAAAAAGGGCAAAACTATACAGTCGAGCTCAGAACCACGTAGACTACTGCATGAAACACTCGCGCTCTAGATAATTGATTAATAAGCGGGCCGAAATGGCTACGCACCAATATTTACCCTACCAGTCTACAACAGAACCGTGCGTAAAATATCCGCATCAACAGTACCGCTGATCTTGGAAGATCTTAAGAGCCCGCTCGAGAGAAAAACGTTCAGATGGCCAAAATGGATAAACGTCAAACCACTATTGAAACCGTAACTCTATTACTACTCAAACGGCCCAAGAAGAAAGGTGTAGAATTGATAGCATCATAGCGACAAACTGACCATCAAAGATCGATGCCTTGAAAGCGAGAATTAGAACATTTACTAAAACTCATACACTCGCTGTACCAACAAATAGCGCACGTAGTCCGCAATAACTCATGTGGACAGGGCGGAGTCGGTTCGCGAGCTGCCGGGCCGACAATTACTGTCCTTGTCCGCAGAAATGTAGGAATCCGTAGACTCCCCGCGCCATCAAAAGTAGACGGCTGAACGTAATCACACATACAGGGTAACGCTATATAACGGCATATGCGCCAACGAAGAACAGGCACCCCGAACCGGAAACAGCAAGAGATCAAGCAAGCTAAATGTATCGAGGTCTCGTATAATTCCGTAACTCCCAGTCGCCGCACAACCGACCTAGAAAGCCACAATATAAAGGAATAAGACCCCGAAAACCGGAAAGGTAGTAGGAATTCGGCACGAAAAGG
