In [1]:
import sys

In [2]:
def profile_most_probable_kmer(text: str, k: int, profile: list[dict[str, float]]) -> str:
    """Return the k-mer of ``text`` that is most probable under ``profile``.

    The profile matrix is represented as a list of columns, where the i-th element is a map
    whose keys are strings ("A", "C", "G", and "T") and whose values represent the probability
    associated with this symbol in the i-th column of the profile matrix.

    The probability of a k-mer is the product, over its positions i, of
    ``profile[i][kmer[i]]``. Every window ``text[start:start + k]`` is scored,
    including the last one ending at the final character of ``text``. When
    several k-mers share the highest probability, the one occurring first in
    ``text`` is returned.

    Args:
        text: The string to scan for k-mers.
        k: Length of the k-mers to score.
        profile: Column-wise profile matrix; only its first ``k`` columns are read.

    Returns:
        The highest-scoring k-mer found in ``text``.

    Raises:
        ValueError: If ``k`` is greater than ``len(text)``, so no k-mer exists.
        KeyError: If a symbol of ``text`` is missing from a profile column.
        IndexError: If ``profile`` has fewer than ``k`` columns.
    """
    last_start = len(text) - k
    if last_start < 0:
        raise ValueError(
            f"k ({k}) must not exceed the length of text ({len(text)})"
        )

    best_kmer = None
    best_score = None
    # "+ 1" because range() excludes its stop value: the final window starts
    # at index len(text) - k and must be scored as well.
    for start in range(last_start + 1):
        kmer = text[start:start + k]
        score = 1
        for column in range(k):
            score = score * profile[column][kmer[column]]
        # Strict ">" keeps the earliest k-mer when probabilities tie.
        if best_score is None or score > best_score:
            best_kmer = kmer
            best_score = score
    return best_kmer

In [3]:
# Sample input: scan a 50-character DNA string for its most probable 5-mer.
text = "ACCTGTTTATTGCCTAAGTTCCGAACAAACCCAATATAGCCCGAGGGCCT"
k = 5
# One dict per profile column (k columns in total), mapping nucleotide -> probability.
profile = [
    {'A': 0.2, 'C': 0.4, 'G': 0.3, 'T': 0.1},
    {'A': 0.2, 'C': 0.3, 'G': 0.3, 'T': 0.2},
    {'A': 0.3, 'C': 0.1, 'G': 0.5, 'T': 0.1},
    {'A': 0.2, 'C': 0.5, 'G': 0.2, 'T': 0.1},
    {'A': 0.3, 'C': 0.1, 'G': 0.4, 'T': 0.2},
]
profile_most_probable_kmer(text, k, profile)

'CCGAG'