In [24]:
# Input:  A set of kmers Motifs
# Output: Count(Motifs)
def Count(Motifs):
    """Count how often each nucleotide occurs in every column of Motifs.

    Motifs is a non-empty list of strings over the alphabet A, C, G, T.
    The column width k is taken from the first motif; only the first k
    characters of each motif are read.

    Returns a dictionary with keys 'A', 'C', 'G', 'T' whose values are
    lists of length k; entry j is the number of motifs having that
    symbol at position j.
    """
    k = len(Motifs[0])
    count = {symbol: [0] * k for symbol in "ACGT"}
    for motif in Motifs:
        for j in range(k):
            count[motif[j]][j] += 1
    return count

In [25]:
# Input:  A list of kmers Motifs
# Output: the profile matrix of Motifs, as a dictionary of lists.
def Profile(Motifs):
    """Return the profile matrix of Motifs as a dictionary of lists.

    Every per-column count produced by Count(Motifs) is divided by the
    number of motifs, so entry j of profile[symbol] is the fraction of
    motifs that have `symbol` at position j.
    """
    t = len(Motifs)
    counts = Count(Motifs)
    return {
        symbol: [occurrences / t for occurrences in column]
        for symbol, column in counts.items()
    }

In [26]:
# Input:  A set of kmers Motifs
# Output: A consensus string of Motifs.
def Consensus(Motifs):
    """Build a consensus string from the most frequent symbol per column.

    For each column the symbols are examined in the order A, C, G, T and
    a symbol only replaces the current choice when its count is strictly
    larger, so ties are resolved in favour of the earlier symbol.
    """
    k = len(Motifs[0])
    count = Count(Motifs)
    letters = []
    for j in range(k):
        highest = 0
        chosen = ""
        for symbol in "ACGT":
            occurrences = count[symbol][j]
            if occurrences > highest:
                highest = occurrences
                chosen = symbol
        letters.append(chosen)
    return "".join(letters)

In [27]:
# Input:  A set of k-mers Motifs
# Output: The score of these k-mers.
def Score(Motifs):
    """Return the number of positions where a motif differs from the consensus.

    The consensus string is obtained from Consensus(Motifs); every motif
    is compared to it over the first k positions, where k is the length
    of the first motif.
    """
    k = len(Motifs[0])
    consensus = Consensus(Motifs)
    return sum(
        1
        for motif in Motifs
        for j in range(k)
        if motif[j] != consensus[j]
    )

In [28]:
# Input:  String Text and profile matrix Profile
# Output: Pr(Text, Profile)
def Pr(Text, Profile):
    """Return the probability of Text under the profile matrix Profile.

    Profile maps each symbol to a list of per-position probabilities;
    the result is the product of Profile[Text[i]][i] over all positions
    of Text (1 for an empty Text).
    """
    probability = 1
    for position, symbol in enumerate(Text):
        probability *= Profile[symbol][position]
    return probability

In [29]:
# The profile matrix assumes that the first row corresponds to A, the second corresponds to C,
# the third corresponds to G, and the fourth corresponds to T.
# You should represent the profile matrix as a dictionary whose keys are 'A', 'C', 'G', and 'T' and whose values are lists of floats
def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of text with the highest probability under profile.

    Every length-k window of text is scored with Pr(window, profile).
    A window replaces the current best only when its probability is
    strictly greater, so among equally probable windows the leftmost one
    is returned. If text has no full window (len(text) < k), the loop
    does not run and text[0:k] is returned unchanged.
    """
    best_probability = -1  # below any product of probabilities, so the first window always wins
    best_kmer = text[0:k]
    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        # Score each window exactly once and reuse the value.
        probability = Pr(window, profile)
        if probability > best_probability:
            best_probability = probability
            best_kmer = window
    return best_kmer

In [30]:
# http://www.mrgraeme.co.uk/greedy-motif-search/

In [31]:
# Input:  A list of strings Dna, and integers k and t (where t is the number of strings in Dna)
# Output: GreedyMotifSearch(Dna, k, t)
def GreedyMotifSearch(Dna, k, t):
    """Greedily search Dna for a low-scoring collection of t k-mers.

    Dna is a list of strings, k the motif length and t the number of
    strings to use. The initial candidate is the first k-mer of each
    string. Then, for every k-mer of Dna[0], a motif set is grown one
    string at a time: the profile of the motifs chosen so far is built
    with Profile and the most probable k-mer of the next string is
    appended. The set with the strictly lowest Score is returned.

    The number of start positions is derived from len(Dna[0]) only.
    """
    BestMotifs = [Dna[i][0:k] for i in range(t)]
    # Score of BestMotifs, computed on first use and refreshed only when
    # BestMotifs changes, instead of being recomputed on every iteration.
    best_score = None
    n = len(Dna[0])
    for start in range(n - k + 1):
        candidate = [Dna[0][start:start + k]]
        for j in range(1, t):
            # candidate holds exactly j motifs here, so it is the profile input as-is.
            profile = Profile(candidate)
            candidate.append(ProfileMostProbableKmer(Dna[j], k, profile))
        candidate_score = Score(candidate)
        if best_score is None:
            best_score = Score(BestMotifs)
        if candidate_score < best_score:
            BestMotifs = candidate
            best_score = candidate_score
    return BestMotifs

In [32]:
import math

# Four lists of values in (0, 1]; every value must be positive because
# its base-2 logarithm is taken below.
a = [0.2, 0.2, 0.9, 0.1, 0.1, 0.1, 0.3]
c = [0.1, 0.6, 0.4, 0.1, 0.2, 0.4, 0.6]
g = [1, 1, 0.9, 0.9, 0.1]
t = [0.7, 0.2, 0.1, 0.1, 0.5, 0.8, 0.7, 0.3, 0.4]
data_list = [a, c, g, t]

# Accumulate the sum of x * log2(x) over all values, list by list, and
# print its negation.
H = 0.0
for values in data_list:
    for x in values:
        H += x * math.log(x, 2)
print(-H)

9.916290005356972


In [33]:
# Input:  A set of kmers Motifs
# Output: CountWithPseudocounts(Motifs)
def CountWithPseudocounts(Motifs):
    """Count symbols per column of Motifs, starting every count at 1.

    Works like a plain column count over the alphabet A, C, G, T, except
    that each entry is initialised to 1 instead of 0, so no entry of the
    result is ever zero. The column width is the length of the first
    motif.
    """
    width = len(Motifs[0])
    count = {}
    for symbol in "ACGT":
        count[symbol] = [1] * width
    for kmer in Motifs:
        for position in range(width):
            count[kmer[position]][position] += 1
    return count

In [34]:
def ProfileWithPseudocounts(Motifs):
    """Return the profile matrix of Motifs computed from pseudocounted counts.

    The counts come from CountWithPseudocounts(Motifs) and each one is
    divided by t + 4, where t is the number of motifs.
    """
    t = len(Motifs)
    counts = CountWithPseudocounts(Motifs)
    denominator = t + 4
    return {
        symbol: [occurrences / denominator for occurrences in column]
        for symbol, column in counts.items()
    }

In [52]:
def Motifs(Profile, k, Dna):
    """Return the profile-most-probable k-mer of every string in Dna.

    The result is a list with one k-mer per string, in the same order as
    Dna, each chosen by ProfileMostProbableKmer with the given Profile.
    """
    return [ProfileMostProbableKmer(text, k, Profile) for text in Dna]

In [36]:
# selects a random set of motifs from a set of Dna strings
import random
# Input:  A list of strings Dna, and integers k and t
# Output: RandomMotifs(Dna, k, t)
# HINT:   You might not actually need to use t since t = len(Dna), but you may find it convenient
def RandomMotifs(Dna, k, t):
    """Pick one uniformly random k-mer from each of the first t strings of Dna.

    Dna is a list of strings, k the k-mer length and t the number of
    strings to sample from. Returns a list of t k-mers, one per string,
    in the order of Dna.

    The start offset is drawn from the length of the string being
    sampled, so strings of different lengths still yield full-length
    k-mers chosen from their whole range. random.randint raises
    ValueError if a string is shorter than k.
    """
    motifs = []
    for i in range(t):
        text = Dna[i]
        # randint is inclusive on both ends: the last valid start is len(text) - k.
        start = random.randint(0, len(text) - k)
        motifs.append(text[start:start + k])
    return motifs

In [37]:
# Repeats profile building and motif selection until the score stops improving, then returns the best motifs found
def RandomizedMotifSearch(Dna, k, t):
    """Refine a random motif set until its score no longer improves.

    Starts from RandomMotifs(Dna, k, t). Each round builds a profile with
    pseudocounts from the current motifs and replaces them with the
    profile-most-probable k-mer of every string. As soon as a round does
    not produce a strictly lower Score than the best set so far, that
    best set is returned.
    """
    current = RandomMotifs(Dna, k, t)
    best = current
    while True:
        profile = ProfileWithPseudocounts(current)
        current = Motifs(profile, k, Dna)
        if Score(current) >= Score(best):
            return best
        best = current

In [38]:
# Input: A dictionary Probabilities, where keys are k-mers and values are the probabilities of these k-mers (which do not necessarily sum up to 1)
# Output: A normalized dictionary where the probability of each k-mer was divided by the sum of all k-mers' probabilities
def Normalize(Probabilities):
    """Return a copy of Probabilities scaled so that its values sum to 1.

    Probabilities maps k-mers to non-normalized probabilities. Each value
    is divided by the sum of all values. The input dictionary is left
    untouched; a new dictionary with the same keys, in the same order,
    is returned.

    Raises ZeroDivisionError if the dictionary is non-empty and its
    values sum to zero. An empty dictionary yields an empty dictionary.
    """
    total = sum(Probabilities.values())
    return {kmer: value / total for kmer, value in Probabilities.items()}

In [39]:
#Biased Die
import random
def WeightedDie(Probabilities):
    """Draw one key of Probabilities at random, weighted by its value.

    Probabilities maps outcomes to probabilities that are expected to sum
    to 1. A uniform number in [0, 1] is drawn and the probabilities are
    subtracted from it in iteration order; the first outcome that brings
    the remainder to zero or below is returned.

    If the probabilities add up to slightly less than the drawn number
    (floating-point rounding, or input that was not normalized), the last
    outcome is returned instead of falling through with None. An empty
    mapping still yields None.
    """
    n = random.uniform(0, 1)
    outcome = None
    for outcome in Probabilities:
        n -= Probabilities[outcome]
        if n <= 0:
            return outcome
    # The remainder never reached zero: fall back to the final outcome.
    return outcome

In [40]:
# Scale a list of weights so that they add up to 1.
a = [0.45, 0.63, 0.09, 0.27, 0.36]
s = sum(a)
n = [weight / s for weight in a]
print(n)

[0.24999999999999997, 0.35, 0.04999999999999999, 0.15, 0.19999999999999996]


In [107]:
# Rebinding a name discards the earlier list; only the last assignment is visible.
a = [3, 4, 5, 6, 7, 8]
a = [3, 2]
a

[3, 2]