In [1]:
import numpy as np

# Draw a random 7 x 21 matrix of nucleotide letters (sampled uniformly,
# with replacement) and display it
dna = np.random.choice(['A', 'C', 'T', 'G'], size=(7, 21), replace=True)

print(dna)

# Identity substitution matrix: a nucleotide paired with itself scores 1,
# every mismatch scores 0
DNA_1 = {rowBase: {colBase: int(rowBase == colBase) for colBase in 'GCAT'}
         for rowBase in 'GCAT'}

# Profile: for each alignment position, the fraction of each residue
def profile(alignment):
  """Build a residue-frequency profile for a sequence alignment.

  Args:
    alignment: A sequence of equal-length sequences (strings, lists or
      the rows of a 2-D array).  The character '-' marks a gap.

  Returns:
    A list with one dict per alignment column, mapping each residue seen
    in that column to its fraction.  Fractions are relative to the total
    number of sequences, so gaps are not keys and lower the column sum
    below 1; a column made only of gaps gives an empty dict.  An empty
    alignment gives an empty list.

  Raises:
    IndexError: If a sequence is shorter than the first one.
  """
  # An empty alignment has no columns.  len() is used instead of a
  # truthiness test so NumPy arrays are accepted as well as lists.
  if len(alignment) == 0:
    return []

  n = len(alignment[0])
  nSeq = float(len(alignment))
  prof = []

  for i in range(n):
    counts = {}

    for seq in alignment:
      letter = seq[i]
      if letter == '-':
        continue  # gaps are not counted as residues

      counts[letter] = counts.get(letter, 0) + 1

    # Turn raw counts into fractions of all sequences (gaps included in
    # the denominator)
    prof.append({letter: count / nSeq for letter, count in counts.items()})

  return prof
                                                                      
                                                      
# Measures per-column conservation of an alignment
def getConservation(align, simMatrix):
  """Score how well conserved each column of an alignment is.

  For every column the score is the sum over all residue pairs (A, B)
  of fraction(A) * fraction(B) * simMatrix[A][B], divided by the
  self-similarity simMatrix[best][best] of the column's most frequent
  residue.

  Args:
    align: The alignment, passed unchanged to profile().
    simMatrix: Nested mapping such that simMatrix[a][b] is the
      similarity score of residues a and b.

  Returns:
    A list of floats, one per alignment column.  A column that contains
    only gaps scores 0.0.

  Raises:
    KeyError: If a residue in the alignment is missing from simMatrix.
    ZeroDivisionError: If the most frequent residue of a column has a
      self-similarity of zero.
  """
  conservation = []
  # Convert the alignment into per-column residue fractions
  prof = profile(align)

  for compDict in prof:
    # An all-gap column has an empty composition and no "best" residue.
    # Its score is 0.0, the value the formula tends to as the residue
    # fractions shrink, instead of failing on items[-1].
    if not compDict:
      conservation.append(0.0)
      continue

    # Ascending by fraction, so the most frequent residue ends up last;
    # the sort is stable, which keeps tie-breaking deterministic.
    items = sorted(compDict.items(), key=lambda x: x[1])

    score = 0.0

    for resA, compA in items:
      for resB, compB in items:
        score += compA * compB * simMatrix[resA][resB]

    bestLetter = items[-1][0]
    maxScore = simMatrix[bestLetter][bestLetter]

    # Normalise by the most frequent residue's self-similarity
    score /= maxScore
    conservation.append(score)

  return conservation


# Print the conservation score of each column of the random matrix,
# using the DNA_1 substitution matrix
print(getConservation(dna, DNA_1))

[['G' 'T' 'T' 'G' 'G' 'C' 'T' 'G' 'G' 'C' 'G' 'T' 'T' 'A' 'T' 'T' 'T' 'G'
  'T' 'C' 'G']
 ['A' 'T' 'C' 'G' 'A' 'A' 'T' 'A' 'A' 'T' 'C' 'T' 'G' 'C' 'G' 'C' 'A' 'T'
  'T' 'G' 'A']
 ['T' 'G' 'T' 'T' 'C' 'C' 'A' 'A' 'G' 'T' 'A' 'G' 'G' 'A' 'C' 'A' 'C' 'T'
  'A' 'A' 'T']
 ['T' 'T' 'A' 'A' 'A' 'T' 'C' 'G' 'G' 'G' 'G' 'A' 'G' 'C' 'A' 'A' 'T' 'G'
  'T' 'G' 'A']
 ['T' 'A' 'T' 'G' 'A' 'G' 'T' 'G' 'C' 'C' 'T' 'A' 'A' 'A' 'T' 'T' 'G' 'T'
  'T' 'A' 'G']
 ['T' 'T' 'G' 'G' 'G' 'C' 'C' 'T' 'G' 'G' 'C' 'C' 'G' 'G' 'G' 'T' 'T' 'A'
  'T' 'G' 'G']
 ['C' 'A' 'G' 'G' 'C' 'G' 'A' 'A' 'T' 'A' 'C' 'G' 'C' 'T' 'T' 'T' 'A' 'T'
  'G' 'G' 'T']]
[0.3877551020408163, 0.42857142857142855, 0.30612244897959184, 0.5510204081632654, 0.346938775510204, 0.30612244897959184, 0.346938775510204, 0.3877551020408163, 0.3877551020408163, 0.26530612244897955, 0.30612244897959184, 0.26530612244897955, 0.3877551020408163, 0.30612244897959184, 0.30612244897959184, 0.42857142857142855, 0.30612244897959184, 0.42857142857142855, 0.5510