# In [1]:
import numpy as np
from math import log

# Random 7 x 21 array of nucleotide letters, sampled with replacement
dna = np.random.choice(list('ACTG'), size=(7, 21), replace=True)

# Identity scoring table: 1 where the two bases are the same, 0 otherwise
DNA_1 = {rowBase: {colBase: int(rowBase == colBase) for colBase in 'GCAT'}
         for rowBase in 'GCAT'}

# calculates substitution matrix
def calcSubstitutionMatrix(alignments, alphabet, maxVal, smooth=5):
    """Build an integer log-odds substitution matrix from alignments.

    For every alignment column, each ordered pair of non-gap letters
    (a letter is also paired with itself) is counted as one observed
    substitution.  The observed pair frequency is blended with the
    frequency expected from the overall letter composition, the log of
    the observed/expected ratio is taken, and the scores are scaled so
    that the largest log-odds value maps to ``maxVal``.

    Args:
        alignments: Iterable of alignments.  Each alignment is an
            indexable collection of sequences; the number of columns is
            taken from its first sequence and every sequence is indexed
            up to that length.  The character ``'-'`` is treated as a
            gap and ignored.
        alphabet: Iterable of the letters to score (iterating a dict
            uses its keys).  Repeated letters are ignored.
        maxVal: Value the largest log-odds score is scaled to.
        smooth: Smoothing constant.  A pair seen ``n`` times gets weight
            ``1 / (1 + n / smooth)`` on the expected frequency, so rarely
            seen pairs are pulled towards a score of zero.

    Returns:
        dict of dicts ``matrix[a][b]`` holding ``int`` scores (truncated
        towards zero).  Every score is 0 when no residues were counted
        or when the largest log-odds value is itself zero.

    Raises:
        KeyError: If a non-gap letter in an alignment is not in
            ``alphabet``.
    """
    # De-duplicate (keeping first-seen order) so no cell is scored twice,
    # and materialise so one-shot iterables can be walked repeatedly.
    symbols = list(dict.fromkeys(alphabet))

    pairCounts = {a: {b: 0 for b in symbols} for a in symbols}
    counts = {a: 0 for a in symbols}

    # Floats, so the frequency divisions below are never integer divisions.
    totalRes = 0.0
    totalSub = 0.0

    for align in alignments:
        for i in range(len(align[0])):
            letters = [seq[i] for seq in align if seq[i] != '-']

            for letterA in letters:
                counts[letterA] += 1
                for letterB in letters:
                    pairCounts[letterA][letterB] += 1

            numLetters = len(letters)
            totalRes += numLetters
            totalSub += numLetters * numLetters

    # Result table starts as all zeros; cells that are never scored stay 0.
    matrix = {a: {b: 0 for b in symbols} for a in symbols}

    if not totalRes:
        # Nothing was counted (no alignments, or only gaps): there is no
        # composition to normalise by, so report a neutral matrix.
        return matrix

    averageComp = {s: counts[s] / totalRes for s in symbols}

    logOdds = {}
    for resA in symbols:
        for resB in symbols:
            expected = averageComp[resA] * averageComp[resB]
            if not expected:
                # At least one of the two letters never occurred.
                continue

            pairCount = pairCounts[resA][resB]
            weight = 1.0 / (1.0 + (pairCount / smooth))
            observed = (weight * expected
                        + (1 - weight) * (pairCount / totalSub))

            logOdds[resA, resB] = log(observed / expected)

    # totalRes > 0 guarantees at least one letter occurred, so logOdds
    # is non-empty here.
    maxScore = abs(max(logOdds.values()))
    if not maxScore:
        # Every scored pair matched expectation exactly; scaling would
        # divide by zero, so all scores are simply 0.
        return matrix

    for (resA, resB), score in logOdds.items():
        matrix[resA][resB] = int(maxVal * score / maxScore)

    return matrix
  

# Score the single random alignment; iterating DNA_1 yields its keys,
# which serve as the alphabet.
print(calcSubstitutionMatrix(alignments=[dna], alphabet=DNA_1, maxVal=10))

# Example output (values differ between runs because the input is random):
# {'G': {'G': 4, 'C': -2, 'A': 0, 'T': -1}, 'C': {'G': -2, 'C': 7, 'A': -6, 'T': -2}, 'A': {'G': 0, 'C': -6, 'A': 10, 'T': -2}, 'T': {'G': -1, 'C': -2, 'A': -2, 'T': 3}}
