In [18]:
from typing import Dict, List, Tuple
import numpy as np
from matplotlib import pyplot as plt
from Bio.SubsMat import MatrixInfo

In [111]:

def substitution_matrix(scores: Dict[Tuple[str, str], float], alphabet=None, gap_penalty=0):
    """Build a dense, symmetric substitution matrix from pairwise scores.

    Parameters
    ----------
    scores:
        Mapping from a pair of letters ``(x, y)`` to the score of substituting
        one for the other. Only one orientation of each pair is needed: every
        score is written to both ``[x, y]`` and ``[y, x]``.
    alphabet:
        Ordered letters that define the rows/columns of the matrix. When
        ``None``, the alphabet is the sorted set of all letters that occur in
        either position of any key of ``scores``.
    gap_penalty:
        Constant added to every entry outside the first row and first column.
        NOTE(review): the first row/column is left untouched -- presumably
        index 0 is meant to hold a special symbol; confirm the intent.

    Returns
    -------
    (matrix, alphabet, letter2index)
        ``matrix`` is an ``(n, n)`` float array, ``alphabet`` is the alphabet
        actually used (the caller's object when one was passed), and
        ``letter2index`` maps each letter to its row/column index.
    """
    if alphabet is None:
        # Iterate the keys (letter pairs) and take BOTH letters of each pair,
        # so a letter that only ever appears second is not lost.
        alphabet = sorted({letter for pair in scores for letter in pair})
    letter2index = {letter: i for i, letter in enumerate(alphabet)}
    n = len(alphabet)
    matrix = np.zeros((n, n))
    for (x, y), score in scores.items():
        xi = letter2index.get(x)
        yi = letter2index.get(y)
        # Pairs involving a letter outside the alphabet are skipped; entries
        # never mentioned in `scores` stay at 0.
        if xi is not None and yi is not None:
            matrix[xi, yi] = score
            matrix[yi, xi] = score
    matrix[1:, 1:] += gap_penalty
    return matrix, alphabet, letter2index


In [78]:
# Dense BLOSUM30 score matrix over 20 amino-acid letters, with rows/columns
# in the fixed order given by the alphabet string.
mat, alph, index = substitution_matrix(
    MatrixInfo.blosum30,
    alphabet=[*'ACDEFGHIKLMNPQRSTVWY'],
)
n_amino = len(alph)  # number of letters in the alphabet

In [79]:
# Scale entry (i, j) by 1 / sqrt(mat[i, i] * mat[j, j]) so every diagonal
# entry becomes 1, then multiply by 10 and round to integers.
# NOTE(review): assumes all diagonal scores are strictly positive.
norm = np.reciprocal(np.sqrt(mat.diagonal()))
mat2 = norm[:, np.newaxis] * mat * norm[np.newaxis, :]
mat2 = np.round(mat2 * 10).astype(int)

In [80]:
# Subtract half of each letter's self-score from its row and from its column,
# which makes every diagonal entry exactly 0.
norm3 = mat.diagonal() / 2
mat3 = mat - norm3[:, np.newaxis] - norm3[np.newaxis, :]

In [81]:
# Dump the raw score matrix, one tab-separated line per row.
for row in mat:
    print('\t'.join(str(value) for value in row))

4	-3	0	0	-2	0	-2	0	0	-1	1	0	-1	1	-1	1	1	1	-5	-4
-3	17	-3	1	-3	-4	-5	-2	-3	0	-2	-1	-3	-2	-2	-2	-2	-2	-2	-6
0	-3	9	1	-5	-1	-2	-4	0	-1	-3	1	-1	-1	-1	0	-1	-2	-4	-1
0	1	1	6	-4	-2	0	-3	2	-1	-1	-1	1	2	-1	0	-2	-3	-1	-2
-2	-3	-5	-4	10	-3	-3	0	-1	2	-2	-1	-4	-3	-1	-1	-2	1	1	3
0	-4	-1	-2	-3	8	-3	-1	-1	-2	-2	0	-1	-2	-2	0	-2	-3	1	-3
-2	-5	-2	0	-3	-3	14	-2	-2	-1	2	-1	1	0	-1	-1	-2	-3	-5	0
0	-2	-4	-3	0	-1	-2	6	-2	2	1	0	-3	-2	-3	-1	0	4	-3	-1
0	-3	0	2	-1	-1	-2	-2	4	-2	2	0	1	0	1	0	-1	-2	-2	-1
-1	0	-1	-1	2	-2	-1	2	-2	4	2	-2	-3	-2	-2	-2	0	1	-2	3
1	-2	-3	-1	-2	-2	2	1	2	2	6	0	-4	-1	0	-2	0	0	-3	-1
0	-1	1	-1	-1	0	-1	0	0	-2	0	8	-3	-1	-2	0	1	-2	-7	-4
-1	-3	-1	1	-4	-1	1	-3	1	-3	-4	-3	11	0	-1	-1	0	-4	-3	-2
1	-2	-1	2	-3	-2	0	-2	0	-2	-1	-1	0	8	3	-1	0	-3	-1	-1
-1	-2	-1	-1	-1	-2	-1	-3	1	-2	0	-2	-1	3	8	-1	-3	-1	0	0
1	-2	0	0	-1	0	-1	-1	0	-2	-2	0	-1	-1	-1	4	2	-1	-3	-2
1	-2	-1	-2	-2	-2	-2	0	-1	0	0	1	0	0	-3	2	5	1	-5	-1
1	-2	-2	-3	1	-3	-3	4	-2	1	0	-2	-4	-3	-1	-1	1	5	-3	1
-5	-2	-4	-1	1	1	-5	-3	-2	-2	-3	-7	-3	-1	0	-3	-5	-3	20	

In [83]:
# Dump the normalised, integer-rounded matrix, one tab-separated line per row.
for row in mat2:
    print('\t'.join(str(value) for value in row))

10	-4	0	0	-3	0	-3	0	0	-2	2	0	-2	2	-2	2	2	2	-6	-7
-4	10	-2	1	-2	-3	-3	-2	-4	0	-2	-1	-2	-2	-2	-2	-2	-2	-1	-5
0	-2	10	1	-5	-1	-2	-5	0	-2	-4	1	-1	-1	-1	0	-1	-3	-3	-1
0	1	1	10	-5	-3	0	-5	4	-2	-2	-1	1	3	-1	0	-4	-5	-1	-3
-3	-2	-5	-5	10	-3	-3	0	-2	3	-3	-1	-4	-3	-1	-2	-3	1	1	3
0	-3	-1	-3	-3	10	-3	-1	-2	-4	-3	0	-1	-2	-2	0	-3	-5	1	-4
-3	-3	-2	0	-3	-3	10	-2	-3	-1	2	-1	1	0	-1	-1	-2	-4	-3	0
0	-2	-5	-5	0	-1	-2	10	-4	4	2	0	-4	-3	-4	-2	0	7	-3	-1
0	-4	0	4	-2	-2	-3	-4	10	-5	4	0	2	0	2	0	-2	-4	-2	-2
-2	0	-2	-2	3	-4	-1	4	-5	10	4	-4	-5	-4	-4	-5	0	2	-2	5
2	-2	-4	-2	-3	-3	2	2	4	4	10	0	-5	-1	0	-4	0	0	-3	-1
0	-1	1	-1	-1	0	-1	0	0	-4	0	10	-3	-1	-2	0	2	-3	-6	-5
-2	-2	-1	1	-4	-1	1	-4	2	-5	-5	-3	10	0	-1	-2	0	-5	-2	-2
2	-2	-1	3	-3	-2	0	-3	0	-4	-1	-1	0	10	4	-2	0	-5	-1	-1
-2	-2	-1	-1	-1	-2	-1	-4	2	-4	0	-2	-1	4	10	-2	-5	-2	0	0
2	-2	0	0	-2	0	-1	-2	0	-5	-4	0	-2	-2	-2	10	4	-2	-3	-3
2	-2	-1	-4	-3	-3	-2	0	-2	0	0	2	0	0	-5	4	10	2	-5	-1
2	-2	-3	-5	1	-5	-4	7	-4	2	0	-3	-5	-5	-2	-2	2	10	-3	1
-6	-1	-3	-1	1	1	-3	-3	-2	-2	-3	-6	-2	-1	

In [84]:
# Dump the zero-diagonal shifted matrix, one tab-separated line per row.
for row in mat3:
    print('\t'.join(str(value) for value in row))

0.0	-13.5	-6.5	-5.0	-9.0	-6.0	-11.0	-5.0	-4.0	-5.0	-4.0	-6.0	-8.5	-5.0	-7.0	-3.0	-3.5	-3.5	-17.0	-10.5
-13.5	0.0	-16.0	-10.5	-16.5	-16.5	-20.5	-13.5	-13.5	-10.5	-13.5	-13.5	-17.0	-14.5	-14.5	-12.5	-13.0	-13.0	-20.5	-19.0
-6.5	-16.0	0.0	-6.5	-14.5	-9.5	-13.5	-11.5	-6.5	-7.5	-10.5	-7.5	-11.0	-9.5	-9.5	-6.5	-8.0	-9.0	-18.5	-10.0
-5.0	-10.5	-6.5	0.0	-12.0	-9.0	-10.0	-9.0	-3.0	-6.0	-7.0	-8.0	-7.5	-5.0	-8.0	-5.0	-7.5	-8.5	-14.0	-9.5
-9.0	-16.5	-14.5	-12.0	0.0	-12.0	-15.0	-8.0	-8.0	-5.0	-10.0	-10.0	-14.5	-12.0	-10.0	-8.0	-9.5	-6.5	-14.0	-6.5
-6.0	-16.5	-9.5	-9.0	-12.0	0.0	-14.0	-8.0	-7.0	-8.0	-9.0	-8.0	-10.5	-10.0	-10.0	-6.0	-8.5	-9.5	-13.0	-11.5
-11.0	-20.5	-13.5	-10.0	-15.0	-14.0	0.0	-12.0	-11.0	-10.0	-8.0	-12.0	-11.5	-11.0	-12.0	-10.0	-11.5	-12.5	-22.0	-11.5
-5.0	-13.5	-11.5	-9.0	-8.0	-8.0	-12.0	0.0	-7.0	-3.0	-5.0	-7.0	-11.5	-9.0	-10.0	-6.0	-5.5	-1.5	-16.0	-8.5
-4.0	-13.5	-6.5	-3.0	-8.0	-7.0	-11.0	-7.0	0.0	-6.0	-3.0	-6.0	-6.5	-6.0	-5.0	-4.0	-5.5	-6.5	-14.0	-7.5
-5.0	-10.5	-7.5	-6.0	-5.0	-8

In [130]:
# Rebuild the BLOSUM30 matrix, add a constant offset to every entry, then
# scale entry (i, j) by 1 / sqrt(d_i * d_j) of the shifted diagonal so the
# diagonal becomes 1 (up to floating-point rounding).
mat, alph, index = substitution_matrix(
    MatrixInfo.blosum30,
    alphabet=[*'ACDEFGHIKLMNPQRSTVWY'],
)
gappen = 10  # constant offset added to all scores before normalising
mat4 = mat + gappen
norm = np.reciprocal(np.sqrt(mat4.diagonal()))
mat4 = norm[:, np.newaxis] * mat4 * norm[np.newaxis, :]
for row in mat4:
    print('\t'.join(str(value) for value in row))

1.0000000000000002	0.36004114991154784	0.6131393394849657	0.6681531047810609	0.47809144373375745	0.6299407883487121	0.43643578047198484	0.6681531047810609	0.7142857142857143	0.6428571428571429	0.7349684152591671	0.6299407883487121	0.5248906591678238	0.6929348671835834	0.5669467095138409	0.7857142857142858	0.7590721152765896	0.7590721152765896	0.24397501823713327	0.36788360369097944
0.3600411499115478	1.0	0.30905754998184354	0.529237746757157	0.30123203803835463	0.2721655269759087	0.19641855032959657	0.3849001794597505	0.3600411499115478	0.5143444998736397	0.3849001794597505	0.4082482904638631	0.2939723678960656	0.3628873693012116	0.3628873693012116	0.4114755998989118	0.3975231959999626	0.3975231959999626	0.2810913475705226	0.17660431427533915
0.6131393394849658	0.30905754998184354	0.9999999999999998	0.6308932681440448	0.2564945880212885	0.4866642633922876	0.3746343246326776	0.3441236008058426	0.6131393394849658	0.5518254055364692	0.40147753427348304	0.5948118774794626	0.450563556889582

In [125]:
# NOTE(review): `matx` is not defined in any cell visible here, and this
# cell's execution count (125) precedes the cell above (130) -- it relies on
# leftover kernel state and will raise NameError on Restart & Run All.
# Sums `matx` along axis 1 (one total per row, presumably of a 20x20 matrix
# -- confirm which matrix this was).
matx.sum(1)

array([-27., -80., -39., -40., -59., -49., -28., -30., -40., -46., -32.,
       -20., -39., -28., -31., -16., -26., -35., -85., -52.])