In [135]:
import json
from src.utils import DataConfig
from src.parser_files import get_letters
import numpy as np
from src.plots import *
import pandas as pd
import itertools

letters = get_letters("data/letters.txt")

# Label the loaded letters with consecutive characters starting at chr(65) == "A",
# in the order get_letters returned them.
letters_dict = {
    chr(code_point): pattern
    for code_point, pattern in enumerate(letters, start=65)
}

# Every unordered selection of 4 letter labels.
all_groups = list(itertools.combinations(letters_dict.keys(), r=4))
avg_dot_product = []  # entries: (mean |<x_i, x_j>| over distinct pairs, group)
max_dot_product = []  # entries: ((max |<x_i, x_j>|, number of pairs reaching it), group)

for group_keys in all_groups:
    # Direct lookups instead of scanning the whole dict for every group.
    group = np.array([letters_dict[key] for key in group_keys])
    gram = group.dot(group.T)

    # G @ G.T is symmetric, so the strict upper triangle (k=1) holds every
    # distinct pair exactly once and leaves out the self-products on the diagonal.
    pair_products = np.abs(gram[np.triu_indices(len(group), k=1)])

    avg_dot_product.append((pair_products.mean(), group_keys))

    max_v = pair_products.max()
    # Counting over the pairs only (rather than over the full matrix with a zeroed
    # diagonal, halved) keeps the count right when max_v == 0: the zeroed diagonal
    # entries would otherwise be counted as ties. The count is an int.
    max_dot_product.append(((max_v, np.count_nonzero(pair_products == max_v)), group_keys))

# Sorting the (average, group) tuples orders the rows by ascending average,
# with ties broken by the group tuple.
avg_rows = sorted(avg_dot_product)
df = pd.DataFrame(avg_rows, columns=["|<,>| medio", "grupo"])

# First 15 rows of the ascending table, averages shown with two decimals, index hidden.
(
    df.head(15)
    .style
    .format({"|<,>| medio": "{:.2f}"})
    .hide(axis="index")
)

"|<,>| medio",grupo
1.33,"('A', 'L', 'T', 'V')"
1.33,"('L', 'R', 'T', 'X')"
1.67,"('A', 'J', 'L', 'V')"
1.67,"('F', 'I', 'U', 'X')"
1.67,"('I', 'L', 'R', 'X')"
1.67,"('I', 'R', 'U', 'X')"
1.67,"('L', 'R', 'T', 'V')"
2.0,"('A', 'L', 'T', 'X')"
2.0,"('A', 'L', 'V', 'Z')"
2.0,"('F', 'U', 'V', 'Z')"


In [136]:
# Last 15 rows of df, averages shown with two decimals, index hidden.
(
    df.tail(15)
    .style
    .format({"|<,>| medio": "{:.2f}"})
    .hide(axis="index")
)

"|<,>| medio",grupo
17.67,"('C', 'E', 'G', 'O')"
17.67,"('E', 'G', 'O', 'S')"
18.0,"('B', 'E', 'G', 'S')"
18.0,"('C', 'E', 'G', 'S')"
18.0,"('C', 'O', 'Q', 'U')"
18.0,"('G', 'O', 'Q', 'S')"
18.0,"('G', 'O', 'Q', 'U')"
18.33,"('B', 'D', 'O', 'Q')"
18.33,"('H', 'M', 'N', 'W')"
18.67,"('A', 'H', 'P', 'R')"


In [137]:
# Sorting the ((max, count), group) tuples orders rows by ascending max,
# then by count, then by the group tuple.
max_rows = sorted(max_dot_product)
df2 = pd.DataFrame(max_rows, columns=["|<,>| max", "grupo"])

# Each cell of the "|<,>| max" column is a (max, count) tuple; render it as text.
df2.head(15).style.format(
    {"|<,>| max": lambda pair: "max: {:,.0f} | count: {:,.0f}".format(*pair)}
).hide(axis="index")

"|<,>| max",grupo
max: 3 | count: 1,"('A', 'L', 'T', 'V')"
max: 3 | count: 1,"('L', 'R', 'T', 'X')"
max: 3 | count: 2,"('A', 'J', 'L', 'V')"
max: 3 | count: 2,"('F', 'I', 'U', 'X')"
max: 3 | count: 2,"('I', 'R', 'U', 'X')"
max: 3 | count: 2,"('L', 'R', 'T', 'V')"
max: 3 | count: 3,"('F', 'U', 'V', 'Z')"
max: 3 | count: 3,"('L', 'P', 'T', 'V')"
max: 3 | count: 3,"('P', 'U', 'V', 'Z')"
max: 3 | count: 3,"('R', 'U', 'V', 'Z')"


In [138]:
# Attach each group's average to its (max, count) record. The join key is named
# explicitly instead of relying on merge()'s default (every column name the two
# frames share), so adding a column to either frame cannot silently change the join.
df3 = df2.merge(df, on="grupo")[["|<,>| max", "|<,>| medio", "grupo"]]

# First 15 rows: (max, count) tuples rendered as text, averages with two decimals.
df3.head(15).style.format(
    {
        "|<,>| max": lambda pair: "max: {:,.0f} | count: {:,.0f}".format(*pair),
        "|<,>| medio": "{:.2f}",
    }
).hide(axis="index")

"|<,>| max","|<,>| medio",grupo
max: 3 | count: 1,1.33,"('A', 'L', 'T', 'V')"
max: 3 | count: 1,1.33,"('L', 'R', 'T', 'X')"
max: 3 | count: 2,1.67,"('A', 'J', 'L', 'V')"
max: 3 | count: 2,1.67,"('F', 'I', 'U', 'X')"
max: 3 | count: 2,1.67,"('I', 'R', 'U', 'X')"
max: 3 | count: 2,1.67,"('L', 'R', 'T', 'V')"
max: 3 | count: 3,2.0,"('F', 'U', 'V', 'Z')"
max: 3 | count: 3,2.0,"('L', 'P', 'T', 'V')"
max: 3 | count: 3,2.0,"('P', 'U', 'V', 'Z')"
max: 3 | count: 3,2.0,"('R', 'U', 'V', 'Z')"


In [139]:
# Last 25 rows of the merged table: (max, count) tuples rendered as text,
# averages with two decimals, index hidden.
df3.tail(25).style.format(
    {
        "|<,>| max": lambda pair: "max: {:,.0f} | count: {:,.0f}".format(*pair),
        "|<,>| medio": "{:.2f}",
    }
).hide(axis="index")

"|<,>| max","|<,>| medio",grupo
max: 23 | count: 1,11.33,"('O', 'Q', 'S', 'W')"
max: 23 | count: 1,12.33,"('O', 'Q', 'S', 'X')"
max: 23 | count: 1,13.0,"('O', 'Q', 'S', 'Y')"
max: 23 | count: 1,12.0,"('O', 'Q', 'S', 'Z')"
max: 23 | count: 1,11.67,"('O', 'Q', 'T', 'U')"
max: 23 | count: 1,6.0,"('O', 'Q', 'T', 'V')"
max: 23 | count: 1,8.33,"('O', 'Q', 'T', 'W')"
max: 23 | count: 1,7.33,"('O', 'Q', 'T', 'X')"
max: 23 | count: 1,10.0,"('O', 'Q', 'T', 'Y')"
max: 23 | count: 1,8.0,"('O', 'Q', 'T', 'Z')"
