# Testing Penalty terms

In [1]:
import numpy as np
from itertools import permutations
from copy import deepcopy
import c_module

In [32]:
def matrix_2_bit_stateV2(my_matrix,original_matrix):
    """
    Maps some given matrix repr. of a MSA "my_matrix" to a corresponding
    bitstring via the column encoding, whilst respecting
    original order in "original_matrix".

    Every non-gap letter of "original_matrix" owns one register of
    `nr_of_cols` bits (registers are ordered row by row, and left to right
    within a row). The register of a letter has a single 1 at the column that
    letter occupies in "my_matrix". Repeated letters within a row are paired
    in order of appearance: the k'th occurrence in "my_matrix" is matched to
    the k'th occurrence in "original_matrix".

    Args:
        my_matrix: 2D numpy array holding the current alignment,
                   e.g. array([['A', 'C', 'C', 'T'],
                               ['A', 'C', '_', '_'],
                               ['A', 'T', '_', '_']])
        original_matrix: 2D numpy array holding the reference alignment; its
                   rows must contain the same letters as the rows of
                   "my_matrix" ('_' marks a gap).
    Returns:
        1D numpy array (float) containing bit repr., e.g.: np.array([1,0,0,1...])
    Raises:
        IndexError: if a letter of "my_matrix" has no unmatched counterpart in
                    the same row of "original_matrix".

    """
    ## Initial definitions
    gap = "_"
    nr_of_rows, nr_of_cols = my_matrix.shape[0], my_matrix.shape[1]
    ## List of lists of letters in original matrix rows
    original_letters = [[letter for letter in original_matrix[s] if letter != gap]
                        for s in range(nr_of_rows)]
    ## One register (row of bits) per letter of the original matrix
    regs = np.zeros((sum(len(row) for row in original_letters), nr_of_cols))

    ## Index of the first register belonging to the current row
    row_offset = 0
    for s in range(nr_of_rows):
        letters = original_letters[s]
        ## Tracks which original letters are already paired. A separate flag
        ## list (instead of overwriting letters with a marker character)
        ## keeps every symbol, including "O", usable as a real letter and
        ## leaves both input matrices untouched.
        matched = [False] * len(letters)
        for col_idx in range(nr_of_cols):
            letter = my_matrix[s][col_idx]
            if letter == gap:
                continue
            ## First not-yet-paired occurrence of this letter in the original row
            for n, original_letter in enumerate(letters):
                if not matched[n] and original_letter == letter:
                    break
            else:
                ## IndexError is kept as the failure type of the lookup
                raise IndexError(
                    f"letter {letter!r} in row {s}, column {col_idx} has no "
                    "unmatched counterpart in original_matrix")
            matched[n] = True
            ## Setting reg value
            regs[row_offset + n][col_idx] = 1
        row_offset += len(letters)
    return regs.flatten()

### Respecting initial order term

First, we test the function designed to penalize incorrect ordering:

\begin{equation}
    \forall n<n',\,\forall i<i'\,\, \sum_{s}x_{s,n',i}x_{s,n,i'} = 0\quad\Longrightarrow\quad p_3\sum_s\sum_{n<n'}\sum_{i<i'}x_{s,n',i}x_{s,n,i'}\quad\quad\quad\quad\quad
\end{equation}

such that it returns an integer > 0 for any incorrect order and exactly 0 for any correct order.

Here the following string is the starting point:

In [33]:
# Single-row test alignment: three letters (A, C, T) and two gaps ("_") over five columns.
test_DNA_string = np.array([["A","C","_","T","_"]])
# Bare expression so the notebook cell displays the array.
test_DNA_string

array([['A', 'C', '_', 'T', '_']], dtype='<U1')

Next, a function is created that takes the above string, generates all of its distinct permutations, transforms each of them
into the corresponding bitstring, and evaluates the penalty according to Eq. (1):

In [34]:
def order_penalty(original_matrix):
    """
    Evaluate the order-violation count for every distinct permutation of the
    first row of "original_matrix".

    Each permutation is turned into a bitstring with matrix_2_bit_stateV2
    (using "original_matrix" as the reference order). For every pair of
    letters n1 < n2 and every pair of columns i1 < i2 the product
    x[n2, i1] * x[n1, i2] is added up, i.e. one unit per pair of letters
    whose left-to-right order is swapped.

    Args:
        original_matrix: 2D numpy array; only its first row is permuted.
    Returns:
        list of [permutation (list of symbols), penalty (int)] pairs, in the
        iteration order of the set of permutations.
    """
    reference_row = original_matrix[0]
    ## M: number of columns
    n_cols = original_matrix.shape[1]
    ## N_s: number of non-gap symbols in the reference row
    n_letters = sum(1 for symbol in reference_row if symbol != "_")

    ## All distinct arrangements of the reference row, each as a (1, M) matrix
    arrangements = [np.array(list(ordering)).reshape((1, n_cols))
                    for ordering in set(permutations(reference_row))]
    encoded = [matrix_2_bit_stateV2(arrangement, original_matrix)
               for arrangement in arrangements]

    history = []
    for arrangement, bits in zip(arrangements, encoded):
        ## Sum over n1 < n2 and i1 < i2 of x[n2, i1] * x[n1, i2]
        violations = sum(
            int(bits[n_cols * later + left] * bits[n_cols * earlier + right])
            for earlier in range(n_letters)
            for later in range(earlier + 1, n_letters)
            for left in range(n_cols)
            for right in range(left + 1, n_cols)
        )
        history.append([list(arrangement[0]), violations])

    return history

Finally each permutation is displayed alongside the corresponding penalty:

In [35]:
# Print a header, then let the notebook display the list returned by order_penalty.
print("#### [permutation, penalty] #####")
order_penalty(test_DNA_string)

#### [permutation, penalty] #####


[[['_', 'A', 'T', 'C', '_'], 1],
 [['A', 'T', '_', 'C', '_'], 1],
 [['A', '_', '_', 'T', 'C'], 1],
 [['T', '_', 'C', 'A', '_'], 3],
 [['_', '_', 'T', 'C', 'A'], 3],
 [['T', '_', '_', 'C', 'A'], 3],
 [['_', '_', 'A', 'T', 'C'], 1],
 [['C', '_', 'A', '_', 'T'], 1],
 [['C', 'T', 'A', '_', '_'], 2],
 [['T', 'A', '_', '_', 'C'], 2],
 [['A', '_', '_', 'C', 'T'], 0],
 [['_', '_', 'C', 'A', 'T'], 1],
 [['A', '_', 'C', '_', 'T'], 0],
 [['C', 'A', '_', '_', 'T'], 1],
 [['A', 'T', 'C', '_', '_'], 1],
 [['_', 'C', 'T', '_', 'A'], 2],
 [['T', 'C', '_', '_', 'A'], 3],
 [['_', 'A', '_', 'T', 'C'], 1],
 [['C', '_', 'T', 'A', '_'], 2],
 [['C', '_', 'A', 'T', '_'], 1],
 [['T', '_', 'A', 'C', '_'], 2],
 [['_', 'T', 'A', '_', 'C'], 2],
 [['A', 'C', '_', 'T', '_'], 0],
 [['_', '_', 'T', 'A', 'C'], 2],
 [['A', 'C', '_', '_', 'T'], 0],
 [['C', 'A', 'T', '_', '_'], 1],
 [['_', 'C', '_', 'T', 'A'], 2],
 [['_', '_', 'A', 'C', 'T'], 0],
 [['A', '_', 'C', 'T', '_'], 0],
 [['A', '_', 'T', '_', 'C'], 1],
 [['_', 'T