# Incremental Bitstring Kernel Encoding

$[(A/B/C)(+/*/Stop)] + [(A/B/C)(+/*/Stop)] + ... + [(A/B/C)(Stop)]$

$[(|\mathcal{B}|D)(|\mathcal{O}|)] + ...$

$A_1 = 1 = 0b0$

$A_2 = 2 = 0b1$

$A_3 = 3 = 0b10$

$A_4 = 4 = 0b11$

$B_1 = 5 = 0b100$

$...$

TODO: 
- fix dtypes
- clarify notation and examples
- think about how to handle stop and see if this is an encoding worth pursuing
- refactor

In [2]:
import numpy as np

In [34]:
# Operation codes: the two bits that follow each kernel code.
add, mult, stop = (np.array(code) for code in ([0, 0], [0, 1], [1, 0]))

# Every operation code has the same width, so measure any one of them.
ops_n_bits = add.size

# Number of dimension indices (1..D) per family, and the family labels.
D = 4
kernel_families = ['A', 'B', 'C']

# Integer id of each (family, dimension) pair, numbered row by row:
# row i is kernel_families[i], column d is dimension d + 1.
k_map = np.arange(len(kernel_families) * D).reshape(-1, D)

In [39]:
# Width of one kernel code: the smallest number of bits that can tell
# every entry of k_map (one per family/dimension pair) apart.
kern_bit_length = int(np.ceil(np.log2(k_map.size)))

def encode_kernel(family, dim):
    """Encode one base kernel as a fixed-width array of bits.

    The kernel's integer id is looked up in ``k_map`` and written in binary,
    most significant bit first, left-padded with zeros to
    ``kern_bit_length`` entries.

    Parameters
    ----------
    family : str
        Family label; must be an element of ``kernel_families``.
    dim : int
        1-based dimension index, ``1 <= dim <= D``.

    Returns
    -------
    numpy.ndarray
        Float array of length ``kern_bit_length`` holding 0.0 / 1.0 values.

    Raises
    ------
    ValueError
        If ``family`` is not in ``kernel_families``.
    IndexError
        If ``dim`` is outside ``1..D``.
    """
    # Reject dim < 1 explicitly: dim - 1 would otherwise become a negative
    # index and silently wrap around to a different dimension's code.
    if not 1 <= dim <= D:
        raise IndexError('dim must be between 1 and {}, got {!r}'.format(D, dim))

    family_idx = kernel_families.index(family)
    kernel_id = int(k_map[family_idx, dim - 1])

    # Zero-padded binary string, e.g. 5 -> '0101' when kern_bit_length == 4.
    bit_chars = format(kernel_id, 'b').zfill(kern_bit_length)
    return np.array([float(ch) for ch in bit_chars])

In [40]:
# Show the bit pattern of every kernel, one group of lines per family.
for family in kernel_families:
    for d in range(1, D + 1):
        label = family + str(d) + ':'
        print(label, encode_kernel(family, d))
    print('')

A1: [0. 0. 0. 0.]
A2: [0. 0. 0. 1.]
A3: [0. 0. 1. 0.]
A4: [0. 0. 1. 1.]

B1: [0. 1. 0. 0.]
B2: [0. 1. 0. 1.]
B3: [0. 1. 1. 0.]
B4: [0. 1. 1. 1.]

C1: [1. 0. 0. 0.]
C2: [1. 0. 0. 1.]
C3: [1. 0. 1. 0.]
C4: [1. 0. 1. 1.]



In [38]:
# Encode the three base kernels used in the worked example.
A1, B2, C4 = (
    encode_kernel(fam, dim) for fam, dim in (('A', 1), ('B', 2), ('C', 4))
)

# A unit is a kernel code immediately followed by its operation code.
A1_plus = np.concatenate([A1, add])
B2_mult = np.concatenate([B2, mult])
C4_stop = np.concatenate([C4, stop])

# The full expression is the units laid end to end.
full_kernel = np.concatenate([A1_plus, B2_mult, C4_stop])
print('A1 + B2 * C4 = ', full_kernel)

A1 + B2 * C4 =  [0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 0.]


In [30]:
def decode_kernel(full_kernel):
    """Decode a bitstring back into a readable kernel expression.

    The input is read as consecutive units of ``kern_bit_length`` kernel
    bits followed by ``ops_n_bits`` operation bits (e.g. ``A3 *`` or
    ``B4 Stop``). A bare kernel code with no operation bits is also
    accepted and decodes to just the kernel name.

    Parameters
    ----------
    full_kernel : array-like
        1-D sequence of 0/1 values.

    Returns
    -------
    str
        The decoded expression, e.g. ``'A1 + B2 * C4 Stop'``. Empty input
        gives the empty string. Operation bits that match none of
        ``add`` / ``mult`` / ``stop`` contribute nothing to the result.

    Raises
    ------
    IndexError
        If a kernel code is not assigned to any (family, dimension) pair,
        i.e. it is ``>= len(kernel_families) * D``.
    """
    bits = np.asarray(full_kernel)
    if bits.size == 0:
        # Nothing to decode; avoids int('', 2) failing further down.
        return ''

    # unit = kernel + operation, e.g. A3* or B4 stop
    n_bits_per_unit = kern_bit_length + ops_n_bits
    n_kernels = len(kernel_families) * D
    op_symbols = ((add, ' + '), (mult, ' * '), (stop, ' Stop'))

    kernel_dec = ''
    for start in range(0, len(bits), n_bits_per_unit):
        unit = bits[start:start + n_bits_per_unit]
        kernel, op = unit[:kern_bit_length], unit[kern_bit_length:]

        # decode kernel: bits -> integer id -> (family index, 0-based dim)
        binary_str = ''.join(str(int(b)) for b in kernel.tolist())
        kern_int = int(binary_str, 2)
        if kern_int >= n_kernels:
            raise IndexError(
                'kernel code {} ({}) is not assigned to any kernel; '
                'valid codes are 0..{}'.format(binary_str, kern_int, n_kernels - 1)
            )
        family_idx, d = divmod(kern_int, D)
        kernel_dec += kernel_families[family_idx] + str(d + 1)

        # decode operation; a missing or unrecognised code adds nothing
        for code, symbol in op_symbols:
            if np.array_equal(op, code):
                kernel_dec += symbol
                break

    return kernel_dec

In [32]:
# Decode the example bitstring back into its expression string.
decode_kernel(full_kernel)

'A1 + B2 * C4 Stop'

In [33]:
# Round trip: encode every kernel, decode the bare code, and show both.
for family in kernel_families:
    for d in range(1, D + 1):
        kern_encoding = encode_kernel(family, d)
        print(family + str(d) + ':', kern_encoding,
              'decoded as:', decode_kernel(kern_encoding))
    print('')

A1: [0. 0. 0. 0.] decoded as: A1
A2: [0. 0. 0. 1.] decoded as: A2
A3: [0. 0. 1. 0.] decoded as: A3
A4: [0. 0. 1. 1.] decoded as: A4

B1: [0. 1. 0. 0.] decoded as: B1
B2: [0. 1. 0. 1.] decoded as: B2
B3: [0. 1. 1. 0.] decoded as: B3
B4: [0. 1. 1. 1.] decoded as: B4

C1: [1. 0. 0. 0.] decoded as: C1
C2: [1. 0. 0. 1.] decoded as: C2
C3: [1. 0. 1. 0.] decoded as: C3
C4: [1. 0. 1. 1.] decoded as: C4



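Problems:

- What about the unused part of the bitstring space? With 3 families and D = 4, only 12 of the 16 four-bit kernel codes and 3 of the 4 two-bit operation codes are assigned.
- Why do I need a stop operation?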