In [6]:
from collections import defaultdict
import math

import numpy as np
import cupy as cp
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [7]:
# Array-backend and RNG configuration shared by the experiment cells below.

# Fixed seed so that a fresh "Restart Kernel -> Run All" reproduces the same random draws.
SEED = 42
RNG = np.random.default_rng(SEED)

# Set to False to force the NumPy (CPU) code path even when CUDA is available.
USE_GPU = cp.cuda.is_available()

if USE_GPU:
    xp = cp

    def xp_array_from_gpu(a):
        """Return a host (NumPy) copy of a CuPy array; pass any other object through unchanged."""
        return a.get() if isinstance(a, cp.ndarray) else a

    def xp_array_to_gpu(a):
        """Return a CuPy copy of a NumPy array; pass any other object through unchanged."""
        return cp.asarray(a) if isinstance(a, np.ndarray) else a
else:
    xp = np

    def xp_array_from_gpu(a):
        """No-op on the CPU path: return the argument unchanged."""
        return a

    def xp_array_to_gpu(a):
        """No-op on the CPU path: return the argument unchanged."""
        return a

print(f'xp = {xp.__name__}')

xp = numpy


In [9]:
# Experiment: for every SBV dimension and every number of non-zero bits per entity,
# find the largest bundle size for which restoring a bundle from its superposition
# vector produced at most 0 / 1 / 2 false positives ("phantom" entities) in every
# random trial, and record the largest number of set bits seen at that bundle size.
#
# Outputs (consumed by the DataFrame cell):
#   index   - list of (min_entities_count, sbv_dim, param) row labels
#   columns - {non_zero_bits: [six values per sbv_dim, in the same order as `index`]}

# Alternative configurations used in earlier runs:
# sbv_dims = [1000, 1200, 1500, 1800, 2000, 2500, 3000, 3500, 4000, 5000]
# sbv_dims = [256, 512]
# sbv_dims = [256, 512, 1024]
# sbv_dims = [1200, 1300, 1400, 1500]
# non_zero_bits_list = range(1, 10)
# non_zero_bits_list = range(2, 26)
# non_zero_bits_list = range(2, 31)
# min_entities_count = 128 ** 2 # ~16k
# min_entities_count = 32 ** 2 # ~1k
# min_entities_count = 100
# min_entities_count = 9 * 4 * 50 * 2 # 10 degree, 3600
# min_entities_count = 6 * 4 * 50 * 2 # 15 degree, 2400
# min_entities_count = 5 * 4 * 50 * 2 # 18 degree, 2000
# min_entities_count = 3 * 4 * 50 * 2 # 30 degree, 1200
# min_entities_count = 36 * 128 * 128
# min_entities_count = 128 * 10 + 128 * 10 + 36 * 10
# min_entities_count = 1000 * 24 # 15 degree fidelity
sbv_dims = [1000]
non_zero_bits_list = range(2, 3)
min_entities_count = 1000

max_bundle_size = 100                    # bundle sizes 1..max_bundle_size are probed
trials_per_bundle_size = 50              # random bundles drawn per bundle size
attempts_to_get_unique_entity_code = 10  # retries when a freshly drawn entity code already exists

index = []
columns = defaultdict(list)

for sbv_dim in tqdm(sbv_dims, desc='sbv_dim'):
    # Six result rows per SBV dimension; the order must match the appends to `columns` below.
    for param in (
        'bundle_size_0_FP',
        'bundle_size_1_FP',
        'bundle_size_2_FP',
        'bundle_ones_count_0_FP',
        'bundle_ones_count_1_FP',
        'bundle_ones_count_2_FP',
    ):
        index.append((min_entities_count, sbv_dim, param))

    for non_zero_bits in tqdm(non_zero_bits_list, leave=False, desc='non_zero_bits'):
        bundle_size_0_FP = 0
        bundle_size_1_FP = 0
        bundle_size_2_FP = 0
        bundle_size_to_max_ones_count = {}
        entities_count = math.comb(sbv_dim, non_zero_bits)

        # Only run when enough distinct codes exist to draw min_entities_count unique entities;
        # otherwise the thresholds stay at 0 for this (sbv_dim, non_zero_bits) pair.
        if entities_count >= min_entities_count:
            entities = np.zeros((min_entities_count, sbv_dim), dtype='f')
            unique_entity_codes = set()

            for i in tqdm(range(entities.shape[0]), leave=False, desc='entities'):
                for _ in range(attempts_to_get_unique_entity_code):
                    ii = RNG.choice(sbv_dim, non_zero_bits, replace=False)
                    entity_code = tuple(sorted(ii))

                    if entity_code not in unique_entity_codes:
                        entities[i, ii] = 1
                        unique_entity_codes.add(entity_code)
                        break
                else:
                    # Explicit raise (rather than `assert False`) so the check survives `python -O`.
                    raise AssertionError(f'Failed to get unique entity code within {attempts_to_get_unique_entity_code} attempts')

            assert np.all(entities.sum(axis=1) == non_zero_bits)
            entities = xp_array_to_gpu(entities)

            for bundle_size in tqdm(range(1, max_bundle_size + 1), leave=False, desc='bundle_size'):
                errors_count_list = []
                max_ones_count = 0

                for _ in range(trials_per_bundle_size):
                    entity_inds = RNG.choice(entities.shape[0], bundle_size, replace=False)
                    entity_inds_mask = xp.zeros(entities.shape[0], dtype=bool)
                    entity_inds_mask[entity_inds] = True
                    entities_for_bundle = entities[entity_inds]
                    # Superposition vector: bitwise OR of all entities in the bundle.
                    sbv = xp.where(entities_for_bundle.sum(axis=0) > 0, 1, 0)
                    # An entity counts as restored when all of its non-zero bits are set in the bundle.
                    entity_inds_mask_restored = (entities @ sbv) == non_zero_bits
                    assert xp.all((entity_inds_mask_restored * entity_inds_mask) == entity_inds_mask) # assert that we don't lose any entities during restore procedure
                    errors_count = int((entity_inds_mask != entity_inds_mask_restored).sum()) # count False positives (phantom entities)
                    errors_count_list.append(errors_count)
                    # int(...) keeps plain Python ints in the results regardless of the array backend.
                    max_ones_count = max(max_ones_count, int(sbv.sum()))

                bundle_size_to_max_ones_count[bundle_size] = max_ones_count
                errors_count_list = np.array(errors_count_list)
                min_errors_count = np.min(errors_count_list)

                # Every trial exceeded the largest tolerance: stop probing larger bundles.
                if min_errors_count > 2:
                    break

                # A threshold advances only while it is contiguous, i.e. every smaller
                # bundle size also stayed within the same tolerance in all trials.
                bundle_size_0_FP = bundle_size if np.all(errors_count_list <= 0) and bundle_size_0_FP == bundle_size - 1 else bundle_size_0_FP
                bundle_size_1_FP = bundle_size if np.all(errors_count_list <= 1) and bundle_size_1_FP == bundle_size - 1 else bundle_size_1_FP
                bundle_size_2_FP = bundle_size if np.all(errors_count_list <= 2) and bundle_size_2_FP == bundle_size - 1 else bundle_size_2_FP

        columns[non_zero_bits].append(bundle_size_0_FP)
        columns[non_zero_bits].append(bundle_size_1_FP)
        columns[non_zero_bits].append(bundle_size_2_FP)
        # Bundle size 0 is never a key (it means "no bundle size qualified" or the experiment
        # was skipped); report 0 set bits for it instead of raising KeyError.
        columns[non_zero_bits].append(bundle_size_to_max_ones_count.get(bundle_size_0_FP, 0))
        columns[non_zero_bits].append(bundle_size_to_max_ones_count.get(bundle_size_1_FP, 0))
        columns[non_zero_bits].append(bundle_size_to_max_ones_count.get(bundle_size_2_FP, 0))

sbv_dim:   0%|          | 0/1 [00:00<?, ?it/s]

non_zero_bits:   0%|          | 0/1 [00:00<?, ?it/s]

entities:   0%|          | 0/1000 [00:00<?, ?it/s]

bundle_size:   0%|          | 0/100 [00:00<?, ?it/s]

In [11]:
# Label every result row as (entities, N, param) and display the columns selected by range(2, 3).
row_labels = pd.MultiIndex.from_tuples(index, names=['entities', 'N', 'param'])
pd.DataFrame(columns, index=row_labels)[range(2,3)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,2
entities,N,param,Unnamed: 3_level_1
1000,1000,bundle_size_0_FP,2
1000,1000,bundle_size_1_FP,7
1000,1000,bundle_size_2_FP,12
1000,1000,bundle_ones_count_0_FP,4
1000,1000,bundle_ones_count_1_FP,14
1000,1000,bundle_ones_count_2_FP,24
