In [1]:
import numpy as np
import seaborn as sns
import pandas as pd

In [113]:
### UTILITY FUNCTIONS
class SparseVectors():
    """Statistics on the bit positions shared by a set of sparse hypervectors.

    All helpers are static; call them as ``SparseVectors.common_bits(...)``.
    """

    @staticmethod
    def common_bits(no_of_class, hv_generator=None):
        """Count the positions on which all generated hypervectors agree.

        A position counts as "common" when it is 0 in every vector or 1 in
        every vector (i.e. its column sum is 0 or ``no_of_class``).

        Parameters
        ----------
        no_of_class : int
            Number of hypervectors to generate and compare.
        hv_generator : callable, optional
            Zero-argument callable returning one binary hypervector. When
            omitted, vectors come from
            ``SparseHDC.generate_random_sparse_HVs(count=no_of_class)`` with
            that function's default dimension and sparsity.
            NOTE(review): the original code called an undefined
            ``generate_sparse_HV()``; confirm the intended default generator.

        Returns
        -------
        int
            Number of common positions; 0 when ``no_of_class <= 0``.
        """
        if no_of_class <= 0:
            return 0

        if hv_generator is None:
            sparse_HVs = SparseHDC.generate_random_sparse_HVs(count=no_of_class)
        else:
            sparse_HVs = [hv_generator() for _ in range(no_of_class)]

        # Column sum of 0 -> bit clear everywhere; column sum equal to the
        # number of vectors -> bit set everywhere (previously hard-coded 26).
        column_sums = np.sum(np.array(sparse_HVs), axis=0)
        agree = (column_sums == 0) | (column_sums == no_of_class)
        return int(agree.sum())

    @staticmethod
    def average_common_bits(no_of_class, iterations, hv_generator=None):
        """Return the mean of ``common_bits`` over ``iterations`` trials.

        Raises
        ------
        ValueError
            If ``iterations`` is not positive.
        """
        if iterations <= 0:
            raise ValueError("iterations must be a positive integer")

        total = 0
        for _ in range(iterations):
            total += SparseVectors.common_bits(no_of_class, hv_generator)
        return total / iterations

    @staticmethod
    def redundant_bits_histogram(no_of_classes, sample_size=100, hv_generator=None):
        """Draw a seaborn box plot of ``common_bits`` over ``sample_size`` trials.

        Despite the name, the plot is a box plot (``sns.boxplot``), as in the
        original implementation.
        """
        common_bits_dist = [
            SparseVectors.common_bits(no_of_classes, hv_generator)
            for _ in range(sample_size)
        ]

        sns.boxplot(common_bits_dist)
        
class SparseHDC():
    """Static helpers for generating and manipulating sparse binary hypervectors."""

    @staticmethod
    def cyclic_shift(arr, shift_count):
        """Return ``arr`` rotated by ``shift_count`` positions (``np.roll``)."""
        return np.roll(arr, shift_count)

    @staticmethod
    def generation_threshold(num, percent_sparsity = 5):
        """Return 1 when ``num`` is below ``percent_sparsity``, otherwise 0."""
        return 1 if num<percent_sparsity else 0

    @staticmethod
    def accumulation_threshold(num, acc_count = 784, tresh = 50):
        """Return 1 when ``num`` exceeds ``tresh``, otherwise 0.

        ``acc_count`` is accepted for interface compatibility but is not used.
        """
        return 1 if num>tresh else 0

    @staticmethod
    def generate_random_sparse_HV(dimension = 10000, percent_sparsity = 5):
        """Generate one random binary hypervector of length ``dimension``.

        Each element is drawn uniformly from 0..99 and set to 1 when the draw
        is below ``percent_sparsity``, so roughly ``percent_sparsity`` percent
        of the bits are 1.
        """
        draws = np.random.randint(100, size=dimension)
        # Array comparison applies the same rule as generation_threshold to
        # every element without a Python-level call per bit.
        return (draws < percent_sparsity).astype(int)

    @staticmethod
    def generate_random_sparse_HVs(count = 10, dimension = 10000, sparsity=0.3):
        """Generate ``count`` random binary hypervectors as a list of arrays.

        ``sparsity`` is the expected fraction of 1-bits (0.0 to 1.0); it is
        converted to a whole percentage. ``round`` is used instead of plain
        truncation so that values such as 0.29 (where ``100*0.29`` evaluates
        to 28.999...) map to 29 rather than 28.
        """
        percent_sparsity = int(round(100*sparsity))
        return [
            (np.random.randint(100, size=dimension) < percent_sparsity).astype(int)
            for _ in range(count)
        ]

In [32]:
# DATASETS

class ISOLET():
    """Loads the ISOLET train/test CSV files and splits features from labels.

    The CSVs are read without a header row. The first ``feature_count``
    columns are treated as features and column ``feature_count`` as the label.

    Attributes
    ----------
    train, test : pandas.DataFrame
        The raw frames as read from disk.
    train_X, test_X : pandas.DataFrame
        Feature columns ``0 .. feature_count-1``.
    train_y, test_y : pandas.Series
        Label column ``feature_count``. ``test_y`` is ``None`` when the test
        file has no such column.
    """

    def __init__ (self, train_filepath = 'isolet1+2+3+4.csv', test_filepath = 'isolet5.csv', feature_count = 617):
        feature_columns = list(range(feature_count))

        self.train = pd.read_csv(train_filepath, header=None)
        self.train_X = self.train[feature_columns]
        self.train_y = self.train[feature_count]

        # Split the test set the same way as the training set; previously
        # test_X kept the label column and no test_y was exposed.
        # NOTE(review): assumes the test file shares the training layout.
        self.test = pd.read_csv(test_filepath, header=None)
        if feature_count in self.test.columns:
            self.test_X = self.test[feature_columns]
            self.test_y = self.test[feature_count]
        else:
            # No label column present: keep the frame as loaded.
            self.test_X = self.test
            self.test_y = None

In [67]:
# CIM Methods

class LinearCIM():
    """Builds an item memory whose entries are cyclic shifts of one seed vector."""

    def __init__(self, sparsity=0.3, dimensions=10000):
        """Store the fraction of 1-bits and the hypervector length."""
        self.dimensions = dimensions
        self.sparsity = sparsity

    def generate(self, keys):
        """Map each key to the seed vector rolled by the key's position.

        The seed is ``int(sparsity * dimensions)`` ones followed by zeros;
        ``keys[i]`` is assigned that seed rotated right by ``i`` positions.
        Returns a dict from key to numpy array.
        """
        ones_count = int(self.sparsity * self.dimensions)
        zeros_count = self.dimensions - ones_count
        # Leading block of ones, trailing block of zeros.
        base_vector = np.repeat([1, 0], [ones_count, zeros_count])

        return {
            keys[position]: np.roll(base_vector, position)
            for position in range(len(keys))
        }

In [None]:
# Binding Methods

class AdditiveCDTBinder():
    """Placeholder binder; the binding operation is not implemented yet."""

    @staticmethod
    def bind(feature=None, value=None):
        """Bind a feature hypervector with a value hypervector (stub).

        The signature accepts ``(feature, value)`` so that it matches the
        ``binder.bind(feature, value)`` call made by ``Sparse_FeatureEncoder``;
        both arguments are optional so a bare ``bind()`` call keeps working.
        Currently returns ``None``.
        """
        #TODO
        pass

In [117]:
# ENCODING ALGORITHMS

class Sparse_FeatureEncoder():
    """Quantizes feature values and maps them to hypervectors from an item memory.

    Parameters
    ----------
    cim_generator : object
        Must expose ``generate(keys)`` returning a mapping from each
        quantization level to its hypervector.
    binder : object
        Must expose ``bind(feature, value)``; used by :meth:`bind`.
    feature_count : int
        Number of random base hypervectors to create.
    qlevel_count : int
        Number of quantization intervals (yields ``qlevel_count + 1`` levels).
    """

    def __init__(self, cim_generator, binder, feature_count=617, qlevel_count=10):
        self.feature_count = feature_count
        self.qlevel_count = qlevel_count
        # Keep the generator separate from the generated memory so that
        # setup_CIM() can safely be called more than once.
        self.cim_generator = cim_generator
        self.cim = None
        self.base_hvs = SparseHDC.generate_random_sparse_HVs(count=feature_count, sparsity=0.3)
        self.binder = binder

        #Setup functions
        self.qlevels = self.quantization_levels()
        self.setup_CIM()

    def setup_CIM(self):
        """(Re)build ``self.cim`` from the generator and current levels."""
        self.cim = self.cim_generator.generate(self.qlevels)

    def encode(self, features):
        """Return the item-memory hypervector for each feature value.

        ``features`` is a scalar or 1-D sequence of numbers. Each value is
        snapped to its nearest quantization level (first level wins ties) and
        looked up in ``self.cim``. An empty input yields an empty list.
        """
        values = np.atleast_1d(np.asarray(features, dtype=float))
        if values.size == 0:
            return []

        # Distance from every value to every level; argmin picks the nearest.
        nearest = np.abs(values[:, None] - self.qlevels[None, :]).argmin(axis=1)
        mapped_to_hvs = [self.cim[self.qlevels[idx]] for idx in nearest]

        return mapped_to_hvs

    def bind(self, feature, value):
        """Delegate to the configured binder's ``bind(feature, value)``.

        Raises
        ------
        ValueError
            If the encoder was constructed without a binder.
        """
        if self.binder is None:
            raise ValueError("no binder configured for this encoder")
        return self.binder.bind(feature, value)

    # ENCODING HELPERS
    def quantization_levels(self, min_val=-1, max_val=1, precision=5):
        """Return ``qlevel_count + 1`` evenly spaced levels in [min_val, max_val].

        ``np.linspace`` is used rather than ``np.arange`` with a fractional
        step, whose output length can vary with floating-point rounding.
        Levels are rounded to ``precision`` decimals.

        Raises
        ------
        ValueError
            If ``qlevel_count`` is less than 1.
        """
        if self.qlevel_count < 1:
            raise ValueError("qlevel_count must be at least 1")
        return np.linspace(min_val, max_val, self.qlevel_count + 1).round(precision)

    def quantize(self, value):
        """Return the quantization level closest to ``value`` (first on ties)."""
        return self.qlevels[np.abs(self.qlevels - value).argmin()]

In [121]:
# TESTING

# Smoke test: construct an encoder with a default LinearCIM as the item-memory
# generator and None as the binder. Construction computes the quantization
# levels and builds the item memory.
x = Sparse_FeatureEncoder(LinearCIM(), None)