In [1]:
import random
import numpy as np

# This is a practical implementation of the paper
# "Understanding Entropy Coding With Asymmetric Numeral Systems (ANS):
# a Statistician’s Perspective".


def quantize_pmf(pmf, precision):
    """Quantize a probability mass function to integer frequencies.

    Each probability is scaled by ``n = 2**precision`` and rounded; entries
    that round to 0 are raised to 1 so that no symbol ends up with a zero
    frequency. The rounded table is then nudged, one unit at a time and in
    index order, until it sums to exactly ``n``. Entries equal to 1 are never
    touched by that correction pass (neither lowered nor raised).

    Args:
        pmf: one-dimensional array-like of probabilities.
        precision: number of bits; the returned frequencies sum to
            ``1 << precision``.

    Returns:
        ``np.ndarray`` of dtype ``int64`` with the same shape as ``pmf``.

    Raises:
        AssertionError: if ``2**precision`` is not greater than
            ``2 * len(pmf)``.
        ValueError: if the table has to be corrected but contains no entry
            different from 1 (e.g. an all-zero or empty ``pmf``); the
            correction loop could never terminate in that case.
    """
    pmf = np.asarray(pmf)
    n = 1 << precision
    assert n > 2 * len(pmf)
    # Explicit 64-bit dtype: `np.long` is not available in every NumPy
    # release and, where it exists, its width depends on the platform.
    qt_pmf = np.zeros(pmf.shape, dtype=np.int64)

    total = 0
    for i in range(len(pmf)):
        val = int(np.around(pmf[i] * n))
        if val == 0:
            # Keep every symbol encodable.
            val = 1
        qt_pmf[i] = val
        total += val

    diff = n - total
    if diff == 0:
        return qt_pmf

    if not np.any(qt_pmf != 1):
        # Only entries != 1 are adjusted below, so nothing could absorb the
        # rounding error and the loop would spin forever.
        raise ValueError(
            "cannot normalise quantized pmf: no entry differs from 1")

    # +1 when the table sums to less than n, -1 when it sums to more.
    step = 1 if diff > 0 else -1
    while diff != 0:
        for i in range(len(pmf)):
            if qt_pmf[i] != 1:
                qt_pmf[i] += step
                diff -= step
            if diff == 0:
                break

    return qt_pmf



class StreamANS(object):
    """Streaming ANS coder operating as a stack (last pushed, first popped).

    The coder state is an integer ``head`` plus ``disk``, a list used as a
    stack of ``precision``-bit words that have been shifted out of ``head``.
    Symbols are coded under integer frequency tables that are expected to sum
    to ``n = 2**precision``.

    Attributes:
        precision: number of bits per word.
        bits: same value as ``precision``; the shift width used by the methods.
        n: ``1 << precision``.
        mask: ``n - 1``, selects the low ``precision`` bits of ``head``.
        disk: list of words flushed out of ``head``.
        head: current integer state, initialised uniformly in ``[n, 2n)``.
    """

    def __init__(self, precision):
        self.precision = precision
        # The methods shift by `self.bits`; it must exist and equal precision.
        self.bits = self.precision
        self.n = 1 << self.precision
        # Low-bit mask must cover a whole word, i.e. n - 1 (not precision - 1).
        self.mask = self.n - 1
        self.disk = []
        # Start with head in [n, 2n) so that the word above the low
        # `precision` bits is non-zero from the beginning.
        self.head = self.n + random.randint(0, self.n - 1)

    def flush(self):
        """Move the low ``precision`` bits of ``head`` onto ``disk``."""
        self.disk.append(self.head & self.mask)
        self.head = self.head >> self.bits

    def load(self):
        """Inverse of ``flush``: pull the last word from ``disk`` into ``head``."""
        self.head = (self.head << self.bits) | self.disk.pop()

    def push(self, sym, freq):
        """Encode ``sym`` under the frequency table ``freq``.

        Args:
            sym: index of the symbol in ``freq``.
            freq: NumPy integer array of frequencies.
        """
        # Work with Python ints so `head` never turns into a fixed-width
        # NumPy scalar that could overflow in `head * n`.
        count = int(freq[sym])
        offset = 0 if sym == 0 else int(freq.cumsum()[sym - 1])

        # Make room first if encoding would push head past 2 * precision bits.
        if (self.head >> self.bits) >= count:
            self.flush()

        quotient, u = divmod(self.head, count)
        z = u + offset
        self.head = quotient * self.n + z

    def pop(self, freq):
        """Decode and return the most recently pushed symbol.

        Args:
            freq: the frequency table that was used to push that symbol.

        Returns:
            The decoded symbol index as a Python ``int``.
        """
        cdfs = freq.cumsum()
        z = self.head & self.mask
        self.head = self.head >> self.bits
        # First cumulative count strictly greater than z identifies the symbol.
        sym = int(np.searchsorted(cdfs, z, 'right'))
        offset = 0 if sym == 0 else int(cdfs[sym - 1])
        u = z - offset
        self.head = self.head * int(freq[sym]) + u

        # Refill head when its upper word is empty and flushed data is available.
        if (self.head >> self.bits) == 0 and len(self.disk) != 0:
            self.load()

        return sym



In [2]:
# without bit operations (plain integer division and modulo)
class StreamANS1(object):
    """Streaming ANS coder written with plain integer arithmetic.

    The coder state is an integer ``head`` plus ``disk``, a list used as a
    stack of words in ``[0, n)`` with ``n = 2**precision``. Symbols come back
    from ``pop`` in the reverse of the order they were given to ``push``.

    Attributes:
        precision: number of bits per word.
        n: ``1 << precision``.
        mask: ``n - 1``; not read by this class's own methods.
        disk: list of words moved out of ``head``.
        head: current integer state, initialised uniformly in ``[n, 2n)``.
    """

    def __init__(self, precision):
        self.precision = precision
        self.n = 1 << self.precision
        self.mask = self.n - 1
        self.disk = []
        self.head = self.n + random.randint(0, self.n - 1)

    def flush(self):
        """Move ``head % n`` onto ``disk`` and keep ``head // n``."""
        self.head, word = divmod(self.head, self.n)
        self.disk.append(word)

    def load(self):
        """Inverse of ``flush``: append the last word of ``disk`` to ``head``."""
        self.head = (self.head * self.n) + self.disk.pop()

    def push(self, sym, freq):
        """Encode ``sym`` under the frequency table ``freq``.

        Args:
            sym: index of the symbol in ``freq``.
            freq: NumPy integer array of frequencies.
        """
        # Convert table entries to Python ints: otherwise `head` takes on the
        # fixed-width dtype of `freq` and `head * n` can silently overflow.
        count = int(freq[sym])
        offset = 0 if sym == 0 else int(freq.cumsum()[sym - 1])

        # Make room first if encoding would push head to n * n or beyond.
        if (self.head // self.n) >= count:
            self.flush()

        quotient, u = divmod(self.head, count)
        z = u + offset
        self.head = quotient * self.n + z

    def pop(self, freq):
        """Decode and return the most recently pushed symbol.

        Args:
            freq: the frequency table that was used to push that symbol.

        Returns:
            The decoded symbol index as a Python ``int``.
        """
        cdfs = freq.cumsum()
        self.head, z = divmod(self.head, self.n)
        # First cumulative count strictly greater than z identifies the symbol.
        sym = int(np.searchsorted(cdfs, z, 'right'))
        offset = 0 if sym == 0 else int(cdfs[sym - 1])
        u = z - offset
        self.head = self.head * int(freq[sym]) + u

        # Refill head when it dropped below n and flushed data is available.
        if (self.head // self.n) == 0 and len(self.disk) != 0:
            self.load()

        return sym


In [3]:
# with bit operations
# same as StreamANS1 (all lines are equivalent) but faster
class StreamANS2(object):
    """Streaming ANS coder using shifts and masks for the word handling.

    The coder state is an integer ``head`` plus ``disk``, a list used as a
    stack of ``precision``-bit words. Symbols come back from ``pop`` in the
    reverse of the order they were given to ``push``.

    Attributes:
        precision: number of bits per word.
        n: ``1 << precision``.
        mask: ``n - 1``, selects the low ``precision`` bits of ``head``.
        disk: list of words shifted out of ``head``.
        head: current integer state, initialised uniformly in ``[n, 2n)``.
    """

    def __init__(self, precision):
        self.precision = precision
        self.n = 1 << self.precision
        self.mask = self.n - 1
        self.disk = []
        self.head = self.n + random.randint(0, self.n - 1)

    def flush(self):
        """Move the low ``precision`` bits of ``head`` onto ``disk``."""
        self.disk.append(self.head & self.mask)
        self.head = self.head >> self.precision

    def load(self):
        """Inverse of ``flush``: pull the last word from ``disk`` into ``head``."""
        self.head = (self.head << self.precision) | self.disk.pop()

    def push(self, sym, freq):
        """Encode ``sym`` under the frequency table ``freq``.

        Args:
            sym: index of the symbol in ``freq``.
            freq: NumPy integer array of frequencies.
        """
        # Convert table entries to Python ints: otherwise `head` takes on the
        # fixed-width dtype of `freq` and `head * n` can silently overflow.
        count = int(freq[sym])
        offset = 0 if sym == 0 else int(freq.cumsum()[sym - 1])

        # Make room first if encoding would push head past 2 * precision bits.
        if (self.head >> self.precision) >= count:
            self.flush()

        quotient, u = divmod(self.head, count)
        z = u + offset
        self.head = quotient * self.n + z

    def pop(self, freq):
        """Decode and return the most recently pushed symbol.

        Args:
            freq: the frequency table that was used to push that symbol.

        Returns:
            The decoded symbol index as a Python ``int``.
        """
        cdfs = freq.cumsum()
        z = self.head & self.mask
        self.head = self.head >> self.precision
        # First cumulative count strictly greater than z identifies the symbol.
        sym = int(np.searchsorted(cdfs, z, 'right'))
        offset = 0 if sym == 0 else int(cdfs[sym - 1])
        u = z - offset
        self.head = self.head * int(freq[sym]) + u

        # Refill head when its upper word is empty and flushed data is available.
        if (self.head >> self.precision) == 0 and len(self.disk) != 0:
            self.load()

        return sym


In [4]:
def randpmf(size):
    """Return a random probability vector with ``size`` entries.

    Draws ``size`` standard-normal samples from NumPy's global generator,
    takes their absolute values and divides by their sum.
    """
    magnitudes = np.absolute(np.random.randn(size))
    total = magnitudes.sum()
    return magnitudes / total


# Message to encode; the i-th symbol is coded under the i-th table in `freqs`.
msg = [0, 0, 77, 4, 7, 1, 2, 3, 4, 5, 6, 7, 8, 11, 15]
precision = 16
coder = StreamANS2(precision)

# One independently drawn random pmf per message position, quantized to
# integer frequencies. The sizes are listed in message order.
alphabet_sizes = [5, 15, 100] + [64] * 11 + [17]
freqs = [quantize_pmf(randpmf(size), precision) for size in alphabet_sizes]

# Encode front to back.
for position, symbol in enumerate(msg):
    coder.push(symbol, freqs[position])

print('code: {}'.format(coder.head))

# The coder is a stack, so symbols come back last-first: walk the tables
# backwards, then restore the original order.
dec = [coder.pop(freqs[position])
       for position in range(len(msg) - 1, -1, -1)]
dec.reverse()
print(dec)

code: 255342
[0, 0, 77, 4, 7, 1, 2, 3, 4, 5, 6, 7, 8, 11, 15]
