In [16]:
import numpy as np
from bitstream import BitStream
from numpy import array


In [573]:
class ArithmeticEncoder(object):
    def __init__(self, bitlen):
        self.bit_prec = bitlen   #bit precision 
        self.max_range = 1 << self.bit_prec  #max range based on bit precision 2^bit_prec
        self.mask = self.max_range - 1  #max range index starting at 0
        self.renorm= self.max_range >> 1  #renormalization threshold
        self.second_mask = self.max_range >> 1
        self.low = 0  #initial low
        self.high = self.mask  #initial high
        self.s = 0  

    def update(self, sym, c):
        low = self.low   
        high = self.high
        range = high - low + 1
        total = (c[-1])  #cumulative propabilities
        symlow = c[sym]  
        symhigh = c[sym+1]  
        

        newlow = low + symlow*range // total  #low in arithmetic integer
        newhigh = low + symhigh*range // total -1 #high in arithemtic integer
        self.low = int(newlow)
        self.high = int(newhigh)
        range = self.high - self.low 
#         print("sym:", sym)
#         print("symlow:", self.low)
#         print("symhigh:", self.high)
#         print("range:", range)
        
        #renormalization
        while((self.low ^ self.high) & self.renorm) == 0:
            self.low = (self.low << 1) 
            range = range << 1 | 1
            self.high = self.low + range
            self.s = self.s + 1          
#         print("renorm low:",self.low)
#         print("renorm range:",range)
#         print("renorm high:", self.high)
   
    def write(self, c, sym):
        self.update(c,sym)
        return [self.low, self.s]
        
    def finish(self):
        self.output.write(1)

In [677]:
class ArithmeticDecoder(ArithmeticEncoder):
    def __init__(self,statesize,bitin):
        self.input = bitin
        self.bitstream = ([int(d) for d in str(self.input)])
        print(self.bitstream)
        self.code = 0
        self.max_range = 1 << statesize
        self.renorm= self.max_range >> 1
        self.mask = self.max_range - 1
        
        self.stream = self.input[0:statesize]
        self.low = int(self.stream,2)
        self.t = statesize
        self.thresh = []
        self.thresh = [round(c[i]*self.mask) for i in range(len(c))]
        
    def decode(self):
        for i in range(len(self.thresh)-1):
            if((self.low < self.thresh[i+1]) & (self.low >= self.thresh[i])):
                sym = i
                print("encoded value:",self.low)
                print("threshold:", self.thresh)
                print("decoded value:", sym)
                rangenew = int(self.thresh[i+1] - self.thresh[i])
                print("range:", rangenew)
                while (rangenew < self.renorm):
#                     self.low = self.low * 2
#                     rangenew = rangenew * 2 
                    self.t = self.t + 1
                    self.low = self.low*2
                    print(self.low)
                    rangenew = rangenew << 1 | 1
                    self.thresh[i] = ((int(self.thresh[i])*2) & self.mask) + int(self.bitstream[self.t])
                    self.thresh[i+1] = rangenew + self.thresh[i]
                self.thresh = [round(c[j]*(self.thresh[i+1]-self.thresh[i]))+self.thresh[i] for j in range(len(c))]
                break
        else:
            print("error")
        return sym


In [678]:
#test bit stream
bs = 1024
bitprecision = 8

in_stream = array(['a','c', 'g','t','n'])
# in_stream = array(['a','c','a', 'c'])
prob = array([0.2, 0.2, 0.2, 0.2, 0.2])
# in_stream = array(['c','a','c','c'])
# prob = array([0.5, 0.5])

#cumulative probability
c = []
for i in range(len(prob)+1):
    c.append(sum(prob[0:i]))

#convert letters into numbers that correspond to cum probability index
sym = []
for x, letter in enumerate(in_stream): 
    if letter == 'a':
        sym.append(0)
    elif letter =='c':
        sym.append(1)
    elif letter == 'g':
        sym.append(2)
    elif letter == 't':
        sym.append(3)
    elif letter =='n': 
        sym.append(4)
    
print("symbol stream:", in_stream)
print("num stream:", sym)
print("probability:", prob)
print("cumulative prob:", c)


enc = ArithmeticEncoder(bitprecision)
for j in range(len(sym)):
    new = enc.write(sym[j], c)
    print("[low,s]:", new)

low_final = new[0]
print(low_final)
s_final = new[1]

bitstream = format(int(low_final), 'b')
while len(bitstream) < (s_final+bitprecision):
    bitstream = '0' + bitstream
print(bitstream)


#START DECODER
dec = ArithmeticDecoder(bitprecision, bitstream)

symdec = []
for symbols in range(len(sym)):
    symdec.append(dec.decode())
print(symdec)
    
dec_stream = []
for x, sym in enumerate(symdec): 
    if sym == 0:
        dec_stream.append('a')
    elif sym ==1:
        dec_stream.append('c')
    elif sym == 2:
        dec_stream.append('g')
    elif sym == 3:
        dec_stream.append('t')
    elif sym ==4: 
        dec_stream.append('n')
        
print(dec_stream)

symbol stream: ['a' 'c' 'g' 't' 'n']
num stream: [0, 1, 2, 3, 4]
probability: [0.2 0.2 0.2 0.2 0.2]
cumulative prob: [0, 0.2, 0.4, 0.6000000000000001, 0.8, 1.0]
[low,s]: [0, 2]
[low,s]: [80, 3]
[low,s]: [112, 3]
[low,s]: [3904, 8]
[low,s]: [31840, 11]
31840
0000111110001100000
[0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
encoded value: 15
threshold: [0, 51.0, 102.0, 153.0, 204.0, 255.0]
decoded value: 0
range: 51
30
60
encoded value: 60
threshold: [0, 41.0, 83.0, 124.0, 166.0, 207.0]
decoded value: 1
range: 42
120
240
encoded value: 240
threshold: [165, 199.0, 233.0, 268.0, 302.0, 336.0]
decoded value: 2
range: 35
480
960
error


UnboundLocalError: local variable 'sym' referenced before assignment