In [25]:
from ANS import *
from arithmetic_coding import *
from huffman import *
from sANS import *
import time
from utils.utils import get_symbols
from core.data import *

In [26]:
# Four-symbol source using the same frequencies as the "Random" distribution
# defined in the next cell; the cell output is its Shannon entropy.
d = Data(
    symbols=['a', 'b', 'c', 'd'],
    frequency=[6, 2, 8, 4],
)
d.shannon_entropy()


1.8464393446710154

In [27]:
# Alphabet shared by every experiment.
symbols = ['a', 'b', 'c', 'd']

# Symbol frequencies for each named distribution (four values, one per symbol).
dist = {
    "Uniform": [4, 4, 4, 4],  # every symbol equally frequent
    "Natural": [8, 4, 2, 2],  # every frequency is a power of two
    "Random": [6, 2, 8, 4],
    # Currently disabled distributions:
    # "left": [16,16,6,4],
    # "right": [4,4,16,20],
    # "middle": [6, 24, 24, 6]
}

# Third argument passed to get_symbols for each generated message
# (presumably the message length -- confirm against utils.utils.get_symbols).
symbol_len = [5, 10, 20, 30]

# For every distribution, generate one message per entry of symbol_len.
experiment_symbols = {}
for name, freq in dist.items():
    experiment_symbols[name] = [
        get_symbols(symbols, freq, slen) for slen in symbol_len
    ]

# Result tables filled by the benchmark cells below.
# Keys have the form "<Codec>@<Distribution>"; values are per-message lists.
encoding_time = {}
decoding_time = {}
compression_size = {}

In [28]:
## Huffman
def huffan():
    """Benchmark the Huffman codec on every distribution in ``dist``.

    For each distribution a ``Huffman`` instance is built from ``symbols`` and
    the distribution's frequencies, then every message in
    ``experiment_symbols[<distribution>]`` is encoded and decoded once.

    Results are written to the module-level dictionaries under the key
    ``'Huffman@<distribution>'``:

    * ``encoding_time`` -- elapsed ``time.perf_counter_ns()`` around ``encode``
    * ``decoding_time`` -- elapsed ``time.perf_counter_ns()`` around ``decode``
    * ``compression_size`` -- ``len(encoded) / len(message)``

    The decoded value is not compared with the original message.
    """
    clock = time.perf_counter_ns
    for dist_name, frequencies in dist.items():
        codec = Huffman(symbols, frequencies)
        key = 'Huffman@' + dist_name
        encode_ns, decode_ns, ratio = [], [], []

        for message in experiment_symbols[dist_name]:
            t0 = clock()
            encoded, tree_root = codec.encode(message)
            t1 = clock()
            encode_ns.append(t1 - t0)

            # Encoded length relative to the number of input symbols.
            ratio.append(len(encoded) / len(message))

            t0 = clock()
            codec.decode(encoded, tree_root)
            t1 = clock()
            decode_ns.append(t1 - t0)

        encoding_time[key] = encode_ns
        compression_size[key] = ratio
        decoding_time[key] = decode_ns
huffan()


In [29]:
def rangeans():
    """Benchmark the rANS codec on every distribution in ``dist``.

    A fresh ``rANS`` instance is created per distribution. Each message in
    ``experiment_symbols[<distribution>]`` is encoded with ``encode(message, 0)``
    and decoded with the returned pair, timing both calls with
    ``time.perf_counter_ns()``.

    The per-message encode times, decode times and ``len(encoded) / len(message)``
    ratios are stored in ``encoding_time``, ``decoding_time`` and
    ``compression_size`` under ``'rANS@<distribution>'``.
    """
    for dist_name, frequencies in dist.items():
        codec = rANS(symbols, frequencies)
        messages = experiment_symbols[dist_name]
        encode_ns = []
        decode_ns = []
        ratio = []

        for message in messages:
            began = time.perf_counter_ns()
            encoded, length = codec.encode(message, 0)
            ended = time.perf_counter_ns()
            encode_ns.append(ended - began)

            ratio.append(len(encoded) / len(message))

            began = time.perf_counter_ns()
            codec.decode(encoded, length)
            ended = time.perf_counter_ns()
            decode_ns.append(ended - began)

        label = 'rANS@' + dist_name
        encoding_time[label] = encode_ns
        compression_size[label] = ratio
        decoding_time[label] = decode_ns
rangeans()

In [30]:
def streaming():
    """Benchmark the sANS codec on every distribution in ``dist``.

    A fresh ``sANS`` instance is created per distribution. ``encode`` returns a
    pair, and both parts are passed back to ``decode``; each call is timed with
    ``time.perf_counter_ns()``.

    The recorded size is ``(len(first) + len(second)) / len(message)``, i.e. both
    parts of the encoder output count towards the compressed size.

    Results are stored in ``encoding_time``, ``decoding_time`` and
    ``compression_size`` under ``'sANS@<distribution>'``.
    """
    now = time.perf_counter_ns
    for dist_name, frequencies in dist.items():
        codec = sANS(symbols, frequencies)
        encode_ns, decode_ns, ratio = [], [], []

        for message in experiment_symbols[dist_name]:
            enc_begin = now()
            state, bitstream = codec.encode(message)
            enc_end = now()
            encode_ns.append(enc_end - enc_begin)

            total_len = len(state) + len(bitstream)
            ratio.append(total_len / len(message))

            dec_begin = now()
            codec.decode(state, bitstream)
            dec_end = now()
            decode_ns.append(dec_end - dec_begin)

        encoding_time['sANS@' + dist_name] = encode_ns
        compression_size['sANS@' + dist_name] = ratio
        decoding_time['sANS@' + dist_name] = decode_ns
streaming()

In [31]:
def arith():
    """Benchmark ``ArithmeticCoding`` on every distribution in ``dist``.

    A fresh coder is created per distribution. Each message in
    ``experiment_symbols[<distribution>]`` is encoded and decoded once, timing
    both calls with ``time.perf_counter_ns()``.

    Results are stored in ``encoding_time``, ``decoding_time`` and
    ``compression_size`` under ``'Arithmetic@<distribution>'``.

    If encoding or decoding a message raises, that message is recorded as
    ``inf`` in all three series and a short notice is printed. Exactly one
    entry per message is appended to every series, so the three lists always
    stay aligned.
    """
    inf = float("inf")  # same value as math.inf, without needing `math` in scope
    for name, freq in dist.items():
        coder = ArithmeticCoding(symbols, freq)
        enc_time = []
        dec_time = []
        size = []
        for s in experiment_symbols[name]:
            try:
                start = time.perf_counter_ns()
                enc_value, bit = coder.encode(msg=s)
                stop = time.perf_counter_ns()

                ratio = len(enc_value) / len(s)

                dec_start = time.perf_counter_ns()
                coder.decode(enc_value, bit)
                dec_stop = time.perf_counter_ns()

                sample = (stop - start, dec_stop - dec_start, ratio)
            except Exception as exc:
                # Only ordinary errors are treated as a failed measurement;
                # KeyboardInterrupt / SystemExit still propagate.
                print(f"Arithmetic@{name}: failed on message of length {len(s)}: {exc!r}")
                sample = (inf, inf, inf)

            # Append after the whole round trip so a decode failure cannot
            # leave a stale encode entry behind.
            enc_time.append(sample[0])
            dec_time.append(sample[1])
            size.append(sample[2])
        encoding_time['Arithmetic@'+name] = enc_time
        compression_size['Arithmetic@'+name] = size
        decoding_time['Arithmetic@'+name] = dec_time
arith()

In [32]:
def aritrang():
    """Benchmark ``RangeCoding`` on every distribution in ``dist``.

    A fresh coder is created per distribution. Each message in
    ``experiment_symbols[<distribution>]`` is encoded and decoded once, timing
    both calls with ``time.perf_counter_ns()``.

    Results are stored in ``encoding_time``, ``decoding_time`` and
    ``compression_size`` under ``'Range@<distribution>'``.

    If encoding or decoding a message raises, the message and the frequencies
    are printed and the remaining messages of that distribution are skipped.
    Nothing is recorded for the failing message, so the three series always
    have the same length (possibly shorter than the number of messages).
    """
    for name, freq in dist.items():
        coder = RangeCoding(symbols, freq)
        enc_time = []
        dec_time = []
        size = []
        for s in experiment_symbols[name]:
            try:
                start = time.perf_counter_ns()
                enc_value, bit = coder.encode(msg=s)
                stop = time.perf_counter_ns()

                ratio = len(enc_value) / len(s)

                dec_start = time.perf_counter_ns()
                coder.decode(enc_value, bit)
                dec_stop = time.perf_counter_ns()
            except Exception:
                # Only ordinary errors end this distribution's run;
                # KeyboardInterrupt / SystemExit still propagate.
                print(s,freq)
                break

            # Record only after the full round trip succeeded, so the three
            # lists cannot drift out of step.
            enc_time.append(stop - start)
            size.append(ratio)
            dec_time.append(dec_stop - dec_start)
        encoding_time['Range@'+name] = enc_time
        compression_size['Range@'+name] = size
        decoding_time['Range@'+name] = dec_time
aritrang()

In [33]:
# Raw decode timings in nanoseconds (time.perf_counter_ns deltas),
# one list per "<Codec>@<Distribution>" key, one entry per benchmarked message.
decoding_time

{'Huffman@Uniform': [9542, 9291, 17208, 25208],
 'Huffman@Natural': [5292, 9334, 16209, 24375],
 'Huffman@Random': [4875, 8667, 16209, 24708],
 'rANS@Uniform': [20166, 11125, 22000, 30250],
 'rANS@Natural': [5708, 10459, 17875, 27084],
 'rANS@Random': [5708, 10667, 19375, 28666],
 'sANS@Uniform': [19291, 32667, 74916, 137792],
 'sANS@Natural': [12209, 34208, 74459, 135125],
 'sANS@Random': [11833, 31417, 399458, 249584],
 'Arithmetic@Uniform': [45458, 21500, 19500, inf],
 'Arithmetic@Natural': [9125, 12125, 17666, 24209],
 'Arithmetic@Random': [8709, 12000, 18375, inf],
 'Range@Uniform': [103708, 120625, 373125, 834125],
 'Range@Natural': [34625, 121084, 287875, 637208],
 'Range@Random': [31125, 109333, 1164916, 728416]}

In [34]:
# Raw encode timings in nanoseconds (time.perf_counter_ns deltas),
# one list per "<Codec>@<Distribution>" key, one entry per benchmarked message.
encoding_time

{'Huffman@Uniform': [24791, 7042, 6750, 7375],
 'Huffman@Natural': [13542, 6042, 6959, 7584],
 'Huffman@Random': [11584, 5959, 6833, 7542],
 'rANS@Uniform': [5750, 5166, 9916, 13500],
 'rANS@Natural': [2833, 4666, 8875, 12875],
 'rANS@Random': [2542, 4334, 8041, 12708],
 'sANS@Uniform': [14583, 9625, 17541, 25542],
 'sANS@Natural': [4833, 9167, 16208, 24167],
 'sANS@Random': [4292, 7917, 15500, 49917],
 'Arithmetic@Uniform': [303334, 18000, 17125, inf],
 'Arithmetic@Natural': [10959, 11166, 13750, 16792],
 'Arithmetic@Random': [9666, 10833, 13625, inf],
 'Range@Uniform': [32625, 14583, 25375, 37833],
 'Range@Natural': [7250, 14500, 23458, 34917],
 'Range@Random': [6792, 12542, 24833, 40334]}

In [35]:
# Encoded length divided by message length for every benchmarked message,
# one list per "<Codec>@<Distribution>" key.
compression_size

{'Huffman@Uniform': [2.0, 2.0, 2.0, 2.0],
 'Huffman@Natural': [2.0, 2.1, 1.7, 1.7666666666666666],
 'Huffman@Random': [1.8, 1.8, 1.75, 2.033333333333333],
 'rANS@Uniform': [1.8, 2.4, 2.15, 2.1333333333333333],
 'rANS@Natural': [2.6, 2.4, 1.9, 1.8],
 'rANS@Random': [2.6, 2.3, 1.9, 2.033333333333333],
 'sANS@Uniform': [3.4, 3.7, 3.85, 4.566666666666666],
 'sANS@Natural': [3.4, 3.8, 3.6, 4.166666666666667],
 'sANS@Random': [3.0, 3.3, 3.35, 4.1],
 'Arithmetic@Uniform': [2.2, 2.1, 2.05, inf],
 'Arithmetic@Natural': [2.2, 2.2, 1.75, 1.8],
 'Arithmetic@Random': [2.0, 2.0, 1.8, inf],
 'Range@Uniform': [1.8, 1.9, 1.85, 1.9666666666666666],
 'Range@Natural': [1.6, 2.0, 1.65, 1.7],
 'Range@Random': [1.6, 1.8, 1.75, 1.8]}

In [36]:
# Arithmetic mean of every per-message series, keyed like the raw tables
# ("<Codec>@<Distribution>").
#
# A series can be empty: the Range benchmark stops at the first failing message
# and may therefore record nothing for a distribution. An empty series yields
# NaN instead of raising ZeroDivisionError.
average_compression_dist = {
    k: (sum(v) / len(v) if v else float("nan"))
    for k, v in compression_size.items()
}
average_enc_time_dist = {
    k: (sum(v) / len(v) if v else float("nan"))
    for k, v in encoding_time.items()
}
average_dec_time_dist = {
    k: (sum(v) / len(v) if v else float("nan"))
    for k, v in decoding_time.items()
}

In [37]:
def _to_rows(averages):
    """Turn ``{"<Codec>@<Distribution>": value}`` into ``[codec, distribution, value]`` rows."""
    rows = []
    for key, value in averages.items():
        parts = key.split('@')
        rows.append([parts[0], parts[1], value])
    return rows


# One row per (codec, distribution) pair for each of the three averaged metrics.
comp_df_data = _to_rows(average_compression_dist)
ex_time_df_data = _to_rows(average_enc_time_dist)
dec_Time_df_data = _to_rows(average_dec_time_dist)

In [38]:
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports in the first cell -- confirm.
# The column name 'Distribuiton' (sic) is referenced by the plotting cells, so it
# is kept unchanged here.
_key_columns = ['Compression', 'Distribuiton']

comp_df = pd.DataFrame(
    comp_df_data,
    columns=_key_columns + ['Average Compressed Data Size'],
)
ex_time_df = pd.DataFrame(
    ex_time_df_data,
    columns=_key_columns + ['Average Encoding Time'],
)
dec_time_df = pd.DataFrame(
    dec_Time_df_data,
    columns=_key_columns + ['Average Decoding Time'],
)

In [39]:
# Display the table of average compressed size per codec and distribution.
comp_df

Unnamed: 0,Compression,Distribuiton,Average Compressed Data Size
0,Huffman,Uniform,2.0
1,Huffman,Natural,1.891667
2,Huffman,Random,1.845833
3,rANS,Uniform,2.120833
4,rANS,Natural,2.175
5,rANS,Random,2.208333
6,sANS,Uniform,3.879167
7,sANS,Natural,3.741667
8,sANS,Random,3.4375
9,Arithmetic,Uniform,inf


In [40]:
import plotly.express as px

# Grouped bars: one group per codec, one bar per distribution.
# comp_df holds a single row per (codec, distribution) pair, so each bar shows
# that row's average compressed size.
df = comp_df
fig = px.histogram(
    df,
    x="Compression",
    y="Average Compressed Data Size",
    color='Distribuiton',
    barmode='group',
    # Show a correctly spelled legend title without renaming the column.
    labels={'Distribuiton': 'Distribution'},
    title="Comparison: Average Size of Compressed Data",
)
fig.update_layout(yaxis_title="Average Compressed Data Size")
fig.show()

In [41]:
import plotly.express as px

# Grouped bars: one group per codec, one bar per distribution.
# ex_time_df holds a single row per (codec, distribution) pair, so each bar shows
# that row's average encoding time (nanoseconds, from time.perf_counter_ns).
df = ex_time_df
fig = px.histogram(
    df,
    x="Compression",
    y="Average Encoding Time",
    color='Distribuiton',
    barmode='group',
    # Show a correctly spelled legend title without renaming the column.
    labels={'Distribuiton': 'Distribution'},
    title="Comparison: Average Encoding Time",
)
fig.update_layout(yaxis_title="Average Encoding Time (ns)")

fig.show()

In [42]:
import plotly.express as px

# Grouped bars: one group per codec, one bar per distribution.
# dec_time_df holds a single row per (codec, distribution) pair, so each bar shows
# that row's average decoding time (nanoseconds, from time.perf_counter_ns).
df = dec_time_df
fig = px.histogram(
    df,
    x="Compression",
    y="Average Decoding Time",
    color='Distribuiton',
    barmode='group',
    # Show a correctly spelled legend title without renaming the column.
    labels={'Distribuiton': 'Distribution'},
    title="Comparison: Average Decoding Time",
)
fig.update_layout(yaxis_title="Average Decoding Time (ns)")

fig.show()