# Example Workflow
This notebook shows an example workflow using `tANS_py`.

# Using the library

In [1]:
# Importing the tANS module and testing it with a simple message
from tANS_py import tANS

# takes a string or list of symbols as input
msg = "Hello World! This is a test message to see how well the tANS algorithm works. It should be able to compress this message quite well, as it has a lot of repeated characters. Let's see how well it does!"
msg_list = list(msg)

# Using the tANS module to encode and decode the message as a string

# L determines the table size, the larger the table, the more efficient the encoding (default is 1024)
# fast determines whether to use the fast or slow version of the spread, slow is more efficient but slower (default is False)
bits, c = tANS.encode(msg, L = 128, fast= False)  

# Note: the output of encode, c, must be passed to decode, as it contains the data necessary to decode the message   
res = tANS.decode(bits, c)              

# returns a list of characters, so we need to join them into a string
res = "".join(res) 

print(msg[:11])
print("String Works:",res == msg)

# Using the tANS module to encode and decode the message as a list
bits, c = tANS.encode(msg_list)
res = tANS.decode(bits, c)

print(msg_list[:11])
print("List Works:",res == msg_list)

Hello World
String Works: True
['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd']
List Works: True


In [20]:
# Can also test the encode_decode function to test the compression ratio, good for evaluating the algorithm
msg = "Hello World! This is a test message to see how well the tANS algorithm works. It should be able to compress this message quite well, as it has a lot of repeated characters. Let's see how well it does!"
msg2 = "Hello World! This message will compress worse"

res = tANS.encode_decode_test(msg, L = 1024, fast = False)
res2 = tANS.encode_decode_test(msg2, L = 1024, fast = False)

print("Message 1")
print("\tBitstream","".join([str(i) for i in res[0]]))
print("\tWorking:",res[1] == msg)
print("\tComp Ratio:",res[2])

print("Message 2")
print("\tBitstream","".join([str(i) for i in res2[0]]))
print("\tWorking:",res2[1] == msg2)
print("\tComp Ratio:",res2[2])

Message 1
	Bitstream 1100011110111001001101110000000001010101101000010111111010110100100110001101101001101010110001001110110011000000001111000001101111110101110000100001110000001100100101101110111100000011110100101100010011010101011110110001100101110010110111100010110001110110110101010101101001000100001001111001000101110101000101000000100110010101110111010101111110101000000100110010101101111000010010100001111000101001111111110100010010000100001100001100010010111011100010111010000010100011011110110111101101011100001101001000110011010100001000000011011011011111101000111111110101101101101100101101111000110001100000101011101100101100110001011001111001100011101010101101001101111001000010011110100110010100011011100011011011110110110001100100111001110000111010110110001011010011011010111010101011101101001100101011011101010001000101111000100100111000001100011
	Working: True
	Comp Ratio: 1.426872770511296
Message 2
	Bitstream 1010011100111011011111011100010101110000011111000101011000100100110110

In [3]:
# Testing with different messages to see how well it compresses
msg = ["12121212",
       "111222333444555666777888999000",
       "AABBBCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ",
       "Adam walked to the park and saw a dog on a leash",
       "Once upon a time, in a quaint village nestled between rolling hills and dense forests, there was a girl named Elara. Elara was curious and adventurous, with a love for exploring the unknown. One sunny afternoon, while wandering through the woods, she stumbled upon an old, overgrown path she had never seen before.Intrigued, she followed the path as it twisted and turned through the forest. The trees grew taller and denser, their leaves forming a thick canopy overhead. After what felt like hours, Elara emerged into a clearing. There, hidden from the rest of the world, was a beautiful, abandoned garden.The garden was unlike anything she had ever seen. Flowers of every color imaginable bloomed in wild profusion, their petals shimmering in the sunlight. Vines climbed over ancient stone walls, and a small, sparkling stream wound its way through the center of the garden. At the heart of the garden stood a magnificent tree with golden leaves.Elara felt a sense of peace and wonder as she explored the garden. She discovered a stone bench near the tree and sat down, taking in the beauty around her. As she rested, she noticed a small, intricately carved box nestled among the roots of the tree. She carefully opened the box to find a delicate, silver pendant shaped like a leaf.As soon as she touched the pendant, a warm, comforting glow surrounded her. The pendant began to pulse with a gentle light, and Elara felt a connection to the garden and its magic. She realized that the garden was a special place, hidden from the world, meant to be a sanctuary for those who found it.From that day on, Elara visited the hidden garden whenever she needed solace or inspiration. She tended to the plants, ensuring the garden remained a vibrant, magical refuge. The pendant became her cherished keepsake, a reminder of the secret garden and the peace it brought her.Elara grew older, and her adventures took her far and wide, but she always returned to the hidden garden. She shared its secret with only a few close friends, who helped her care for it and ensure its magic endured. The garden remained a hidden gem, a place of beauty and tranquility, cherished by those who knew its secret.And so, the hidden garden flourished, a timeless sanctuary of nature and magic, forever entwined with Elara’s heart."]
   
for m in msg:    
    print("Message:",m[:50])
    res = tANS.encode_decode_test(m)
    print("Working:",res[1] == m)
    print("Comp Ratio:",res[2])

Message: 12121212
Working: True
Comp Ratio: 0.5714285714285714
Message: 111222333444555666777888999000
Working: True
Comp Ratio: 1.0084033613445378
Message: AABBBCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKL
Working: True
Comp Ratio: 1.0316139767054908
Message: Adam walked to the park and saw a dog on a leash
Working: True
Comp Ratio: 1.2371134020618557
Message: Once upon a time, in a quaint village nestled betw
Working: True
Comp Ratio: 1.4216371863430688


***Note*** 

the comp ratio depends on a number of factors, mainly the **entropy** of the message and the **alphabet size**. The larger the alphabet size, the better the compression ratio, as the original message will take up more space. The entropy of the message is also a factor, with lower entropy generally leading to better compression ratios.

# Backend
The rest of this notebook shows the backend of the library, which is not necessary to understand to use the library.

## Using `Coder` Class
This is a higher level object that simplifies encoding and decoding

In [4]:
# importing the Coder class as well as the Utils module, which helps with generating random data for testing
import tANS_py.Coder, tANS_py.Utils
import numpy as np

# Set up the alphabet
s = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
nbits = 5 # 5 bits per symbol as there are 26 symbols in the alphabet

# Run this multiple times to see how it performs on average
comp_ratios = []
for i in range(50):
    # Set up random frequencies
    # This specifically generates a list of len(s) numbers randomly chosen between 1 and 100
    freq = tANS_py.Utils.generate_random_list_target(len(s), 100, 1024)

    # Create the Coder object
    c = tANS_py.Coder.Coder(sum(freq), s, freq, fast = False) # specifies fast = False to use slower, but more effecient spread function

    # Create a message
    # Specifically generates a random string using symbols from s with frequencies from freq
    msg = tANS_py.Utils.generate_random_string(s, freq)

    # Encode and decode the message and get the number of bits of the encoded message
    # Note: you must pass in message as a list of symbols
    out, bits = c.encode_decode(list(msg))

    # Check if the decoding worked
    if "".join(out) != msg:
        # If the decoding failed, print a message
        print("Coding failed")
    else:
        # If the decoding worked, save the compression ratio
        comp_ratios.append(len(msg) * nbits / bits)
    
print("Comp Ratio:", np.mean(comp_ratios))

Comp Ratio: 1.1228140770504351


In [5]:
# Also can use on a string 

msg = "HELLO"

bits = c.encode_string(msg)
out = c.decode_string(bits)

print("Original:", msg)
print("Decoded:", out)
print("Equal:", msg == out)
print("Bitstream:", bits)

Original: HELLO
Decoded: HELLO
Equal: True
Bitstream: [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1]


## Using `Encoder` and `Decoder` Classes
These are the low level objects used to encode and decode data. 

In [6]:
# Testing code 
import tANS_py.Decoder
import tANS_py.Encoder

# Define the alphabet and the frequency of each symbol
s = ["A","B","C"]
freq = [6, 2, 24]

# Create the encoder and decoder
t = tANS_py.Decoder.DecodeTable(32, s, freq, fast = False)
g = tANS_py.Encoder.Encoder(32, s,freq,t.symbol_spread)

# Create message
msg = "CAACACCCCCCCCBCCCACCCACCCACCCBCC"
msg_temp = list(msg)

# Encode message
bit = g.encode(msg_temp)

# Decode message
out = t.decode(bit)
out.reverse()   # Reverse the list to get the correct order, as the decoder outputs the list in reverse order
print("Coding worked:", "".join(out) == msg)

Coding worked: True


# `Utils` Module
This module contains helper functions to convert between different data types and to visualize the encoded data.

In [7]:
from tANS_py import Utils

In [8]:
# generates a list of length numbers that sum to a power of 2, with each number being randomly chosen between 1 and n
l = Utils.generate_random_list_pow2(10, 10)
print("List is",l , "and sum is", sum(l))

List is [7, 2, 4, 7, 7, 5, 6, 6, 9, 11] and sum is 64


In [9]:
# generates a list of length numbers that sum to a target sum, with each number being randomly chosen between 1 and n
l = Utils.generate_random_list_target(10, 10, 50)
print("List is",l,"and sum is", sum(l))

List is [2, 4, 7, 7, 7, 6, 8, 5, 1, 3] and sum is 50


In [10]:
# rescales a list of numbers to sum to a power of 2 that is less than or equal to max sum
input_list = [1,2,3,4,5,6,7,8,9,10]
print("Original list:", input_list, "Original sum:", sum(input_list))
rescaled_list = Utils.rescale_list_to_power_of_2(input_list, 128)
print("Rescaled list:", rescaled_list, "Rescaled sum:", sum(rescaled_list))
# trying to rescale to a sum that is not a power of two will rescale to the nearest power of two that is lower than the target sum
rescaled_list = Utils.rescale_list_to_power_of_2(input_list, 100) # should rescale to 64
print("Rescaled list:", rescaled_list, "Rescaled sum:", sum(rescaled_list))

Original list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Original sum: 55
Rescaled list: [2, 5, 7, 9, 12, 14, 16, 19, 21, 23] Rescaled sum: 128
Rescaled list: [2, 5, 7, 9, 12, 14, 16, 19, 21, 23] Rescaled sum: 128


In [11]:
# generates a random string of length n using symbols from s with frequencies from freq
s = ["A", "B", "C", "D"]
freq = [1, 2, 3, 4]
print(Utils.generate_random_string(s, freq))

DCBCACDBDD
