In [8]:
from prob3_utils import HuffmanEncoder, Queue, Tree, Node
import sys

def huffman_encoding(data: str) -> "tuple[str, HuffmanEncoder] | None":
    """
    Huffman encoding method
    :param data: text desired to be codified
    :return: tuple with the encoded text and the corresponding text specific
             encoder, or None (after printing a notice) when data is empty
    """

    # Guard clause: empty (or missing) input is rejected before any queue or
    # tree is built. The print-and-return-None contract is kept on purpose so
    # existing callers that rely on the notice keep working.
    if not data:
        print("Please introduce a non null string")
        return None

    # Queue, Tree and HuffmanEncoder come from prob3_utils; the encoder is
    # constructed from the tree after binaryze() has been called on it.
    temp_queue = Queue(string=data)
    temp_tree = Tree(queue=temp_queue)
    temp_tree.binaryze()
    temp_encoder = HuffmanEncoder(temp_tree)

    # The encoder is returned with the encoded text because decoding needs it.
    return temp_encoder.encode(data), temp_encoder


def huffman_decoding(data: str, encoder: HuffmanEncoder) -> str:
    """
    Huffman decoding method
    :param data: encoded text that should be turned back into plain text
    :param encoder: Huffman encoder that was used to encode the text
    :return: whatever the encoder's decode method yields for data, i.e. the
             restored text
    """

    # All of the work is delegated to the encoder.
    restored_text = encoder.decode(data)
    return restored_text

In [9]:
def _report_round_trip(sentence):
    """
    Encode and decode one sentence, printing size and content at every stage
    :param sentence: non-empty text to run through huffman_encoding and
                     huffman_decoding
    :return: encoded text, the encoder returned by huffman_encoding and the
             decoded text
    """
    print("The size of the data is: {}\n".format(sys.getsizeof(sentence)))
    print("The content of the data is: {}\n".format(sentence))

    encoded, encoder = huffman_encoding(sentence)

    # The bit string is converted to an int before measuring so the reported
    # size is that of the packed bits, not of a str of '0'/'1' characters.
    print("The size of the encoded data is: {}\n".format(sys.getsizeof(int(encoded, base=2))))
    print("The content of the encoded data is: {}\n".format(encoded))

    decoded = huffman_decoding(encoded, encoder)

    print("The size of the decoded data is: {}\n".format(sys.getsizeof(decoded)))
    # Label fixed: this line shows the decoded text, not the encoded one.
    print("The content of the decoded data is: {}\n".format(decoded))

    return encoded, encoder, decoded


# Normal Cases:
# Sizes noted below are taken from the recorded cell output; sys.getsizeof
# values are interpreter specific. The encoded bit strings are not pinned
# here because earlier notes and the recorded output disagree on them.
normal_sentences = [
    "The bird is the word",                  # Case 1: data size 69, encoded size 36
    "I just want to have fun coding",        # Case 2: data size 79, encoded size 44
    "The sun shines and I go to the beach",  # Case 3: data size 85, encoded size 48
]

for case_number, a_great_sentence in enumerate(normal_sentences, start=1):
    print('Case {}:'.format(case_number))
    # The decoded content is expected to equal the original sentence.
    encoded_data, tree, decoded_data = _report_round_trip(a_great_sentence)

# Edge Cases
# Case 4: a text made of a single repeated symbol
print('Edge Cases:')
print('Case 4:')

a_not_so_great_sentence = "aaa"

# Earlier notes for this case: data size 52, encoded content 000 with size 24,
# decoded content aaa (not visible in the recorded output, so unconfirmed).
encoded_data, tree, decoded_data = _report_round_trip(a_not_so_great_sentence)

# Case 5: empty text; huffman_encoding prints a notice and returns None, so
# the result is not unpacked and no decoding is attempted.
print('Case 5:')
a_not_so_great_sentence = ""

print("The size of the data is: {}\n".format(sys.getsizeof(a_not_so_great_sentence)))
# Earlier notes: the size of the data is 49
print("The content of the data is: {}\n".format(a_not_so_great_sentence))
# The content of the data is:

huffman_encoding(a_not_so_great_sentence)
# Please introduce a non null string


Case 1:
The size of the data is: 69

The content of the data is: The bird is the word

The size of the encoded data is: 36

The content of the encoded data is: 000010011001110010000110100010100010001010000000000100000101100111001000110000111010100010

The size of the decoded data is: 69

The content of the encoded data is: The bird is the word

Case 2:
The size of the data is: 79

The content of the data is: I just want to have fun coding

The size of the encoded data is: 44

The content of the encoded data is: 000110011000111000010001000101011000101010000000001010110101010010110010100100000101100100001100100100001000000110011100100100111100110000000001101

The size of the decoded data is: 79

The content of the encoded data is: I just want to have fun coding

Case 3:
The size of the data is: 85

The content of the data is: The sun shines and I go to the beach

The size of the encoded data is: 48

The content of the encoded data is: 01001001100111001000010100110001000100001011001000000