In [None]:
# Release input file.
filename = "bible2.txt"

# Test input file. This second assignment overrides the one above, so
# "test.txt" is the value the following cells actually use; comment this
# assignment out to process the release file instead.
filename = "test.txt"

In [None]:
def LZW_compress(filename):
    """Compress a file with the LZW algorithm, one input byte at a time.

    Args:
        filename (str): The path to the input file to be compressed.

    Returns:
        None: The compressed data is written to ``filename + ".lzw"``.

    The table starts with one entry per possible byte value (codes 0-255).
    While scanning the input, the longest byte sequence already present in
    the table is replaced by its code, and that sequence extended by the next
    input byte is registered under the next free code.

    Once the table holds 2 ** 12 entries no further sequences are registered;
    the remaining input is encoded with the table as it stands.

    Every emitted code is stored as a 2-byte big-endian integer.

    Example:
        LZW_compress("input_file.txt")

    """

    # Seed the table with all single-byte sequences.
    max_entries = 1 << 12
    table = {bytes((value,)): value for value in range(256)}
    next_code = len(table)

    codes = []
    current = b""

    with open(filename, "rb") as source:
        while symbol := source.read(1):
            candidate = current + symbol
            if candidate in table:
                # Keep growing the match while it is still a known sequence.
                current = candidate
                continue

            # Longest known match ends here: emit it and restart from symbol.
            codes.append(table[current])
            if next_code < max_entries:
                table[candidate] = next_code
                next_code += 1
            current = symbol

    # Flush the match that was still pending when the input ended.
    if current:
        codes.append(table[current])

    # Serialise every code as two big-endian bytes.
    payload = b"".join(code.to_bytes(2, "big") for code in codes)

    with open(filename + ".lzw", "wb") as target:
        target.write(payload)

# Compress the file selected above; the output is written to filename + ".lzw".
LZW_compress(filename)


In [None]:
def LZW_expand(filename):
    """Expand and decompress data from a file using the LZW decompression algorithm.

    Args:
        filename (str): The path to the compressed file to be decompressed.

    Returns:
        None: The decompressed data is written to a new file. Its name is the
        input name with a trailing ".lzw" removed (when present) and ".2"
        appended, e.g. "data.txt.lzw" -> "data.txt.2".

    Raises:
        ValueError: If the file ends in the middle of a 2-byte code, if the
            first code is not a single-byte code (0-255), or if a later code
            is not valid at that point of the stream.

    This function reads the compressed data from the specified file, interprets each pair of
    bytes as a big-endian integer code, and rebuilds the dictionary mapping codes to byte
    sequences while decoding.

    The dictionary is initialized with the single-byte sequences (codes 0-255). Each decoded
    code adds one new sequence (the previous sequence plus the first byte of the current one)
    until the dictionary holds 2 ** 12 entries, mirroring the limit used by ``LZW_compress``.

    An empty compressed file produces an empty output file.

    Example:
        LZW_expand("compressed_file.lzw")

    """
    # Same cap as the compressor: it never emits a code >= this value, so
    # entries beyond it could never be referenced and would only waste memory.
    dict_size_limit = 2 ** 12

    with open(filename, "rb") as compressed_file:
        # Read the compressed data as 2-byte integers
        compressed_data = []
        while byte_pair := compressed_file.read(2):
            if len(byte_pair) != 2:
                # A lone trailing byte would otherwise be decoded as a bogus code.
                raise ValueError('Bad compressed data: truncated code at end of file')
            compressed_data.append(int.from_bytes(byte_pair, byteorder='big'))

    # Build the dictionary.
    dict_size = 256
    dictionary = {i: bytes([i]) for i in range(dict_size)}

    # Decompress the data
    result = []
    codes = iter(compressed_data)
    first = next(codes, None)
    if first is not None:  # an empty stream decodes to empty output
        if first not in dictionary:
            raise ValueError('Bad compressed k: %s' % first)
        w = dictionary[first]
        result.append(w)

        for k in codes:
            if k in dictionary:
                entry = dictionary[k]
            elif k == dict_size and dict_size < dict_size_limit:
                # The code refers to the entry that is about to be created
                # (the compressor used a sequence right after registering it).
                entry = w + w[0:1]
            else:
                raise ValueError('Bad compressed k: %s' % k)
            result.append(entry)

            # Add w+entry[0] to the dictionary while there is room.
            if dict_size < dict_size_limit:
                dictionary[dict_size] = w + entry[0:1]
                dict_size += 1

            w = entry

    # Write decompressed data to a new file. Only strip the suffix when it is
    # really ".lzw"; otherwise keep the whole name rather than chopping off
    # four arbitrary characters.
    base_name = filename[:-4] if filename.endswith(".lzw") else filename
    decompressed_filename = base_name + ".2"
    with open(decompressed_filename, "wb") as decompressed_file:
        decompressed_file.writelines(result)

# Expand the archive written by the compression cell; for a name ending in
# ".lzw" the output is that name with ".lzw" replaced by ".2".
LZW_expand(filename+".lzw")


In [None]:
# Test Cell

# Define color escape codes
class styles:
    """Escape-sequence constants used to style the printed test report."""

    # Restores the default rendering after a styled segment.
    RESET = "\033[0m"
    # Colors for failed / passed results.
    RED = "\033[91m"
    GREEN = "\033[92m"
    # Emphasis for the test header.
    BOLD = "\033[1m"

import glob
import filecmp

# Define filenames to be tested. glob.glob does not guarantee any ordering,
# so sort the matches to keep the test numbering stable between runs.
test_filenames = sorted(glob.glob("test/*.txt"))

# Perform a compress/expand round trip for each file and compare the result
# with the original. A dedicated loop variable is used so that the
# notebook-level `filename` read by the other cells is not overwritten.
for i, test_filename in enumerate(test_filenames, start=1):
    print(styles.BOLD + f"Test {i}: " + styles.RESET)
    LZW_compress(test_filename)
    LZW_expand(test_filename + ".lzw")
    # shallow=False forces a byte-by-byte comparison of the file contents.
    round_trip_ok = filecmp.cmp(test_filename, test_filename + ".2", shallow=False)
    if round_trip_ok:
        print(styles.GREEN + "* Passed - Compression and decompression successful." + styles.RESET)
    else:
        print(styles.RED + "* FAILED - Compression and decompression unsuccessful." + styles.RESET)


In [None]:
import os

def clean(directory):
    """
    Delete generated compression artefacts below a directory.

    Walks the specified directory and all of its subdirectories and removes
    every file whose name ends with one of ".lzw", ".lzw2", ".2" or ".2M".

    Parameters:
        directory (str): The directory path at which the search starts.
    """
    generated_suffixes = ('.lzw', '.lzw2', '.2', '.2M')
    for root, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.endswith(generated_suffixes):
                continue
            os.remove(os.path.join(root, name))

# Remove generated files under the current working directory, including all
# of its subdirectories. NOTE(review): any file ending in ".2" or ".2M" is
# deleted, not only those produced by this notebook - confirm before running.
clean(".")
# Alternative: restrict the cleanup to the test directory.
# clean("test/")