In [55]:
# Release filename.
# NOTE: this value is overwritten by the test assignment just below; remove or
# comment out the test assignment to run against the release file.
filename = "bible2.txt"

# Test File
filename = "test.txt"
# Width, in bytes, of every code that LZW_compress writes and LZW_expand reads.
# Both functions must see the same value or the compressed file cannot be decoded.
NUM_BYTES = 3 # For large text file, I need 4!

In [56]:
def LZW_compress(filename, num_bytes=None):
    """
    Compress a file using the LZW algorithm and save the compressed data to a new file.

    Parameters:
        filename (str): The filename of the input file (.txt) to be compressed.
        num_bytes (int, optional): Width in bytes of each code written to the
            output. When omitted, the module-level NUM_BYTES is used. The same
            width must be used when expanding the file.

    Raises:
        ValueError: If the code width is smaller than one byte.
        OverflowError: If a dictionary code does not fit in the code width.
            Nothing is written to the output file in that case.

    Note:
        The input is read in binary mode ('rb'), the LZW compression algorithm is
        applied to its bytes, and the resulting codes are written as fixed-width
        big-endian integers to a new file with the extension '.lzw' appended to
        the original filename. An empty input produces an empty output file.

    Example:
        LZW_compress("test.txt")
    """
    if num_bytes is None:
        # Looked up at call time so the notebook-level setting is honoured.
        num_bytes = NUM_BYTES
    if num_bytes < 1:
        raise ValueError('Code width must be at least 1 byte, got %s' % num_bytes)

    # Build the dictionary: every possible single byte maps to its own value.
    dict_size = 256
    single_bytes = [bytes([i]) for i in range(dict_size)]
    dictionary = {single: i for i, single in enumerate(single_bytes)}

    w = b""
    result = []

    with open(filename, 'rb') as file:
        # Read in large chunks; issuing one read() call per byte is needlessly slow.
        for chunk in iter(lambda: file.read(65536), b""):
            for value in chunk:
                byte = single_bytes[value]
                wc = w + byte
                if wc in dictionary:
                    w = wc
                else:
                    result.append(dictionary[w])
                    # Add wc to the dictionary.
                    dictionary[wc] = dict_size
                    dict_size += 1
                    w = byte

    # Output the code for w.
    if w:
        result.append(dictionary[w])

    # Encode everything before touching the output file, so that a code that
    # does not fit cannot leave a truncated '.lzw' file behind.
    try:
        encoded = b"".join(
            code.to_bytes(num_bytes, byteorder='big') for code in result
        )
    except OverflowError:
        raise OverflowError(
            'Code %d does not fit in %d byte(s); use a larger code width'
            % (max(result), num_bytes)
        ) from None

    # Write compressed data to a new file
    with open(filename + ".lzw", "wb") as compressed_file:
        compressed_file.write(encoded)

# Compress the currently selected file; the result is written to filename + ".lzw".
LZW_compress(filename)


In [57]:
def LZW_expand(filename, num_bytes=None):
    """
    Decompress a file that has been compressed using the LZW algorithm.

    Parameters:
        filename (str): The filename of the compressed file (.lzw) to be decompressed.
        num_bytes (int, optional): Width in bytes of each code stored in the
            compressed file. When omitted, the module-level NUM_BYTES is used.
            It must match the width used when the file was compressed.

    Raises:
        ValueError: If the code width is smaller than one byte, if the file
            length is not a whole number of codes, or if a code refers to a
            dictionary entry that cannot exist at that point.

    Note:
        The compressed data is read in binary mode ('rb') as fixed-width
        big-endian integers, the LZW decompression algorithm is applied, and the
        decompressed data is written to a new file whose name is the input name
        with its last four characters (the '.lzw' extension) replaced by '.2'.
        An empty compressed file produces an empty output file.

    Example:
        LZW_expand("test.txt.lzw")
    """
    if num_bytes is None:
        # Looked up at call time so the notebook-level setting is honoured.
        num_bytes = NUM_BYTES
    if num_bytes < 1:
        raise ValueError('Code width must be at least 1 byte, got %s' % num_bytes)

    # Read compressed data from the file
    with open(filename, "rb") as compressed_file:
        raw = compressed_file.read()

    # A trailing partial code means the file is truncated or was written with a
    # different code width; decoding it would silently produce garbage.
    if len(raw) % num_bytes:
        raise ValueError(
            'Compressed data length %d is not a multiple of the %d-byte code width'
            % (len(raw), num_bytes)
        )

    compressed_data = [
        int.from_bytes(raw[i:i + num_bytes], byteorder='big')
        for i in range(0, len(raw), num_bytes)
    ]

    # Build the dictionary.
    dict_size = 256
    dictionary = {i: bytes([i]) for i in range(dict_size)}

    # Decompress the data
    result = []
    codes = iter(compressed_data)
    first = next(codes, None)
    # An empty compressed file (the result of compressing an empty input)
    # simply expands to nothing.
    if first is not None:
        # The first code must be a single byte: no longer entry exists yet.
        if first not in dictionary:
            raise ValueError('Bad compressed k: %s' % first)
        w = dictionary[first]
        result.append(w)
        for k in codes:
            if k in dictionary:
                entry = dictionary[k]
            elif k == dict_size:
                # The code being defined right now: it must start with w and
                # repeat w's first byte.
                entry = w + w[0:1]
            else:
                raise ValueError('Bad compressed k: %s' % k)
            result.append(entry)

            # Add w+entry[0] to the dictionary.
            dictionary[dict_size] = w + entry[0:1]
            dict_size += 1

            w = entry

    # Write decompressed data to a new file
    decompressed_filename = filename[:-4] + ".2"
    with open(decompressed_filename, "wb") as decompressed_file:
        decompressed_file.writelines(result)

# Expand the file produced by the compression cell; the result is written to filename + ".2".
LZW_expand(filename+".lzw")


In [58]:
# Test Cell

# Define color escape codes
class styles:
    """Terminal escape sequences used to decorate the test report output."""

    # Text attributes
    BOLD = '\033[1m'
    # Foreground colours
    RED = '\033[91m'
    GREEN = '\033[92m'
    # Emitted after a decorated string to return to the default rendering
    RESET = '\033[0m'

def compare_files(file1, file2):
    """
    Report whether two files have exactly the same content.

    Parameters:
        file1 (str): Path of the first file.
        file2 (str): Path of the second file.

    Returns:
        bool: True when both files contain the same bytes, False otherwise.
        A coloured one-line verdict is printed in either case.
    """
    # Compare in large chunks; issuing one read() call per byte is needlessly slow.
    chunk_size = 64 * 1024
    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
        while True:
            chunk1 = f1.read(chunk_size)
            chunk2 = f2.read(chunk_size)
            # Also catches a length mismatch: the shorter file yields a
            # shorter (or empty) chunk first.
            if chunk1 != chunk2:
                print(styles.RED + f"* Files {file1} and {file2} are not identical." + styles.RESET)
                return False
            if not chunk1:  # Reached end of both files
                break
    print(styles.GREEN + f"* Files {file1} and {file2} are identical." + styles.RESET)
    return True

# Files that go through the compress -> expand -> compare round trip
test_filenames = [
    "test/akjv.txt",
    "test/asv.txt",
    "test/brb.txt",
    "test/bsb.txt",
    "test/kjv.txt",
]

# Round-trip each file and report whether the expanded copy matches the original.
# NOTE: the loop variable rebinds the notebook-level `filename`, so cells run
# after this one see the last entry of test_filenames.
for i, filename in enumerate(test_filenames, start=1):
    print(f"{styles.BOLD}Test {i}: {styles.RESET}")
    LZW_compress(filename)
    LZW_expand(filename + ".lzw")
    result = compare_files(filename, filename + ".2")
    if not result:
        print(f"{styles.RED}* FAILED - Compression and decompression unsuccessful.{styles.RESET}")
    else:
        print(f"{styles.GREEN}* Passed - Compression and decompression successful.{styles.RESET}")


[1mTest 1: [0m
[92m* Files test/akjv.txt and test/akjv.txt.2 are identical.[0m
[92m* Passed - Compression and decompression successful.[0m
[1mTest 2: [0m
[92m* Files test/asv.txt and test/asv.txt.2 are identical.[0m
[92m* Passed - Compression and decompression successful.[0m
[1mTest 3: [0m
[92m* Files test/brb.txt and test/brb.txt.2 are identical.[0m
[92m* Passed - Compression and decompression successful.[0m
[1mTest 4: [0m
[92m* Files test/bsb.txt and test/bsb.txt.2 are identical.[0m
[92m* Passed - Compression and decompression successful.[0m
[1mTest 5: [0m
[92m* Files test/kjv.txt and test/kjv.txt.2 are identical.[0m
[92m* Passed - Compression and decompression successful.[0m


In [59]:
import os

def remove_all_lzw_files(directory):
    """
    Delete every file whose name ends in '.lzw' under a directory tree.

    Parameters:
        directory (str): Root of the tree to clean; subdirectories are included.
    """
    for root, _subdirs, names in os.walk(directory):
        # Full paths of the compressed files found directly in this folder.
        lzw_paths = (os.path.join(root, name) for name in names if name.endswith('.lzw'))
        for path in lzw_paths:
            os.remove(path)

def remove_all_dot_2_files(directory):
    """
    Delete every file whose name ends in '.2' under a directory tree.

    Parameters:
        directory (str): Root of the tree to clean; subdirectories are included.
    """
    for root, _subdirs, names in os.walk(directory):
        for name in names:
            # Leave anything that is not an expanded '.2' copy alone.
            if not name.endswith('.2'):
                continue
            target = os.path.join(root, name)
            os.remove(target)

# Remove the '.lzw' and '.2' files generated under test/.
remove_all_lzw_files("test/")
remove_all_dot_2_files("test/")
# Uncomment to clean the current working directory tree as well.
# remove_all_lzw_files(".")
# remove_all_dot_2_files(".")

In [60]:
# keep the function name
def LZW_modified_compress(fname):
    """
    Placeholder for the modified LZW compressor.

    Currently it only prints the input filename followed by the intended
    output filename (the input name with '.lzw2' appended).

    Parameters:
        fname (str): The filename of the input file.
    """
    output_name = fname + ".lzw2"
    print(fname, output_name, sep="\n")
    # your code here

# keep this line
# NOTE: `filename` is whatever the most recently executed cell left it as; in the
# recorded output below it is "test/kjv.txt", the last file of the test loop.
LZW_modified_compress(filename)

test/kjv.txt
test/kjv.txt.lzw2


In [61]:
# keep the function name
def LZW_modified_expand(fname):
    """
    Placeholder for the modified LZW expander.

    Currently it only prints the input filename followed by the intended
    output filename: the input name with its last five characters (the
    '.lzw2' extension) replaced by '.2M'.

    Parameters:
        fname (str): The filename of the compressed file (.lzw2).
    """
    restored_name = fname[:-5] + ".2M"
    for line in (fname, restored_name):
        print(line)
    # your code here

# keep this line
# NOTE: `filename` is whatever the most recently executed cell left it as; in the
# recorded output below it is "test/kjv.txt", the last file of the test loop.
LZW_modified_expand(filename+".lzw2")

test/kjv.txt.lzw2
test/kjv.txt.2M
