In [1]:
import numpy as np
import pickle
import os
import time

# LZW

In [2]:
def lzw_encode(data):
    """Compress a byte sequence with LZW and return the emitted integer codes.

    The dictionary starts with the 256 single-byte strings (codes 0-255) and
    gains one entry, numbered from 256 upward, every time a sequence that is
    not yet known is met. Codes are plain Python ints; no code-width limit or
    dictionary reset is applied.

    Args:
        data: Iterable of integers in ``range(256)``, e.g. ``bytes`` or
            ``bytearray``.

    Returns:
        A list of integer codes. Empty input yields an empty list.
    """
    dictionary = {bytes([i]): i for i in range(256)}
    next_code = 256
    result = []
    current_sequence = b""

    for byte in data:
        candidate = current_sequence + bytes([byte])
        if candidate in dictionary:
            # Keep extending the match while it is still a known sequence.
            current_sequence = candidate
            continue

        # The longest known prefix is emitted, the new sequence is learned,
        # and matching restarts from the byte that broke the match.
        result.append(dictionary[current_sequence])
        dictionary[candidate] = next_code
        next_code += 1
        current_sequence = candidate[-1:]

    # Flush the pending match; there is none when the input was empty.
    if current_sequence:
        result.append(dictionary[current_sequence])
    return result

In [3]:
def lzw_decode(encoded_data):
    """Rebuild the original bytes from a sequence of LZW codes.

    The dictionary is reconstructed on the fly, mirroring the encoder: it
    starts with codes 0-255 for the single bytes and gains one entry per code
    read after the first.

    Args:
        encoded_data: Sequence of integer codes as produced by ``lzw_encode``.

    Returns:
        The decoded ``bytes``. An empty sequence yields ``b""``.

    Raises:
        ValueError: If a code is neither in the dictionary nor the one code
            that is about to be defined.
    """
    if len(encoded_data) == 0:
        return b""

    dictionary = {i: bytes([i]) for i in range(256)}
    next_code = 256

    # The first code must be a single byte. Look it up in the dictionary
    # rather than going through chr().encode(), which UTF-8-encodes values
    # >= 128 into two bytes and corrupts the output.
    first_code = encoded_data[0]
    if first_code not in dictionary:
        raise ValueError("Invalid LZW code")
    current_sequence = dictionary[first_code]
    result = [current_sequence]

    for code in encoded_data[1:]:
        if code in dictionary:
            entry = dictionary[code]
        elif code == next_code:
            # The code refers to the entry being defined right now; it must
            # be the previous sequence followed by its own first byte.
            entry = current_sequence + current_sequence[0:1]
        else:
            raise ValueError("Invalid LZW code")

        result.append(entry)
        dictionary[next_code] = current_sequence + entry[0:1]
        next_code += 1
        current_sequence = entry

    return b"".join(result)

In [4]:
def run(path, filename):
    """Round-trip one file through LZW and write both intermediate results.

    The file is read as raw bytes, encoded with ``lzw_encode``, pickled to
    ``<cwd>/CompressedFiles/<ext>/<stem>_compressed.pkl``, loaded back,
    decoded with ``lzw_decode`` and written to
    ``<base>/DecompressedFiles/<ext>/<stem>_decompressed.<ext>``, where
    ``<base>`` is the folder two levels above ``path``.

    Args:
        path: Full path of the input file, laid out as
            ``<base>/<category folder>/<filename>``.
        filename: Base name of the input file; its extension selects the
            output sub-folder.
    """
    # Read the whole file as raw bytes.
    with open(path, 'rb') as f:
        data = f.read()

    # The base folder is two levels up from the file. Deriving it with
    # dirname works for any category folder name and any path separator,
    # unlike stripping a hard-coded '/<FOLDER>\\<filename>' suffix.
    base_dir = os.path.dirname(os.path.dirname(path))

    # splitext keeps dots inside the stem and copes with a missing extension.
    stem, ext = os.path.splitext(filename)
    ext = ext.lstrip(".")
    suffix = "." + ext if ext else ""

    # Compress and save. NOTE: this goes under the current working directory,
    # not under base_dir, so both only coincide when cwd is the base folder.
    compressed_dir = os.path.join(os.getcwd(), 'CompressedFiles/' + ext)
    os.makedirs(compressed_dir, exist_ok=True)
    compressed_path = os.path.join(compressed_dir, stem + '_compressed.pkl')
    with open(compressed_path, 'wb') as f:
        pickle.dump(lzw_encode(data), f)

    # Decompress from the file that was just written (pickle is only loaded
    # from our own output here; never point this at untrusted files).
    with open(compressed_path, 'rb') as f:
        compressed_data = pickle.load(f)
    decompressed_data = lzw_decode(compressed_data)

    # Save the reconstructed file next to the data folders.
    decompressed_dir = os.path.join(base_dir, 'DecompressedFiles/' + ext)
    os.makedirs(decompressed_dir, exist_ok=True)
    save_path = os.path.join(decompressed_dir, stem + '_decompressed' + suffix)
    with open(save_path, 'wb') as f:
        f.write(decompressed_data)

# Path to the folder containing the data

In [5]:
# Root folder that holds the SOUND, TEXT and IMG sub-folders.
# Set the LZW_DATA_DIR environment variable to run the notebook on another
# machine; the original location is kept as the default.
path = os.environ.get("LZW_DATA_DIR", r'E:\UIT\3rd year\Sem 2\CS232\Seminar')

## Sound

In [6]:
# Round-trip every file in the SOUND folder through LZW and time the batch.
path_sound = path + '/SOUND'
# perf_counter is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()
# Sorted for a reproducible order; sub-directories are skipped because run()
# opens each entry as a file.
file_list = [
    name for name in sorted(os.listdir(path_sound))
    if os.path.isfile(os.path.join(path_sound, name))
]
total = len(file_list)

for count, filename in enumerate(file_list, start=1):
    # No sleep here: any delay inside the loop is added to the reported time.
    run(os.path.join(path_sound, filename), filename)
    progress_percent = count / total * 100
    filled = int(progress_percent / 10)
    progress_bar = "[" + "#" * filled + " " * (10 - filled) + "]"
    # end="\r" redraws the same line instead of printing one line per file.
    print(f"Progress: {count}/{total} {progress_bar} ({progress_percent:.2f}%)", end="\r")

print("\nDone")
print("--- %s seconds ---" % (time.perf_counter() - start_time))

Progress: 4/4 [##########] (100.00%)
Done
--- 5.105449438095093 seconds ---


In [7]:
# Size ratio (compressed pickle / original file) for every sound file.
# NOTE: this reads from `path`/CompressedFiles, while run() writes under
# os.getcwd()/CompressedFiles; both match only when cwd is `path`.
path_commpress_sound = path + '/CompressedFiles/wav'
# Opened once in append mode: re-running the cell adds the lines again.
with open("LZW_Compression_Ratio_sound.txt", "a") as f:
    for i in sorted(os.listdir(path_sound)):
        label = i.split(".")[0]
        # Pair each original with its compressed file by name, not by listing
        # position. Both stem conventions are tried: text before the last dot
        # and text before the first dot.
        for stem in (os.path.splitext(i)[0], label):
            j = stem + '_compressed.pkl'
            if os.path.isfile(os.path.join(path_commpress_sound, j)):
                break
        else:
            print(label + " (no compressed file found, skipped)")
            continue

        size_ratio = os.path.getsize(os.path.join(path_commpress_sound, j)) / os.path.getsize(os.path.join(path_sound, i))
        line = label + " " + str(size_ratio)
        f.write(line + "\n")
        print(line)

00ae03f6 1.5688488504810043
00eac343 1.2853037600679968
0a5cbf90 1.430926893867191
sound 0.8544487556339408


## Text

In [8]:
# Round-trip every file in the TEXT folder through LZW and time the batch.
path_text = path + '/TEXT'
# perf_counter is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()
# Sorted for a reproducible order; sub-directories are skipped because run()
# opens each entry as a file.
file_list = [
    name for name in sorted(os.listdir(path_text))
    if os.path.isfile(os.path.join(path_text, name))
]
total = len(file_list)

for count, filename in enumerate(file_list, start=1):
    # No sleep here: any delay inside the loop is added to the reported time.
    run(os.path.join(path_text, filename), filename)
    progress_percent = count / total * 100
    filled = int(progress_percent / 10)
    progress_bar = "[" + "#" * filled + " " * (10 - filled) + "]"
    # end="\r" redraws the same line instead of printing one line per file.
    print(f"Progress: {count}/{total} {progress_bar} ({progress_percent:.2f}%)", end="\r")

print("\nDone")
print("--- %s seconds ---" % (time.perf_counter() - start_time))

Progress: 4/4 [##########] (100.00%)
Done
--- 0.2789933681488037 seconds ---


In [9]:
# Size ratio (compressed pickle / original file) for every text file.
# NOTE: this reads from `path`/CompressedFiles, while run() writes under
# os.getcwd()/CompressedFiles; both match only when cwd is `path`.
path_commpress_text = path + '/CompressedFiles/txt'
# Opened once in append mode: re-running the cell adds the lines again.
with open("LZW_Compression_Ratio_text.txt", "a") as f:
    for i in sorted(os.listdir(path_text)):
        label = i.split(".")[0]
        # Pair each original with its compressed file by name, not by listing
        # position. Both stem conventions are tried: text before the last dot
        # and text before the first dot.
        for stem in (os.path.splitext(i)[0], label):
            j = stem + '_compressed.pkl'
            if os.path.isfile(os.path.join(path_commpress_text, j)):
                break
        else:
            print(label + " (no compressed file found, skipped)")
            continue

        size_ratio = os.path.getsize(os.path.join(path_commpress_text, j)) / os.path.getsize(os.path.join(path_text, i))
        line = label + " " + str(size_ratio)
        f.write(line + "\n")
        print(line)

Text 1.4516933305630584
Text1 1.0850992824962455
Text2 0.7971638844013642
Text3 1.6331544761366845


## Image

In [10]:
# Round-trip every file in the IMG folder through LZW and time the batch.
path_img = path + '/IMG'
# perf_counter is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()
# Sorted for a reproducible order; sub-directories are skipped because run()
# opens each entry as a file.
file_list = [
    name for name in sorted(os.listdir(path_img))
    if os.path.isfile(os.path.join(path_img, name))
]
total = len(file_list)

for count, filename in enumerate(file_list, start=1):
    # No sleep here: any delay inside the loop is added to the reported time.
    run(os.path.join(path_img, filename), filename)
    progress_percent = count / total * 100
    filled = int(progress_percent / 10)
    progress_bar = "[" + "#" * filled + " " * (10 - filled) + "]"
    # end="\r" redraws the same line instead of printing one line per file.
    print(f"Progress: {count}/{total} {progress_bar} ({progress_percent:.2f}%)", end="\r")

print("\nDone")
print("--- %s seconds ---" % (time.perf_counter() - start_time))

Progress: 13/13 [##########] (100.00%)
Done
--- 11.781694650650024 seconds ---


In [11]:
# Size ratio (compressed pickle / original file) for every image; the values
# are collected in `ratio` for the summary cell.
# NOTE: this reads from `path`/CompressedFiles, while run() writes under
# os.getcwd()/CompressedFiles; both match only when cwd is `path`.
path_commpress_img = path + '/CompressedFiles/bmp'
ratio = []
for i in sorted(os.listdir(path_img)):
    label = i.split(".")[0]
    # Pair each original with its compressed file by name, not by listing
    # position. Both stem conventions are tried: text before the last dot
    # and text before the first dot.
    for stem in (os.path.splitext(i)[0], label):
        j = stem + '_compressed.pkl'
        if os.path.isfile(os.path.join(path_commpress_img, j)):
            break
    else:
        print(label + " (no compressed file found, skipped)")
        continue

    size_ratio = os.path.getsize(os.path.join(path_commpress_img, j)) / os.path.getsize(os.path.join(path_img, i))
    line = label + " " + str(size_ratio)
    # with open("LZW_Compression_Ratio_img.txt", "a") as f:
    #     f.write(line + "\n")
    ratio.append(size_ratio)
    print(line)

2heart 0.010218069414137446
background 1.1916874628981782
beach 0.9063172628715551
cartoon 0.9906987044624073
hanamichi 0.09110058820905802
harrypotter 0.8410773646542337
heart 0.06802547046449485
people 1.3155922977052668
pikachu 0.5586536221756347
planet 0.5972818875707774
rukawa 1.095062052662683
spiderman 0.887918933146166
todolist 1.1100092555864076


In [12]:
# Average compressed/original size ratio over the collected image ratios;
# a value below 1 means the compressed files are smaller on average.
ratio = np.array(ratio)
mean_ratio = ratio.mean()
print("Mean: ", mean_ratio)

Mean:  0.7433571516785384
