# 1.

In [99]:
import os
import zipfile

# Get a list of all .jpg files in the current directory
files = [f for f in os.listdir() if os.path.isfile(f) and f.endswith('.jpg')]

# List to store individual compression rates
compression_rates = []

# Iterate over the .jpg files and compress them
for file in files:
    # Define the output path for the compressed file
    output_path = file + ".zip"
    
    try:
        # Compress the file using zipfile with ZIP_DEFLATED mode for better compression
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zip_file:
            zip_file.write(file)

        # Calculate the compression rate
        original_size = os.path.getsize(file)
        compressed_size = os.path.getsize(output_path)
        compression_rate = (original_size - compressed_size) / original_size * 100
        compression_rates.append(compression_rate)

    except Exception as e:
        print(f"Error processing {file}. Reason: {e}")

# Calculate the average compression rate for all .jpg files
average_compression_rate = sum(compression_rates) / len(compression_rates)
print(f"Average file compression rate: {average_compression_rate:.2f}%")

Average file compression rate: 0.08%


# 2.

In [68]:
import numpy as np
import cv2
from sklearn.cluster import KMeans
import heapq
import struct
from typing import Tuple, Dict
import time

class HuffmanTreeNode:
    def __init__(self, value=None, frequency=None):
        self.value = value
        self.frequency = frequency
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequency < other.frequency

def image_to_data(image: np.ndarray) -> np.ndarray:
    return image.reshape((-1, 3))

def quantize_image(image: np.ndarray, n_colors: int) -> Tuple[np.ndarray, np.ndarray]:
    data = image_to_data(image)
    kmeans = KMeans(n_clusters=n_colors, n_init=10)
    kmeans.fit(data)
    new_data = kmeans.cluster_centers_[kmeans.labels_]
    quantized_image = new_data.reshape(image.shape).astype(np.uint8)
    return quantized_image, kmeans.cluster_centers_.astype(np.uint8)

def to_indexed_image(image: np.ndarray, colormap: np.ndarray) -> np.ndarray:
    data = image_to_data(image)
    indexed_data = np.argmin(np.linalg.norm(colormap - data[:, np.newaxis], axis=2), axis=1)
    return indexed_data.reshape(image.shape[:-1])

def build_huffman_tree(frequency: Dict[int, int]) -> HuffmanTreeNode:
    heap = [HuffmanTreeNode(value=char, frequency=freq) for char, freq in frequency.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged_node = HuffmanTreeNode(frequency=left.frequency + right.frequency)
        merged_node.left = left
        merged_node.right = right
        heapq.heappush(heap, merged_node)
    return heap[0]

def build_huffman_codes(tree: HuffmanTreeNode) -> Dict[int, str]:
    codes = {}
    def traverse(node, code=""):
        if node is None:
            return
        if node.value is not None:
            codes[node.value] = code
            return
        traverse(node.left, code + "0")
        traverse(node.right, code + "1")
    traverse(tree)
    return codes

def huffman_compress(image: np.ndarray, colormap_size: int, output_file: str):
    quantized_image, colormap = quantize_image(image, colormap_size)
    indexed_image = to_indexed_image(quantized_image, colormap)
    color_counts = dict(zip(*np.unique(indexed_image, return_counts=True)))
    huffman_tree = build_huffman_tree(color_counts)
    huffman_codes = build_huffman_codes(huffman_tree)
    encoded_data = ''.join(huffman_codes[i] for i in indexed_image.ravel())
    padded_encoded_data = encoded_data + '0' * (8 - len(encoded_data) % 8)
    encoded_bytes = bytearray(int(padded_encoded_data[i:i+8], 2) for i in range(0, len(padded_encoded_data), 8))
    
    with open(output_file, 'wb') as file:
        file.write(struct.pack('I', colormap_size))
        
        file.write(colormap.tobytes())
        
        file.write(struct.pack('I', len(indexed_image.ravel())))
        
        file.write(struct.pack('I', len(color_counts)))
        for color, count in color_counts.items():
            file.write(struct.pack('B', color))
            file.write(struct.pack('I', count))
        
        file.write(encoded_bytes)

def huffman_decompress(input_file: str, output_file: str):
    with open(input_file, 'rb') as file:
        colormap_size = struct.unpack('I', file.read(4))[0]
        colormap = np.frombuffer(file.read(3 * colormap_size), dtype=np.uint8).reshape(-1, 3)
        
        original_length = struct.unpack('I', file.read(4))[0]
        
        num_colors = struct.unpack('I', file.read(4))[0]
        color_counts = {}
        for _ in range(num_colors):
            color = struct.unpack('B', file.read(1))[0]
            count = struct.unpack('I', file.read(4))[0]
            color_counts[color] = count
        
        encoded_data = file.read()

    huffman_tree = build_huffman_tree(color_counts)
    huffman_codes = build_huffman_codes(huffman_tree)
    decoded_data = []
    current_code = ""
    for byte in encoded_data:
        current_code += bin(byte)[2:].rjust(8, '0')
        while current_code:
            found = False
            for char, code in huffman_codes.items():
                if current_code.startswith(code):
                    decoded_data.append(char)
                    current_code = current_code[len(code):]
                    found = True
                    break
            if not found:
                break

    indexed_image = np.array(decoded_data[:original_length], dtype=np.uint8).reshape(input_image.shape[:-1])
    quantized_image = colormap[indexed_image]
    cv2.imwrite(output_file, quantized_image)

if __name__ == "__main__":
    input_image_path = 'ZhongXinaSmaller.jpg'
    compressed_file = 'compressed.hmc'

    input_image = cv2.imread(input_image_path)
    if input_image is None:
        raise ValueError(f"Could not open or find the image: {input_image_path}")

    colormap_sizes = [256, 128, 64, 32, 16, 8]
    for colormap_size in colormap_sizes:
        print(f"Compressing with colormap size: {colormap_size}")

        start_time = time.time()
        
        huffman_compress(input_image, colormap_size, compressed_file)
        
        decompressed_file = f'result_huffman_{colormap_size}.bmp'
        
        huffman_decompress(compressed_file, decompressed_file)

        elapsed_time = time.time() - start_time
        hours, rem = divmod(elapsed_time, 3600)
        minutes, seconds = divmod(rem, 60)
        milliseconds = int((seconds % 1) * 1000)
        print(f"Time taken: {int(hours):02}:{int(minutes):02}:{int(seconds):02}.{milliseconds:03} for colormap size: {colormap_size}\n")

Compressing with colormap size: 256
Time taken: 00:00:32.836 for colormap size: 256

Compressing with colormap size: 128
Time taken: 00:00:15.561 for colormap size: 128

Compressing with colormap size: 64
Time taken: 00:00:07.715 for colormap size: 64

Compressing with colormap size: 32
Time taken: 00:00:03.691 for colormap size: 32

Compressing with colormap size: 16
Time taken: 00:00:01.655 for colormap size: 16

Compressing with colormap size: 8
Time taken: 00:00:00.857 for colormap size: 8

