In [1]:
import numpy as np
import pandas as pd
import pywt
import pickle
import os


def compress_with_dwt(data, wavelet='db1', level=1, threshold=0.05, quantization_bits=2):
    # Decomposition using DWT
    coeffs = pywt.wavedec(data, wavelet, level=level)

    # Thresholding (Soft Thresholding in this example)
    thresholded_coeffs = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]

    # Quantization
    quantized_coeffs = [np.round(c * (2 ** quantization_bits)) for c in thresholded_coeffs]

    # Encoding (pickle is used for simplicity; you may choose a different encoding method)
    encoded_data = pickle.dumps(quantized_coeffs)

    return encoded_data

In [2]:
def decompress_with_dwt(encoded_data, wavelet='db1', level=1, quantization_bits=2, original_shape=None):
    # Decoding
    quantized_coeffs = pickle.loads(encoded_data)

    # Dequantization
    thresholded_coeffs = [c / (2 ** quantization_bits) for c in quantized_coeffs]

    # Reconstruction
    reconstructed_data = pywt.waverec(thresholded_coeffs, wavelet)

    # Reshape to the original shape if provided
    if original_shape is not None:
        reconstructed_data = reconstructed_data.reshape(original_shape)

    return reconstructed_data

In [3]:
import os

# Example usage
if __name__ == "__main__":
    # Assuming 'output.csv' is your CSV file with four columns of data
    csv_file_path = 'output2.csv'

    data = pd.read_csv('output2.csv')

    # Set your preferred parameters for the DWT
    wavelet_type = 'db2'  # Changed to 'db2' for potentially better compression
    decomposition_level = 3  # Increased decomposition level for more compression
    threshold_value = 0.01  # Lowered threshold value for potentially better compression
    quantization_bits = 2  # Decreased quantization bits for potentially better compression

    # Compression using DWT
    encoded_result = compress_with_dwt(data, wavelet=wavelet_type, level=decomposition_level,
                                       threshold=threshold_value, quantization_bits=quantization_bits)

    # Store the original shape for later reconstruction
    original_shape = data.shape

    # Decompression using DWT
    decompressed_result = decompress_with_dwt(encoded_result, wavelet=wavelet_type, level=decomposition_level,
                                              quantization_bits=quantization_bits, original_shape=original_shape)

    # Convert the decompressed result to a Pandas DataFrame
    decompressed_dataframe = pd.DataFrame(decompressed_result, columns=data.columns)

    # Save the compressed and decompressed data to new CSV files
    pd.DataFrame({'CompressedData': [encoded_result]}).to_csv('compressed_data.csv', index=False)
    decompressed_dataframe.to_csv('decompressed_data.csv', index=False)

    # Calculate the size of the original and compressed data
    original_size = os.path.getsize(csv_file_path)
    compressed_size = os.path.getsize('compressed_data.csv')

    # Calculate the compression ratio
    compression_ratio = original_size / compressed_size

    print(f"Size of Original Data: {original_size} bytes")
    print(f"Size of Compressed Data: {compressed_size} bytes")
    print(f"Compression Ratio: {compression_ratio:.2f}")

    # Additional prints for diagnostic purposes
    print(f"Number of Rows in Original Data: {original_shape[0]}")
    print(f"Number of Columns in Original Data: {original_shape[1]}")
    print(f"Number of Bytes in Original Data per Row: {original_size / original_shape[0]:.2f} bytes")
    

Size of Original Data: 319003017 bytes
Size of Compressed Data: 506654646 bytes
Compression Ratio: 0.63
Number of Rows in Original Data: 108004
Number of Columns in Original Data: 140
Number of Bytes in Original Data per Row: 2953.62 bytes
