In [19]:
from scipy.fftpack import dct

def compress_with_dct(data, quantization_bits=8):
    # Apply 1D DCT along the columns (temporal dimension)
    dct_transformed = np.apply_along_axis(dct, axis=0, arr=data)

    # Quantization
    quantized_coeffs = np.round(dct_transformed * (2 ** quantization_bits))

    # Encoding (pickle is used for simplicity; you may choose a different encoding method)
    encoded_data = pickle.dumps(quantized_coeffs)

    return encoded_data


In [20]:
from scipy.fftpack import idct

def decompress_with_dct(encoded_data, quantization_bits=8, original_shape=None):
    # Decoding
    quantized_coeffs = pickle.loads(encoded_data)

    # Dequantization
    dct_transformed = quantized_coeffs / (2 ** quantization_bits)

    # Apply inverse DCT along the columns (temporal dimension)
    reconstructed_data = np.apply_along_axis(idct, axis=0, arr=dct_transformed)

    # Reshape to the original shape if provided
    if original_shape is not None:
        reconstructed_data = reconstructed_data.reshape(original_shape)

    return reconstructed_data


In [21]:
import os

# Example usage
if __name__ == "__main__":
    # Assuming 'output.csv' is your CSV file with four columns of data
    csv_file_path = 'output2.csv'

    data = pd.read_csv('output2.csv')

    # Set your preferred parameters for DCT
    quantization_bits = 5  # Adjust the number of quantization bits as needed

    # Compression using DCT
    encoded_result = compress_with_dct(data, quantization_bits=quantization_bits)

    # Store the original shape for later reconstruction
    original_shape = data.shape

    # Decompression using DCT
    decompressed_result = decompress_with_dct(encoded_result, quantization_bits=quantization_bits, original_shape=original_shape)

    # Convert the decompressed result to a Pandas DataFrame
    decompressed_dataframe = pd.DataFrame(decompressed_result, columns=data.columns)

    # Save the compressed and decompressed data to new CSV files
    pd.DataFrame({'CompressedData': [encoded_result]}).to_csv('compressed_data_dct.csv', index=False)
    decompressed_dataframe.to_csv('decompressed_data_dct.csv', index=False)

    # Calculate the size of the original and compressed data
    original_size = os.path.getsize(csv_file_path)
    compressed_size = os.path.getsize('compressed_data_dct.csv')

    # Calculate the compression ratio
    compression_ratio = original_size / compressed_size

    print(f"Size of Original Data: {original_size} bytes")
    print(f"Size of Compressed Data: {compressed_size} bytes")
    print(f"Compression Ratio: {compression_ratio:.2f}")

    # Additional prints for diagnostic purposes
    print(f"Number of Rows in Original Data: {original_shape[0]}")
    print(f"Number of Columns in Original Data: {original_shape[1]}")
    print(f"Number of Bytes in Original Data per Row: {original_size / original_shape[0]:.2f} bytes")

Size of Original Data: 156389232 bytes
Size of Compressed Data: 213688181 bytes
Compression Ratio: 0.73
Number of Rows in Original Data: 108004
Number of Columns in Original Data: 70
Number of Bytes in Original Data per Row: 1447.99 bytes
