In [1]:
import torch

# Load the tensor
indices = torch.load('indices.pt')

indices_np = indices.numpy()  # Convert PyTorch tensor to NumPy array

  indices = torch.load('indices.pt')


**uint 16 for k=512**

In [125]:
import numpy as np
import reedsolo

# Initialize RS codec
r = 120  # Redundancy (4 symbols per block)
n = 255  # Total symbols per block
k = n - r  # Data symbols per block
rs = reedsolo.RSCodec(r)

# Simulated data
original_size = 503 # Original data size
indices = np.random.randint(0, 512, size=(original_size,), dtype=np.uint16)

# Calculate padding
padding_length = (k - (len(indices) % k)) % k
padded_indices = np.pad(indices, (0, padding_length), mode='constant', constant_values=0)
indices_bytes = padded_indices.tobytes()

# Ensure that padded data length aligns with expected block size
assert len(indices_bytes) % k == 0, "Padded data size is not aligned with the block size."

# Encode the data
encoded_data = rs.encode(indices_bytes)

# Simulate noise (add random bit errors based on error ratio)
def add_noise_with_ratio(data, error_ratio):
    """
    Add noise to the encoded data by flipping bits based on the error ratio.

    Args:
        data (bytes): Encoded data.
        error_ratio (float): Fraction of bits to flip (e.g., 0.01 for 1% errors).

    Returns:
        bytes: Noisy data.
    """
    noisy_data = bytearray(data)
    total_bits = len(noisy_data) * 8  # Total number of bits in the data
    num_errors = int(total_bits * error_ratio)  # Calculate the number of bits to flip

    print(f"Introducing {num_errors} errors out of {total_bits} total bits (ratio = {error_ratio}).")

    for _ in range(num_errors):
        byte_index = np.random.randint(len(noisy_data))  # Random byte index
        bit_index = np.random.randint(8)  # Random bit within the byte
        noisy_data[byte_index] ^= (1 << bit_index)  # Flip the selected bit

    return bytes(noisy_data)

# Introduce noise with a given error ratio
error_ratio = 0.025  # Flip 1% of the total bits
noisy_data = add_noise_with_ratio(encoded_data, error_ratio)


# Process data block by block
def decode_blocks(encoded_data, block_size, data_size, redundancy):
    decoded_result = bytearray()
    num_blocks = len(encoded_data) // block_size

    print(f"Decoding {num_blocks} blocks of size {block_size} bytes each.")

    for i in range(num_blocks):
        start = i * block_size
        end = start + block_size
        block = encoded_data[start:end]

        try:
            # Attempt to decode the block
            decoded_block = rs.decode(block)
            if isinstance(decoded_block, tuple):  # Handle tuple output
                decoded_block = decoded_block[0]
            decoded_result.extend(decoded_block[:data_size])  # Append only data symbols
        except reedsolo.ReedSolomonError:
            # Decoding failed, append raw noisy data for this block
            print(f"Block {i + 1} could not be decoded. Retaining noisy data.")
            decoded_result.extend(block[:data_size])  # Append raw data symbols (exclude redundancy)

    return decoded_result

# Decode the noisy data
decoded_data = decode_blocks(noisy_data, block_size=n, data_size=k, redundancy=r)

# Remove padding
decoded_data = decoded_data[:original_size * 2]  # Trim to original data size (bytes)
decoded_indices = np.frombuffer(decoded_data, dtype=np.uint16)

# Validate shapes
print("Decoded Indices (Best Effort):")
print(decoded_indices)

print("\nOriginal Indices:")
print(indices)

print(f"\nOriginal shape: {indices.shape}, Decoded shape: {decoded_indices.shape}")


Decoding 6 blocks of size 255 bytes each.
Block 5 could not be decoded. Retaining noisy data.
Decoded Indices (Best Effort):
[  394   290   165   493    23   129   121   285   113   427   393    23
    77   229   419   188    82   265   297   125   450   283   457   128
   455   213   424   295   302   286    29   119   402   315   436   107
   429   378   243    69   229   280   316   275    84   476   238   283
   113    20   466   287   453   372    44   256   255   377    51   185
   393   392   373   493    38   158     7    50   365   414    16   246
   170   197   336   195   403   509   130   298   128   375   135   450
   381   406   283    28    19   237   472   433   167   286   131   421
   291    91   188   281   209   226   412   236    21   457   484   350
   108   263   179    64   101    87   374    35   388   480   135    13
   329   271    13   116   132   172   330   188   102   119   387   240
   466   212   415   501   253   480    21   250   149   331   210   423

Simulation

In [43]:
import numpy as np
import reedsolo

# Initialize RS codec
r = 110  # Redundancy (4 symbols per block)
n = 255  # Total symbols per block
k = n - r  # Data symbols per block
rs = reedsolo.RSCodec(r)

def transmitter(indices, output_file):
    # Calculate padding
    padding_length = (k - (len(indices) % k)) % k  # Padding to align data size to block size
    padded_indices = np.pad(indices, (0, padding_length), mode='constant', constant_values=0)
    indices_bytes = padded_indices.tobytes()

    # Ensure that padded data length aligns with expected block size
    assert len(indices_bytes) % k == 0, "Padded data size is not aligned with the block size."

    # Encode the data
    encoded_data = rs.encode(indices_bytes)

    # Save encoded data to a .bin file
    with open(output_file, "wb") as f:
        f.write(encoded_data)

    print(f"Encoded data saved to {output_file}")
    return len(indices), len(padded_indices), encoded_data

# Generate simulated data
original_size = 12288  # Original data size (indices count)
indices = np.random.randint(0, 256, size=(original_size,), dtype=np.uint8)  # uint8 indices

# Transmit the encoded data
original_size, padded_size, encoded_data = transmitter(indices, "transmitted_data.bin")




def add_noise(input_file, output_file, error_ratio):
    """
    Add noise to the data in a binary file based on a given error ratio.
    
    Args:
        input_file (str): Path to the input binary file.
        output_file (str): Path to save the noisy binary file.
        error_ratio (float): Fraction of bits to flip (e.g., 0.01 for 1% of bits).
    
    Returns:
        noisy_data (bytes): The noisy data.
    """
    # Read encoded data from file
    with open(input_file, "rb") as f:
        encoded_data = f.read()

    # Calculate the total number of bits in the file
    total_bits = len(encoded_data) * 8

    # Calculate the number of errors based on the error ratio
    num_errors = int(total_bits * error_ratio)
    print(f"Introducing {num_errors} errors out of {total_bits} total bits (ratio = {error_ratio}).")

    # Add noise to the data
    noisy_data = bytearray(encoded_data)
    for _ in range(num_errors):
        byte_index = np.random.randint(len(noisy_data))
        bit_index = np.random.randint(8)
        noisy_data[byte_index] ^= (1 << bit_index)  # Flip a random bit

    # Save the noisy data to a .bin file
    with open(output_file, "wb") as f:
        f.write(noisy_data)

    print(f"Noisy data saved to {output_file}")
    return noisy_data



def receiver(input_file, output_file, original_size):
    # Load noisy data
    with open(input_file, "rb") as f:
        received_data = f.read()

    # Decode block by block
    def decode_blocks(encoded_data, block_size, data_size, redundancy):
        decoded_result = bytearray()
        num_blocks = len(encoded_data) // block_size

        print(f"Decoding {num_blocks} blocks of size {block_size} bytes each.")

        for i in range(num_blocks):
            start = i * block_size
            end = start + block_size
            block = encoded_data[start:end]

            try:
                # Attempt to decode the block
                decoded_block = rs.decode(block)
                if isinstance(decoded_block, tuple):  # Handle tuple output
                    decoded_block = decoded_block[0]
                decoded_result.extend(decoded_block[:data_size])  # Append only data symbols
            except reedsolo.ReedSolomonError:
                # Decoding failed, append raw noisy data for this block
                print(f"Block {i + 1} could not be decoded. Retaining noisy data.")
                decoded_result.extend(block[:data_size])  # Append raw data symbols (exclude redundancy)

        return decoded_result

    # Decode the noisy data
    decoded_data = decode_blocks(received_data, block_size=n, data_size=k, redundancy=r)

    # Remove padding
    decoded_data = decoded_data[:original_size]  # Trim to original data size (bytes)
    decoded_indices = np.frombuffer(decoded_data, dtype=np.uint8)

    # Save decoded indices to a .bin file
    with open(output_file, "wb") as f:
        f.write(decoded_indices.tobytes())

    print(f"Decoded indices saved to {output_file}")
    return decoded_indices

# Transmit the data
original_size, padded_size, encoded_data = transmitter(indices, "transmitted_data.bin")

# Simulate noise
#noisy_data = add_noise("transmitted_data.bin", "noisy_data.bin", num_errors=30)
noisy_data = add_noise("transmitted_data.bin", "noisy_data.bin", error_ratio= 0.04)


# Receive and decode the data
decoded_indices = receiver("noisy_data.bin", "decoded_indices.bin", original_size)

# Validate shapes
print("\nDecoded Indices (Best Effort):")
print(decoded_indices)

print("\nOriginal Indices:")
print(indices)

print(f"\nOriginal shape: {indices.shape}, Decoded shape: {decoded_indices.shape}")


Encoded data saved to transmitted_data.bin
Encoded data saved to transmitted_data.bin
Introducing 6936 errors out of 173400 total bits (ratio = 0.04).
Noisy data saved to noisy_data.bin
Decoding 85 blocks of size 255 bytes each.
Block 1 could not be decoded. Retaining noisy data.
Block 2 could not be decoded. Retaining noisy data.
Block 3 could not be decoded. Retaining noisy data.
Block 4 could not be decoded. Retaining noisy data.
Block 5 could not be decoded. Retaining noisy data.
Block 6 could not be decoded. Retaining noisy data.
Block 7 could not be decoded. Retaining noisy data.
Block 8 could not be decoded. Retaining noisy data.
Block 9 could not be decoded. Retaining noisy data.
Block 10 could not be decoded. Retaining noisy data.
Block 11 could not be decoded. Retaining noisy data.
Block 12 could not be decoded. Retaining noisy data.
Block 13 could not be decoded. Retaining noisy data.
Block 14 could not be decoded. Retaining noisy data.
Block 15 could not be decoded. Retaini

In [44]:
# calculate number of similar elements between indices and decoded indices and give percentage

def calculate_similarity(indices, decoded_indices):
    num_similar = np.sum(indices == decoded_indices)
    total_elements = indices.size
    similarity = num_similar / total_elements
    return similarity, num_similar, total_elements

similarity,num_similar, total_elements = calculate_similarity(indices, decoded_indices)
print(f"\nSimilarity: {similarity:.2%}")
print(f"Errors: {total_elements - num_similar} out of {total_elements} elements")


Similarity: 74.03%
Errors: 3191 out of 12288 elements


In [253]:
def calculate_ber(bin_file1, bin_file2):
    with open(bin_file1, 'rb') as file1, open(bin_file2, 'rb') as file2:
        data1 = file1.read()
        data2 = file2.read()
    
    # Calculate the number of bit errors
    bit_errors = sum(bin(int(byte1) ^ int(byte2)).count('1') for byte1, byte2 in zip(data1, data2))
    
    # Calculate the bit error rate
    ber = bit_errors / (8 * len(data1))
    return ber, bit_errors, len(data1)*8

ber, errors, bits = calculate_ber('transmitted_data.bin', 'transmitted_data_new.bin')
print(f"Bit Error Rate: {ber:.2e}")
print(f"Number of bit errors: {errors}")
print(f"Total number of bits: {bits}")

Bit Error Rate: 0.00e+00
Number of bit errors: 0
Total number of bits: 195840


In [None]:


ber, errors, bits = calculate_ber('encoded_feature_rayleigh_image24.bin', 'corrupted_feature_rayleigh_image24.bin')
print(f"Bit Error Rate: {ber:.2e}")
print(f"Number of bit errors: {errors}")
print(f"Total number of bits: {bits}")

# Compact binary file saving method

In [254]:
from bitarray import bitarray
import numpy as np

# Example indices array (values between 0 and 511)
#indices = np.array([3, 511, 256, 128, 5], dtype=np.uint16)

original_size = 5  # Original data size
indices = np.random.randint(0, 512, size=(original_size,), dtype=np.uint16)

# Convert indices to binary representation
bit_stream = bitarray()
for index in indices:
    bit_stream.extend(format(index, '09b'))  # 9-bit binary format

# Save to a binary file
with open('indices.bin', 'wb') as f:
    bit_stream.tofile(f)

print(indices)

[414 162 454 469 319]


In [255]:
from bitarray import bitarray
import numpy as np

# Read the binary file
bit_stream = bitarray()
with open('indices.bin', 'rb') as f:
    bit_stream.fromfile(f)

# Decode indices from the bit stream
indices = []
i = 0
while i + 9 <= len(bit_stream):  # Ensure there are at least 9 bits remaining
    indices.append(int(bit_stream[i:i+9].to01(), 2))
    i += 9  # Move to the next chunk

# Convert to NumPy array for consistency
indices_np = np.array(indices, dtype=np.uint16)

print(indices_np)  # Output the reconstructed indices


[414 162 454 469 319]


In [None]:
k = 256

bits_per_index = int(np.ceil(np.log2(k)))

# Load indices from the binary file
def load_indices_from_binary(file_path, bits_per_index):
    bit_stream = bitarray()
    with open(file_path, 'rb') as f:
        bit_stream.fromfile(f)
    num_indices = len(bit_stream) // bits_per_index  # Calculate the number of indices
    indices = [int(bit_stream[i:i+bits_per_index].to01(), 2) for i in range(0, len(bit_stream), bits_per_index)]
    return np.array(indices[:num_indices], dtype=np.uint16 if bits_per_index > 8 else np.uint8)

In [256]:
bit_stream

bitarray('110011110010100010111000110111010101100111111000')

# Efficiently saving the data

In [260]:
from bitarray import bitarray
import numpy as np
import reedsolo

# Initialize RS codec
r = 127  # Redundancy (symbols per block)
n = 255  # Total symbols per block
data_symbols = n - r  # Data symbols per block
rs = reedsolo.RSCodec(r)

# Parameters
original_size = 513  # Original data size
k = 512  # Number of clusters (max value for indices)
bits_per_index = int(np.ceil(np.log2(k)))  # Calculate bits needed for `k`

# === Transmitter ===

# Simulated data
indices = np.random.randint(0, k, size=(original_size,))

# Pack indices into a bit stream
bit_stream = bitarray()
for index in indices:
    bit_stream.extend(format(index, f'0{bits_per_index}b'))

# Convert the bit stream to bytes
packed_data = bit_stream.tobytes()

# Pad for Reed-Solomon compatibility
padding_length = (data_symbols - (len(packed_data) % data_symbols)) % data_symbols
padded_data = packed_data + bytes(padding_length)

# Encode with Reed-Solomon
encoded_data = rs.encode(padded_data)

# Save the encoded data
encoded_file = "transmitted_data.bin"
with open(encoded_file, "wb") as f:
    f.write(encoded_data)
print(f"Encoded data saved to '{encoded_file}'.\n")

# Introduce noise to the encoded data
def add_noise_with_ratio(input_file, output_file, error_ratio):
    """
    Add noise to the encoded data in a binary file and save the result.
    """
    with open(input_file, "rb") as f:
        encoded_data = f.read()

    noisy_data = bytearray(encoded_data)
    total_bits = len(noisy_data) * 8
    num_errors = int(total_bits * error_ratio)

    print(f"Introducing {num_errors} errors out of {total_bits} total bits (ratio = {error_ratio}).")

    for _ in range(num_errors):
        byte_index = np.random.randint(len(noisy_data))
        bit_index = np.random.randint(8)
        noisy_data[byte_index] ^= (1 << bit_index)

    with open(output_file, "wb") as f:
        f.write(noisy_data)

    print(f"Noisy data saved to '{output_file}'.")
    return noisy_data

noisy_file = "noisy_data.bin"
noisy_data = add_noise_with_ratio(encoded_file, noisy_file, error_ratio=0.05)

# === Receiver ===

# Decode Reed-Solomon blocks
def decode_blocks(input_file, block_size, data_size, original_size, bits_per_index):
    """
    Decode noisy data from a binary file and reconstruct the original indices.
    """
    decoded_result = bytearray()

    with open(input_file, "rb") as f:
        encoded_data = f.read()

    num_blocks = len(encoded_data) // block_size
    print(f"Decoding {num_blocks} blocks of size {block_size} bytes each.")

    for i in range(num_blocks):
        start = i * block_size
        end = start + block_size
        block = encoded_data[start:end]

        try:
            decoded_block = rs.decode(block)
            if isinstance(decoded_block, tuple):
                decoded_block = decoded_block[0]
            decoded_result.extend(decoded_block[:data_symbols])
        except reedsolo.ReedSolomonError:
            print(f"Block {i + 1} could not be decoded. Retaining noisy data.")
            decoded_result.extend(block[:data_symbols])

    # Remove padding
    decoded_data = decoded_result[:len(decoded_result) - padding_length]

    # Convert decoded bytes back into a bit stream
    bit_stream = bitarray()
    bit_stream.frombytes(decoded_data)

    # Decode indices from the bit stream
    indices_decoded = []
    i = 0
    while i + bits_per_index <= len(bit_stream):
        indices_decoded.append(int(bit_stream[i:i+bits_per_index].to01(), 2))
        i += bits_per_index

    return np.array(indices_decoded[:original_size], dtype=np.uint16 if bits_per_index > 8 else np.uint8)

decoded_indices = decode_blocks(noisy_file, block_size=n, data_size=data_symbols, original_size=original_size, bits_per_index=bits_per_index)

# Save recovered indices to a compact binary file
def save_indices_to_binary(indices_np, bits_per_index, file_path):
    """
    Save indices to a compact binary file using the required number of bits per index.
    """
    packed_data = bitarray()
    for idx in indices_np:
        packed_data.extend(format(idx, f'0{bits_per_index}b'))  # Pack indices into binary format
    with open(file_path, "wb") as f:
        packed_data.tofile(f)
    print(f"Recovered indices saved to '{file_path}'")

recovered_file = "recovered_indices.bin"
save_indices_to_binary(decoded_indices, bits_per_index, recovered_file)

# Validate results
print("\nDecoded Indices (First 20):", decoded_indices[:20])
print("Original Indices (First 20):", indices[:20])

# Validate shapes and similarity
print(f"\nOriginal shape: {indices.shape}, Decoded shape: {decoded_indices.shape}")
num_similar = np.sum(indices == decoded_indices)
total_elements = indices.size
similarity = num_similar / total_elements
print(f"\nSimilarity: {similarity:.2%}")
print(f"Errors: {total_elements - num_similar} out of {total_elements} elements.")


Encoded data saved to 'transmitted_data.bin'.

Introducing 510 errors out of 10200 total bits (ratio = 0.05).
Noisy data saved to 'noisy_data.bin'.
Decoding 5 blocks of size 255 bytes each.
Block 1 could not be decoded. Retaining noisy data.
Block 2 could not be decoded. Retaining noisy data.
Block 3 could not be decoded. Retaining noisy data.
Block 4 could not be decoded. Retaining noisy data.
Block 5 could not be decoded. Retaining noisy data.
Recovered indices saved to 'recovered_indices.bin'

Decoded Indices (First 20): [256 510 237 195  40 239  69 498 255  74 174 412 500 247 332  36 341 418
 264  41]
Original Indices (First 20): [388 188 237 193  40 239  77 498 254  74 170 412 372 243 332  36 337 418
 266  41]

Original shape: (513,), Decoded shape: (513,)

Similarity: 65.89%
Errors: 175 out of 513 elements.


Hamming Distance Testing

In [264]:
def int_to_gray(n):
    return n ^ (n >> 1)

def generate_gray_codes(K):
    gray_codes = [int_to_gray(i) for i in range(K)]
    return gray_codes

K = 512  # Number of clusters
gray_codes = generate_gray_codes(K)

gray_codes

[0,
 1,
 3,
 2,
 6,
 7,
 5,
 4,
 12,
 13,
 15,
 14,
 10,
 11,
 9,
 8,
 24,
 25,
 27,
 26,
 30,
 31,
 29,
 28,
 20,
 21,
 23,
 22,
 18,
 19,
 17,
 16,
 48,
 49,
 51,
 50,
 54,
 55,
 53,
 52,
 60,
 61,
 63,
 62,
 58,
 59,
 57,
 56,
 40,
 41,
 43,
 42,
 46,
 47,
 45,
 44,
 36,
 37,
 39,
 38,
 34,
 35,
 33,
 32,
 96,
 97,
 99,
 98,
 102,
 103,
 101,
 100,
 108,
 109,
 111,
 110,
 106,
 107,
 105,
 104,
 120,
 121,
 123,
 122,
 126,
 127,
 125,
 124,
 116,
 117,
 119,
 118,
 114,
 115,
 113,
 112,
 80,
 81,
 83,
 82,
 86,
 87,
 85,
 84,
 92,
 93,
 95,
 94,
 90,
 91,
 89,
 88,
 72,
 73,
 75,
 74,
 78,
 79,
 77,
 76,
 68,
 69,
 71,
 70,
 66,
 67,
 65,
 64,
 192,
 193,
 195,
 194,
 198,
 199,
 197,
 196,
 204,
 205,
 207,
 206,
 202,
 203,
 201,
 200,
 216,
 217,
 219,
 218,
 222,
 223,
 221,
 220,
 212,
 213,
 215,
 214,
 210,
 211,
 209,
 208,
 240,
 241,
 243,
 242,
 246,
 247,
 245,
 244,
 252,
 253,
 255,
 254,
 250,
 251,
 249,
 248,
 232,
 233,
 235,
 234,
 238,
 239,
 237,
 236,
 228,
