In [8]:
import numpy as np

def pair_hamming_distances(arr1, arr2):
    """
    Compute the Hamming distance between two sets of uint8 arrays.

    Parameters:
    arr1 (ndarray): Shape (num_bit_vectors, dimension_bit_vector)
    arr2 (ndarray): Shape (num_bit_vectors, dimension_bit_vector)

    Returns:
    ndarray: Shape (num_bit_vectors,) - the Hamming distances between corresponding vectors in arr1 and arr2.
    """
    # Step 1: XOR the two arrays
    xor_result = np.bitwise_xor(arr1, arr2)

    # Step 2: Count the number of differing bits for each element
    # Use `bin(x).count('1')` to count the number of 1s in the binary representation
    hamming_distance = np.vectorize(lambda x: bin(x).count('1'))(xor_result)

    # Step 3: Sum all the bit differences
    total_hamming_distance = np.sum(hamming_distance, axis=1)

    return total_hamming_distance

# Example usage
arr1 = np.array([[0, 1, 0, 1], [1, 0, 1, 0]], dtype=np.uint8) # 32-bit vectors
arr2 = np.array([[0, 6, 1, 1], [1, 0, 1, 1]], dtype=np.uint8)
print(pair_hamming_distances(arr1, arr2))  # Expected output: [2, 1]

[4 1]


In [9]:
def hamming_distances_vectorized(point, lst_binary):
    """
    Compute the Hamming distances between a point and a list of binary codes.
    """
    
    # XOR the point with the list
    xor_result = point ^ lst_binary  # XOR result is still uint64
    
    # Convert uint64 to uint8 view (8 bytes per uint64)
    xor_bytes = xor_result.view(np.uint8)  # Interpret each uint64 as 8 uint8 values
    
    # Unpack bits and count the number of 1s for each uint64
    unpacked_bits = np.unpackbits(xor_bytes, axis=0).reshape(len(lst_binary), 64)
    distances = unpacked_bits.sum(axis=1)
    
    return distances

# Example usage
point = np.array([0, 1, 0, 1], dtype=np.uint64)
lst_binary = np.array([[0, 6, 1, 1], [1, 0, 1, 1]], dtype=np.uint64)
print(hamming_distances_vectorized(point, lst_binary))  # Expected output: [2, 1]

ValueError: cannot reshape array of size 512 into shape (2,64)