# Hamming Distance

Mojo vs Python for computing the [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two arrays.

## Python

In [1]:
%%python 

# Import required modules
import time              # Standard library for time-related tasks
import numpy as np       # NumPy for numerical computations
from timeit import timeit# Standard library for timing code

# Initialize the size of the numpy arrays
n = 1_000_000_000  # 1 billion elements

# Generate random numpy arrays for calculations
# np.random.randint(0, 4, size=n) generates an array of `n` random integers between 0 and 3 (inclusive)
anp = np.random.randint(0, 4, size=n)
bnp = np.random.randint(0, 4, size=n)

# Custom function to format and print time measurements
def print_formatter(string, value):
    """
    Custom function for formatted printing.
    
    Parameters:
    - string (str): The description of the value
    - value (float): The value to be printed
    
    Output:
    - Formatted print to console
    """
    print(f"{string}: {value:5.5f}")

# At this point, 'anp' and 'bnp' contain random integers between 0 and 3
# You can proceed to perform operations or further computations on these arrays.


### Analysis

In [2]:
%%python 

# Define a function to compute the Hamming distance between two numpy arrays
def hamming_distance(a, b):
    """
    Compute the Hamming distance between two arrays 'a' and 'b'.
    
    Parameters:
    - a (numpy array): The first array
    - b (numpy array): The second array
    
    Returns:
    - int: The Hamming distance between arrays 'a' and 'b'
    """
    
    # Utilize numpy's count_nonzero function for optimized performance.
    # The expression (a != b) creates a Boolean array where True indicates differing elements.
    # np.count_nonzero counts the number of True values in this Boolean array.
    return np.count_nonzero(a != b)

# Measure the time taken for the hamming_distance function to execute
# The lambda function allows us to time the function without arguments
# 'number=100' indicates that the time measurement will be averaged over 100 executions.
secs = timeit(lambda: hamming_distance(anp, bnp), number=100) / 100

# Output section
print("=== Hamming Distance Performance ===")
# Output the calculated Hamming distance
print_formatter("hamming_distance value", hamming_distance(anp, bnp))

# Output the time taken to execute the hamming_distance function (converted to milliseconds)
print_formatter("hamming_distance time (ms)", 1000 * secs)


=== Hamming Distance Performance ===
hamming_distance value: 749983321.00000
hamming_distance time (ms): 564.26367


## Mojo

### Create Tensors

In [3]:
# Load Mojo librarires - some may not be needed for this notebook

from algorithm import vectorize, parallelize
from benchmark import Benchmark
from math import div_ceil, min, sqrt, floor, mod, max
from memory import memset_zero
from memory.buffer import Buffer
from memory.unsafe import DTypePointer
from random import rand, randint, random_float64
from runtime.llcl import Runtime, num_cores
from sys.intrinsics import strided_load
from sys.info import simdwidthof
from tensor import Tensor
from time import now
from utils.list import VariadicList

In [4]:
# Declare an integer 'n' and set it to one billion
let n: Int = 1_000_000_000  

# Create tensors 'x' and 'y' of int64 data type with 'n' elements
var x = Tensor[DType.int64](n) 
var y = Tensor[DType.int64](n)

# Fill tensors 'x' and 'y' with random integers between 0 and 3
randint[DType.int64](x.data(), n, 0, 3)  
randint[DType.int64](y.data(), n, 0, 3)  


### Typed Approach

In [5]:
# Define a function named 'mojo_fn_hamming' that takes two tensors 'x' and 'y' as arguments and returns an Int64 value
fn mojo_fn_hamming(x: Tensor[DType.int64], y: Tensor[DType.int64]) -> Int64:
    
    # Initialize a variable 'hamming_distance' to zero, of type Int64
    var hamming_distance: Int64 = 0
    
    # Get the number of elements in tensor 'x' and assign it to 'n'
    let n = x.num_elements()
    
    # Iterate through the range of 'n' (from 0 to n-1)
    for i in range(n):
        
        # Check if the elements at position 'i' in tensors 'x' and 'y' are different
        if x[i] != y[i]:
            
            # If the elements are different, increment the 'hamming_distance' by 1
            hamming_distance += 1
    
    # Return the computed 'hamming_distance' value
    return hamming_distance


In [6]:
# Capture the current time before evaluation begins
var eval_begin = now()

# Call the 'mojo_fn_hamming' function with tensors 'x' and 'y' as arguments, and assign the result to 'mojo_fn_hamming'
let mojo_fn_hamming = mojo_fn_hamming(x, y)

# Capture the current time after evaluation ends
var eval_end = now()

# Print the value of 'mojo_fn_hamming' using 'print_formatter'
print_formatter("mojo_fn_hamming value", mojo_fn_hamming)

# Calculate and print the evaluation time in milliseconds using 'print_formatter'
print_formatter("mojo_fn_hamming time (ms)",Float64((eval_end - eval_begin)) / 1e6)

# Calculate and print the speedup factor
print("Speedup", (1000*secs) / (Float64((eval_end - eval_begin)) / 1e6))


mojo_fn_hamming value: 749999134.00000
mojo_fn_hamming time (ms): 608.70787
Speedup 0.9269859919668845


### Typed + Vectorized Approach

In [7]:
# Define an alias 'nelts' for the SIMD vector width for int64 data type
alias nelts = simdwidthof[DType.int64]()  

# Define a vectorized function 'mojo_fn_hamming_vectorized' to compute the Hamming distance between tensors 'x' and 'y'
fn mojo_fn_hamming_vectorized(x: Tensor[DType.int64], y: Tensor[DType.int64]) -> Int64:
    
    # Initialize a variable 'hamming_distance' to zero, of type Int64
    var hamming_distance: Int64 = 0
    
    # Get the number of elements in tensor 'x' and assign it to 'n'
    let n = x.num_elements()

    # Define a parameterized inner function 'count_differences' to compute differences within SIMD vectors
    @parameter
    fn count_differences[nelts: Int](i: Int):
        
        # Load a SIMD vector 'x_vec' from tensor 'x' starting at index 'i'
        let x_vec = x.simd_load[nelts](i)
        
        # Load a SIMD vector 'y_vec' from tensor 'y' starting at index 'i'
        let y_vec = y.simd_load[nelts](i)

        # Iterate through the range of 'nelts' (from 0 to nelts-1)
        for j in range(nelts):
            
            # Check if the elements at position 'j' in SIMD vectors 'x_vec' and 'y_vec' are different
            if x_vec[j] != y_vec[j]:
                
                # If the elements are different, increment the 'hamming_distance' by 1
                hamming_distance += 1

    # Apply vectorization to the 'count_differences' function over the range of 'n' with a step size of 'nelts'
    vectorize[nelts, count_differences](n)
    
    # Handle any remaining elements that were not processed by vectorized function
    for i in range(nelts * (n // nelts), n):
        
        # Check if the elements at position 'i' in tensors 'x' and 'y' are different
        if x[i] != y[i]:
            
            # If the elements are different, increment the 'hamming_distance' by 1
            hamming_distance += 1
        
    # Return the computed 'hamming_distance' value
    return hamming_distance


In [8]:
# Capture the current time before evaluation begins
var eval_begin = now()

# Call the 'mojo_fn_hamming_vectorized' function with tensors 'x' and 'y' as arguments, and assign the result to 'mojo_fn_hamming_vectorized'
let mojo_fn_hamming_vectorized = mojo_fn_hamming_vectorized(x,y)

# Capture the current time after evaluation ends
var eval_end = now()

# Print the value of 'mojo_fn_hamming_vectorized' using 'print_formatter'
print_formatter("mojo_fn_hamming_vectorized value", mojo_fn_hamming_vectorized)

# Calculate and print the evaluation time in milliseconds using 'print_formatter'
print_formatter("mojo_fn_hamming_vectorized time (ms)",Float64((eval_end - eval_begin)) / 1e6)

# Calculate and print the speedup factor
print("Speedup", (1000*secs) / (Float64((eval_end - eval_begin)) / 1e6))

mojo_fn_hamming_vectorized value: 749999134.00000
mojo_fn_hamming_vectorized time (ms): 887.50741
Speedup 0.635784738007833


### Typed + Vectorized + Parallelized Approach

In [9]:
# Define an alias 'nelts' for the SIMD vector width for int64 data type
alias nelts = simdwidthof[DType.int64]()

# Define a parallelized function 'mojo_fn_hamming_parallelized' to compute the Hamming distance between tensors 'x' and 'y'
fn mojo_fn_hamming_parallelized(x: Tensor[DType.int64], y: Tensor[DType.int64], rt: Runtime) -> Int64:
    
    # Initialize a variable 'hamming_distance' to zero, of type Int64
    var hamming_distance: Int64 = 0
    
    # Get the number of elements in tensor 'x' and assign it to 'n'
    let n = x.num_elements()
    
    # Get the number of available cores and assign it to 'num_threads'
    let num_threads: Int = num_cores()

    # Define a parameterized inner function 'partial_sum' to compute the Hamming distance in parallel
    @parameter
    fn partial_sum(thread_idx: Int):
        
        # Determine the range of indices each thread will process
        let step = n // num_threads
        let start = thread_idx * step
        let end = min((thread_idx + 1) * step, n)
        
        # Initialize a variable 'local_hamming_distance' to zero, of type Int64, to store the Hamming distance computed by this thread
        var local_hamming_distance: Int64 = 0

        # Define a parameterized inner function 'count_differences' to compute differences within SIMD vectors
        @parameter
        fn count_differences[nelts: Int](i: Int):
            
            # Load a SIMD vector 'x_vec' from tensor 'x' starting at index 'i'
            let x_vec = x.simd_load[nelts](i)
            
            # Load a SIMD vector 'y_vec' from tensor 'y' starting at index 'i'
            let y_vec = y.simd_load[nelts](i)

            # Iterate through the range of 'nelts' (from 0 to nelts-1)
            for j in range(nelts):
                
                # Check if the elements at position 'j' in SIMD vectors 'x_vec' and 'y_vec' are different
                if x_vec[j] != y_vec[j]:
                    
                    # If the elements are different, increment the 'local_hamming_distance' by 1
                    local_hamming_distance += 1

        # Apply vectorization to the 'count_differences' function over the range of indices assigned to this thread
        vectorize[nelts, count_differences](end - start)

        # Handle any remaining elements that were not processed by vectorized function
        for i in range(nelts * ((end - start) // nelts), end - start):
            
            # Check if the elements at position 'i' in tensors 'x' and 'y' are different
            if x[i] != y[i]:
                
                # If the elements are different, increment the 'local_hamming_distance' by 1
                local_hamming_distance += 1

        # Accumulate the 'local_hamming_distance' computed by this thread to the global 'hamming_distance'
        hamming_distance += local_hamming_distance

    # Execute the 'partial_sum' function in parallel across multiple threads
    parallelize[partial_sum](rt, num_threads)

    # Return the computed 'hamming_distance' value
    return hamming_distance


In [10]:
# Capture the current time before evaluation begins
var eval_begin = now()

# Call the 'mojo_fn_hamming_parallelized' function with tensors 'x' and 'y', and the number of cores as arguments, and assign the result to 'mojo_fn_hamming_parallelized'
let mojo_fn_hamming_parallelized = mojo_fn_hamming_parallelized(x,y,num_cores())

# Capture the current time after evaluation ends
var eval_end = now()

# Print the value of 'mojo_fn_hamming_parallelized' using 'print_formatter'
print_formatter("mojo_fn_hamming_parallelized value", mojo_fn_hamming_parallelized)

# Calculate and print the evaluation time in milliseconds using 'print_formatter'
print_formatter("mojo_fn_hamming_parallelized time (ms)", Float64((eval_end - eval_begin)) / 1e6)

# Calculate and print the speedup factor
print("Speedup", (1000*secs) / (Float64((eval_end - eval_begin)) / 1e6))


mojo_fn_hamming_parallelized value: 750018592.00000
mojo_fn_hamming_parallelized time (ms): 66.08831
Speedup 8.538024742951903
