*Copyright 2023 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*

# Convert Euclidean distance calculation from Python to Mojo

In this simple example, we'll calculate the Euclidean distance between two n-dimensional vectors a and b, expressed mathematically as the L2-norm of the difference vector: $$ ||a-b||_2 $$

### Create two random NumPy arrays of length n (n-dimensional vectors) in Python

In [1]:
%%python
import time
import numpy as np
from math import sqrt
from timeit import timeit

# Number of components in each vector.
n = 1000000
# Seed NumPy's global random generator before drawing so the two arrays are
# reproducible from run to run.
np.random.seed(42)
arr1_np = np.random.rand(n)
arr2_np = np.random.rand(n)

# Plain Python list copies of the same data, used by the pure-Python benchmark.
arr1_list = arr1_np.tolist()
arr2_list = arr2_np.tolist()

def print_formatter(string, value):
    """Print ``string`` and ``value`` on a single line, joined by ``": "``."""
    print("{}: {}".format(string, value))

In [2]:
%%python
# Pure python iterative implementation
def python_dist(a,b):
    """Return the Euclidean (L2) distance between the sequences ``a`` and ``b``.

    Iterates over the indices of ``a``, adding the squared element-wise
    difference to a running total one index at a time, and returns the
    square root of that total. Only the first ``len(a)`` entries of ``b``
    are read.
    """
    acc = 0.0
    for idx in range(len(a)):
        delta = a[idx] - b[idx]
        acc = acc + delta * delta
    return sqrt(acc)

# Average seconds per call over 5 timed runs of the pure-Python implementation.
secs = timeit(lambda: python_dist(arr1_list, arr2_list), number=5) / 5
print("=== Pure Python Performance ===")
print_formatter("value", python_dist(arr1_list, arr2_list))
print_formatter("time (ms)", secs * 1000)

=== Pure Python Performance ===
value: 408.0496057187746
time (ms): 32.13715840001896


In [3]:
%%python
def python_numpy_dist(a,b):
    """Return the norm of ``a - b`` as computed by ``np.linalg.norm``.

    The benchmark below calls this with two NumPy arrays, for which the
    result is the Euclidean distance between them.
    """
    difference = a - b
    return np.linalg.norm(difference)

# Average seconds per call over 5 timed runs of the NumPy implementation.
secs = timeit(lambda: python_numpy_dist(arr1_np, arr2_np), number=5) / 5
print("=== NumPy Performance ===")
print_formatter("value", python_numpy_dist(arr1_np, arr2_np))
print_formatter("time (ms)", secs * 1000)

=== NumPy Performance ===
value: 408.0496057187685
time (ms): 1.1309916000755038


In [4]:
from tensor import Tensor
from time import now
from math import sqrt

# Number of elements per vector; same value as `n` in the Python setup cell.
let n: Int = 1000000
# Element type shared by both tensors.
alias dtype = DType.float64
var arr1_tensor = Tensor[dtype](n)
var arr2_tensor = Tensor[dtype](n)

# Copy the NumPy arrays created in the Python cell into the Mojo tensors,
# one element at a time, so the Mojo functions see the same input values.
for i in range(n):
    arr1_tensor[i] = arr1_np[i].to_float64()
    arr2_tensor[i] = arr2_np[i].to_float64()

In [5]:
fn mojo_dist(a: Tensor[dtype], b: Tensor[dtype]) -> Float64:
    """Return the Euclidean (L2) distance between tensors `a` and `b`.

    Walks indices 0 .. a.num_elements() - 1, accumulating the squared
    element-wise difference one index at a time, then returns the square
    root of the accumulated total.
    """
    var acc: Float64 = 0.0
    for idx in range(a.num_elements()):
        let delta = a[idx] - b[idx]
        acc += delta * delta
    return sqrt(acc)

# Time a single call to mojo_dist using now() readings taken immediately
# before and after it. Despite its name, mojo_arr_sum holds the distance
# returned by mojo_dist.
let eval_begin = now()
let mojo_arr_sum = mojo_dist(arr1_tensor,arr2_tensor)
let eval_end = now()

print("=== Mojo Performance ===")
print_formatter("value", mojo_arr_sum)
# The elapsed now() difference is divided by 1e6 to produce the value labelled
# "time (ms)" (presumably now() reports nanoseconds — confirm in the time docs).
print_formatter("time (ms)", Float64((eval_end - eval_begin)) / 1e6)

=== Mojo Performance ===
value: 408.0496057187746
time (ms): 1.956


In [6]:
from sys.info import simdwidthof
from algorithm import vectorize

# SIMD width reported by simdwidthof for float64; mojo_dist_vectorized below
# uses twice this value as its vectorize width.
alias simd_width = simdwidthof[DType.float64]()

fn mojo_dist_vectorized(a: Tensor[DType.float64], b: Tensor[DType.float64]) -> Float64:
    """Return the Euclidean (L2) distance between `a` and `b` using SIMD loads.

    A parameterized closure loads `width` elements from each tensor at a given
    offset, squares their difference, and adds the horizontal sum of that
    vector to a running total. `vectorize` drives the closure over
    `a.num_elements()` elements with a width of `2 * simd_width`; the square
    root of the total is returned.
    """
    var acc: Float64 = 0.0

    @parameter
    fn accumulate_chunk[width: Int](offset: Int):
        # Difference of `width` consecutive elements starting at `offset`.
        let delta = a.simd_load[width](offset) - b.simd_load[width](offset)
        acc += (delta * delta).reduce_add()

    vectorize[2 * simd_width, accumulate_chunk](a.num_elements())
    return sqrt(acc)

# Time a single call to mojo_dist_vectorized using now() readings taken
# immediately before and after it. Despite its name, mojo_arr_vec_sum holds
# the distance returned by mojo_dist_vectorized.
let eval_begin = now()
let mojo_arr_vec_sum = mojo_dist_vectorized(arr1_tensor,arr2_tensor)
let eval_end = now()

print_formatter("value", mojo_arr_vec_sum)
# Convert the elapsed now() difference to Float64 explicitly before scaling,
# exactly as the scalar mojo_dist timing cell does, so both cells hand
# print_formatter a value of the same type. Dividing by 1e6 produces the value
# labelled "time (ms)" (presumably now() reports nanoseconds — confirm).
print_formatter("time (ms)", Float64(eval_end - eval_begin) / 1e6)

value: 408.049605718767
time (ms): 0.259
