In [None]:
import numpy as np
import time
#from pycuda.compiler import SourceModule
from numba import njit, prange
import tensorflow as tf
import torch

In [None]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same point cloud,
# and therefore the same nearest index / distance in every backend below.
SEED = 42
# Number of random 2D points to search through.
N_POINTS = 10_000_000

# Create a float64 array of N_POINTS random 2D elements, uniform in [0, 1)
random_2d_array = np.random.default_rng(SEED).random((N_POINTS, 2))
# Define the query point whose nearest neighbour is searched for
point = np.array([0.5, 0.5])

In [None]:
# Baseline: find the nearest point with an explicit Python loop, computing one
# Euclidean distance per row.
min_distance = float('inf')
nearest_index_loop = -1
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock time that can be coarse or adjusted.
loop_start_time = time.perf_counter()
for i, element in enumerate(random_2d_array):
    distance = np.linalg.norm(element - point)
    # Strict '<' keeps the first index when two points are equally close.
    if distance < min_distance:
        min_distance = distance
        nearest_index_loop = i
loop_end_time = time.perf_counter()
print("Nearest index (loop):", nearest_index_loop, "Distance:", min_distance)
print("Time taken by loop:", loop_end_time - loop_start_time, "seconds")

In [None]:
# Vectorised NumPy version: compute the Euclidean distance between the point
# and all elements in the array in one call, then take the argmin.
# perf_counter() is used because it is monotonic and high-resolution, which
# matters for a region this short.
start_time = time.perf_counter()
distances = np.linalg.norm(random_2d_array - point, axis=1)
# Find the index of the nearest point
nearest_index = np.argmin(distances)
end_time = time.perf_counter()

print("Nearest index (numpy):", nearest_index, "Distance:", distances[nearest_index])
print("Time taken:", end_time - start_time, "seconds")


In [None]:
@njit(parallel=True)
def compute_distances_numba(array, point):
    """Return the Euclidean distance from ``point`` to every row of ``array``.

    Parameters
    ----------
    array : 2D array
        Points to measure; columns 0 and 1 are read as the two coordinates.
    point : 1D array
        Reference point; elements 0 and 1 are read as its coordinates.

    Returns
    -------
    1D float32 array with one distance per row of ``array``.
    """
    n_rows = array.shape[0]
    result = np.empty(n_rows, dtype=np.float32)
    # prange lets Numba distribute the independent rows across threads.
    for row in prange(n_rows):
        delta_x = array[row, 0] - point[0]
        delta_y = array[row, 1] - point[1]
        squared = delta_x * delta_x + delta_y * delta_y
        result[row] = np.sqrt(squared)
    return result

# Warm-up: the first call of an @njit function compiles it for the given
# argument types. Calling it once on a tiny slice (same dtype and layout)
# keeps that one-off compilation cost out of the measurement below.
compute_distances_numba(random_2d_array[:2], point)

# perf_counter() is monotonic and high-resolution, unlike time.time().
start_time = time.perf_counter()
distances_numba = compute_distances_numba(random_2d_array, point)
# Find the index of the nearest point
nearest_index_numba = np.argmin(distances_numba)
end_time = time.perf_counter()

print("Nearest index (Numba):", nearest_index_numba, "Distance:", distances_numba[nearest_index_numba])
print("Time taken by Numba:", end_time - start_time, "seconds")


In [None]:
import cupy as cp

# Transfer data to GPU (done outside the timed region, like the other GPU cells)
random_2d_array_cupy = cp.asarray(random_2d_array)
point_cupy = cp.asarray(point)

# Warm-up pass: run the same computation once so that one-time costs of the
# first use (kernel compilation, memory-pool allocation) are not measured.
# .get() copies the result to the host, which waits for the GPU to finish.
cp.argmin(cp.linalg.norm(random_2d_array_cupy - point_cupy, axis=1)).get()

# Calculate the Euclidean distance using CuPy
# perf_counter() is monotonic and high-resolution, unlike time.time().
start_time = time.perf_counter()
distances_cupy = cp.linalg.norm(random_2d_array_cupy - point_cupy, axis=1)
# .get() blocks until the result is on the host, so the timer stops only
# after the GPU work has completed.
nearest_index_cupy = cp.argmin(distances_cupy).get()
end_time = time.perf_counter()

print("Nearest index (CuPy):", nearest_index_cupy, "Distance:", distances_cupy[nearest_index_cupy].get())
print("Time taken by CuPy:", end_time - start_time, "seconds")

In [None]:
from numba import cuda

@cuda.jit
def compute_distances_cuda(array, point, distances):
    """CUDA kernel: write the distance from ``point`` to one row of ``array``.

    Each thread handles the row given by its 1D grid index and stores the
    Euclidean distance in ``distances`` at that same index. Threads whose
    index is past the last row do nothing.
    """
    row = cuda.grid(1)
    # Guard: the grid may contain more threads than there are rows.
    if row >= array.shape[0]:
        return
    delta_x = array[row, 0] - point[0]
    delta_y = array[row, 1] - point[1]
    squared = delta_x * delta_x + delta_y * delta_y
    distances[row] = squared ** 0.5

# Allocate memory on the device
array_device = cuda.to_device(random_2d_array)
point_device = cuda.to_device(point)
distances_device = cuda.device_array(random_2d_array.shape[0], dtype=np.float32)

# Launch configuration: one thread per row, rounded up to whole blocks
threads_per_block = 1024
blocks_per_grid = (random_2d_array.shape[0] + threads_per_block - 1) // threads_per_block

# Warm-up launch: the first launch of a @cuda.jit kernel compiles it for these
# argument types. Running it once (the kernel only overwrites distances_device,
# so repeating it is harmless) keeps compilation out of the measurement.
compute_distances_cuda[blocks_per_grid, threads_per_block](array_device, point_device, distances_device)
cuda.synchronize()

# Timed launch. Kernel launches are asynchronous, so synchronize before
# stopping the clock. perf_counter() is monotonic and high-resolution.
start_time = time.perf_counter()
compute_distances_cuda[blocks_per_grid, threads_per_block](array_device, point_device, distances_device)
cuda.synchronize()
end_time = time.perf_counter()

# Copy the result back to the host
distances_cuda = distances_device.copy_to_host()

# Find the index of the nearest point
nearest_index_cuda = np.argmin(distances_cuda)
print("Nearest index (CUDA):", nearest_index_cuda, "Distance:", distances_cuda[nearest_index_cuda])
print("Time taken by CUDA:", end_time - start_time, "seconds")

In [None]:
# Check if a GPU is available and move tensors to GPU if possible.
# Previously the availability was only printed and the tensors stayed on the
# CPU, so this cell never actually measured PyTorch on the GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("GPU is available.")
# Convert numpy arrays to float32 PyTorch tensors on the selected device
random_2d_array_tensor = torch.from_numpy(random_2d_array).float().to(device)
point_tensor = torch.from_numpy(point).float().to(device)
if device.type == "cuda":
    # Make sure the host-to-device copies have finished before timing starts.
    torch.cuda.synchronize()

# Calculate the Euclidean distance using PyTorch
# perf_counter() is monotonic and high-resolution, unlike time.time().
start_time = time.perf_counter()
distances_tensor = torch.norm(random_2d_array_tensor - point_tensor, dim=1)
# .item() copies the scalar to the host, which waits for pending GPU work,
# so the timer stops only after the computation has completed.
nearest_index_tensor = torch.argmin(distances_tensor).item()
end_time = time.perf_counter()

print("Nearest index (PyTorch):", nearest_index_tensor, "Distance:", distances_tensor[nearest_index_tensor].item())
print("Time taken by PyTorch:", end_time - start_time, "seconds")

In [None]:
# Calculate the Euclidean distance using TensorFlow
# Inputs are converted to float32 tensors before the timed region.
random_2d_array_tf = tf.convert_to_tensor(random_2d_array, dtype=tf.float32)
point_tf = tf.convert_to_tensor(point, dtype=tf.float32)

# perf_counter() is monotonic and high-resolution, unlike time.time().
start_time = time.perf_counter()
distances_tf = tf.norm(random_2d_array_tf - point_tf, axis=1)
# .numpy() materialises the result on the host before the timer stops.
nearest_index_tf = tf.argmin(distances_tf).numpy()
end_time = time.perf_counter()

print("Nearest index (TensorFlow):", nearest_index_tf, "Distance:", distances_tf[nearest_index_tf].numpy())
print("Time taken by TensorFlow:", end_time - start_time, "seconds")

In [None]:
import os
import subprocess

# Location of the Visual Studio developer-environment script. Set the
# VSDEVCMD_PATH environment variable to override it when Visual Studio is
# installed in a different edition or directory.
VSDEVCMD_PATH = os.environ.get(
    "VSDEVCMD_PATH",
    r"C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\vsdevcmd.bat",
)


def parse_env_dump(dump):
    """Parse the output of the Windows ``set`` command into a dict.

    Each ``NAME=value`` line becomes one entry; only the first ``=`` splits,
    so values may themselves contain ``=``. Lines without ``=`` or with an
    empty name are ignored.
    """
    env = {}
    for line in dump.splitlines():
        key, sep, value = line.partition("=")
        if sep and key:
            env[key] = value
    return env


# Running the batch file with `!` executes it in a child shell, so the
# variables it sets (PATH to cl.exe, INCLUDE, LIB, ...) are lost when that
# shell exits. Instead, run it and dump the resulting environment with `set`
# in the same shell, then copy that environment into this kernel's process.
if os.path.isfile(VSDEVCMD_PATH):
    result = subprocess.run(
        f'"{VSDEVCMD_PATH}" >nul && set',
        shell=True,
        capture_output=True,
        # cmd.exe writes piped output in the OEM code page.
        encoding="oem",
        errors="replace",
    )
    if result.returncode == 0:
        os.environ.update(parse_env_dump(result.stdout))
    else:
        print("vsdevcmd.bat failed with exit code", result.returncode, "-", result.stderr.strip())
else:
    print("vsdevcmd.bat not found at", VSDEVCMD_PATH, "- skipping MSVC environment setup")

# Make the CUDA runtime DLLs discoverable. This stays best-effort (no
# exception escapes), but the reason for skipping is reported instead of
# being silently swallowed.
cuda_path = os.environ.get("CUDA_PATH")
if cuda_path is None:
    print("CUDA_PATH is not set - skipping CUDA DLL directory registration")
elif not hasattr(os, "add_dll_directory"):
    # os.add_dll_directory only exists on Windows (Python 3.8+).
    print("os.add_dll_directory is unavailable on this platform - skipping")
else:
    cuda_bin_dir = os.path.join(cuda_path, "bin")
    try:
        os.add_dll_directory(cuda_bin_dir)
    except OSError as exc:
        print("Could not add", cuda_bin_dir, "to the DLL search path:", exc)

In [None]:
import pycuda.autoinit
# Imported as `drv` rather than `cuda` so that this cell does not rebind the
# name `cuda`, which the Numba CUDA cell above uses for `numba.cuda`.
import pycuda.driver as drv
from pycuda.compiler import SourceModule


# Define the CUDA kernel: one thread per point. `array` is read as a flat
# buffer of interleaved (x, y) floats, hence the 2 * idx indexing.
kernel_code = """
__global__ void compute_distances_pycuda(float *array, float *point, float *distances, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        float dx = array[2 * idx] - point[0];
        float dy = array[2 * idx + 1] - point[1];
        distances[idx] = sqrt(dx * dx + dy * dy);
    }
}
"""

# Compile the kernel
mod = SourceModule(kernel_code)
compute_distances_pycuda = mod.get_function("compute_distances_pycuda")

# Host-side float32 copies of the inputs. The kernel works on `float`, so the
# device buffers are sized from these copies; sizing them from the float64
# originals allocated twice the memory that was actually used.
array_host_pycuda = random_2d_array.astype(np.float32)
point_host_pycuda = point.astype(np.float32)
# Host buffer that receives the distances after the kernel has run
distances_pycuda = np.empty(array_host_pycuda.shape[0], dtype=np.float32)

# Allocate memory on the device
array_device_pycuda = drv.mem_alloc(array_host_pycuda.nbytes)
point_device_pycuda = drv.mem_alloc(point_host_pycuda.nbytes)
distances_device_pycuda = drv.mem_alloc(distances_pycuda.nbytes)

# Copy data to the device
drv.memcpy_htod(array_device_pycuda, array_host_pycuda)
drv.memcpy_htod(point_device_pycuda, point_host_pycuda)

# Launch the kernel: one thread per point, rounded up to whole blocks
threads_per_block = 1024
blocks_per_grid = (random_2d_array.shape[0] + threads_per_block - 1) // threads_per_block
# perf_counter() is monotonic and high-resolution, unlike time.time().
start_time = time.perf_counter()
compute_distances_pycuda(array_device_pycuda, point_device_pycuda, distances_device_pycuda, np.int32(random_2d_array.shape[0]), block=(threads_per_block, 1, 1), grid=(blocks_per_grid, 1))
# Synchronize before stopping the clock so the kernel has finished.
drv.Context.synchronize()
end_time = time.perf_counter()

# Copy the result back to the host
drv.memcpy_dtoh(distances_pycuda, distances_device_pycuda)

# Find the index of the nearest point
nearest_index_pycuda = np.argmin(distances_pycuda)
print("Nearest index (PyCUDA):", nearest_index_pycuda, "Distance:", distances_pycuda[nearest_index_pycuda])
print("Time taken by PyCUDA:", end_time - start_time, "seconds")