## 🚀 Compare speed of Distance Functions 🚀

In [None]:
%matplotlib inline
import matplotlib
matplotlib.use("module://matplotlib_inline.backend_inline")

In [None]:
from task import (
    distance_l2_CUPY,
    distance_l2_triton,
    distance_l2_cpu,
    distance_l2_torch,
    distance_manhattan_CUPY,
    distance_l1_triton,
    distance_manhattan_cpu,
    distance_manhattan_torch,
    distance_cosine_CUPY,
    distance_cosine_triton,
    distance_cosine_cpu,
    distance_cosine_torch,
    distance_dot_CUPY,
    distance_dot_triton,
    distance_dot_cpu,
    distance_dot_torch,
    test_distance_wrapper
)
import numpy as np
import seaborn as sns

In [None]:
# Seed NumPy's global RNG so the generated inputs are reproducible.
np.random.seed(1967)

# Vector lengths: every power of two from 2**1 through 2**26.
vector_sizes = [1 << exponent for exponent in range(1, 27)]

# One random 1-D vector per length. All X vectors are drawn before any Y
# vector, so the RNG stream is consumed in the same order on every run.
X_array = [np.random.rand(length) for length in vector_sizes]
Y_array = [np.random.rand(length) for length in vector_sizes]

In [None]:
# Benchmark registry: metric label -> list of implementations to time.
# Within each list the order is CuPy, Triton, CPU, Torch; the order of the
# keys and of each list determines the order of the printed results and of
# the plotted lines.
functions = {
    "L2": [
        distance_l2_CUPY, distance_l2_triton,
        distance_l2_cpu, distance_l2_torch,
    ],
    "L1": [
        distance_manhattan_CUPY, distance_l1_triton,
        distance_manhattan_cpu, distance_manhattan_torch,
    ],
    "Cosine": [
        distance_cosine_CUPY, distance_cosine_triton,
        distance_cosine_cpu, distance_cosine_torch,
    ],
    "Dot Product": [
        distance_dot_CUPY, distance_dot_triton,
        distance_dot_cpu, distance_dot_torch,
    ],
}

In [None]:
# Run every registered implementation on every (X, Y) input pair.
# results_list maps each metric label to a list of one-item dicts of the form
# {implementation __name__: [one measurement per vector size]}, in the same
# order as `functions`.
results_list = {}
for function_type, function_list in functions.items():
    results_list[function_type] = []
    for function in function_list:
        inner_results_item = []
        # X_array and Y_array are built from the same size list, so zip walks
        # them in lockstep from the smallest to the largest vector.
        for X, Y in zip(X_array, Y_array):
            result = test_distance_wrapper(function, X, Y, repeat=25)
            # NOTE(review): element 2 of the wrapper's return value is
            # presumably the measured time - confirm against
            # task.test_distance_wrapper.
            inner_results_item.append(result[2])
        results_list[function_type].append(
            {function.__name__: inner_results_item}
        )

# Dump the raw measurements, grouped by metric.
for function_type, result_entries in results_list.items():
    print(function_type)
    for result_entry in result_entries:
        for function_name, measurements in result_entry.items():
            print(f"{function_name}: {measurements}")
    print()


In [None]:
# #Print four graphs: each graph should have one line per implementation (CuPy, Triton, CPU, Torch)
# #Graphs should be in log-log scale, with the x-axis ticks at powers of 2
# #Plotting the results
# import matplotlib.pyplot as plt
# def plot_results(results_list):
#     for function_type, function_list in results_list.items():
#         plt.figure()
#         for function in function_list:
#             for function_name, result in function.items():
#                 plt.plot(vector_sizes, result, label=function_name)
#         plt.xscale('log', base=2)
#         plt.yscale('log')
#         plt.xlabel('Vector Size')
#         plt.ylabel('Time (s)')
#         plt.title(function_type)
#         plt.legend()
#         plt.show()
# plot_results(results_list)
# #Plotting the results


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# Global seaborn look for every figure created after this point:
# white grid background with slightly enlarged fonts.
sns.set_theme(font_scale=1.2, style="whitegrid")

# Line colors come from seaborn's "colorblind" palette (chosen for accessibility).
colors = sns.color_palette(palette="colorblind")

def _format_power_of_two(value, _position=None):
    """Render a positive tick value as a LaTeX power of two (1024 -> $2^{10}$)."""
    if value <= 0:
        # log2 is undefined here; a log-scaled axis should not ask for such a
        # tick, but returning an empty label is safer than raising mid-draw.
        return ""
    # Round rather than truncate so floating-point noise cannot shift the
    # exponent down by one.
    return f"$2^{{{int(round(np.log2(value)))}}}$"


def plot_results(results_list, log_y=False):
    """Draw one figure per metric, with one line per implementation.

    Args:
        results_list: Mapping of metric label -> list of dicts, each dict
            mapping an implementation name to its sequence of measurements.
            Every sequence is plotted against the module-level
            ``vector_sizes``, so it must have the same length.
        log_y: When True the y-axis uses a logarithmic scale. The default
            (False) keeps the linear y-axis this function has always drawn.

    The x-axis is log-scaled in base 2 and labelled with powers of two. The
    y-axis is inverted, so smaller values appear higher up. Line colors are
    taken from the module-level ``colors`` palette and cycle if there are
    more lines than colors.
    """
    # The axis label has to describe the scale that is actually applied.
    y_scale_name = "log" if log_y else "linear"

    for function_type, function_list in results_list.items():
        plt.figure(figsize=(8, 7.5))  # bigger, cleaner layout

        # Flatten the list of {name: measurements} dicts so each line gets
        # its own running color index.
        series = [
            (function_name, result)
            for function in function_list
            for function_name, result in function.items()
        ]
        for color_idx, (function_name, result) in enumerate(series):
            plt.plot(
                vector_sizes,
                result,
                label=function_name.replace('_', ' ').upper(),
                color=colors[color_idx % len(colors)],
                linewidth=2.5,
                marker='o',
                markersize=5,
            )

        plt.xscale('log', base=2)
        if log_y:
            plt.yscale('log')

        # Log ticks with base-2 labels
        plt.gca().xaxis.set_major_formatter(
            ticker.FuncFormatter(_format_power_of_two)
        )

        plt.xlabel("Vector Size (log scale)", labelpad=10)
        plt.ylabel(f"Time (s) ({y_scale_name} scale, descending)", labelpad=10)
        plt.gca().invert_yaxis()
        plt.title(f"Average Time to compute {function_type} distance between two random Numpy vectors", fontsize=14, weight="bold")

        plt.legend(title="Implementation", loc="best", frameon=True)
        plt.tight_layout()
        plt.grid(True, which='both', linestyle='--', linewidth=0.5)
        # Add caption below the plot (negative y places it under the axes area).
        plt.figtext(
            0.5, -0.12,
            "In the case of the GPU accelerated libraries, these timings are inclusive of the memory transfer in the GPU. "
            "As we can see here, CPU performance is better at lower dimensions and scales similarly with CuPy and Triton as the memory increases.\n\n"
            "We note here that this is because there is only one distance calculation being carried out and, despite parallelising across segments within the vectors "
            "and reducing these partial sums, the memory overhead involved means that there is no significant benefit from utilizing the GPU for a single distance calculation.",
            wrap=True,
            ha="center",
            fontsize=10
        )
        plt.show()
plot_results(results_list)