# Normal Numpy script

In [1]:
import logging
import sys
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from cosine_similarity_top_k.profiling import get_memory, get_time
from cosine_similarity_top_k.plotting import plot_memory, plot_time
from cosine_similarity_top_k.cosine_similarity_top_k import cosine_similarity_top_k, get_memory_available, warm_up_chunked_dot

In [2]:
warm_up_chunked_dot()

In [3]:
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)

In [4]:
get_memory_available() / 1E9

21.154803712

In [5]:
import math
M = 5E9
math.sqrt(M / 8)

25000.0

In [None]:
embedding_size = 100
max_n_items = 1E6
top_k = 100
max_memory_to_use = 20E9

step = int(max_n_items / 10)
n_items = range(step, int(max_n_items) + 1, step)
numba_function_kwargs = {"top_k": top_k, "max_memory_to_use": max_memory_to_use, "force_memory": True}

max_memory = {
    "numpy": [],
    "numba": []
}
matrix_memory = {
    "numpy": [],
    "numba": []
}
execution_time = {
    "numpy": [],
    "numba": []
}
for i, n in enumerate(n_items):
    print(f"\nN: {n} - {i + 1}/{len(n_items)}")
    if n > 50000:
        max_size, matrix_size, _time = None, None, None
    else:
        print("\n\tSklean memory")
        max_size, matrix_size = get_memory(cosine_similarity, n_items=n, embedding_size=embedding_size)
        print("\n\tSklean time")
        _time = get_time(cosine_similarity, n_items=n, embedding_size=embedding_size, n_iterations=1)
    max_memory["numpy"].append(max_size), matrix_memory["numpy"].append(matrix_size), execution_time["numpy"].append(_time)
    
    print("\n\tCustom memory")
    max_size, matrix_size = get_memory(cosine_similarity_top_k, n_items=n, embedding_size=embedding_size, function_kwargs=numba_function_kwargs)
    print("\t", max_size / 1E9, max_size / 1E9)
    print("\n\tCustom time")
    _time = get_time(cosine_similarity_top_k, n_items=n, embedding_size=embedding_size, function_kwargs=numba_function_kwargs, n_iterations=1)
    print("\t", _time)
    max_memory["numba"].append(max_size), matrix_memory["numba"].append(matrix_size), execution_time["numba"].append(_time)


N: 100000 - 1/10

	Custom memory
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 20.03 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 768
	 18.693675867 18.693675867

	Custom time
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 20.72 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 768
	 51.086509598999996

N: 200000 - 2/10

	Custom memory
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 20.53 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG



	 900.758530351

N: 500000 - 5/10

	Custom memory
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 20.18 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 143
	 17.308008249 17.308008249

	Custom time
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 20.26 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 143
	 1420.8756491470003

N: 600000 - 6/10

	Custom memory
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 19.06 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of t



	 15.566407274 15.566407274

	Custom time
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 17.77 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 117




	 2109.7868845880002

N: 700000 - 7/10

	Custom memory
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Memory available: 17.26 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Using memory: 20.00 GB
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Number of threads: 16
DEBUG:cosine_similarity_top_k.cosine_similarity_top_k:Chunk size per thread: 99




In [None]:
import pandas as pd

dfs = {
    "max_memory (GB)": pd.DataFrame.from_dict(max_memory) / 1E9,
    "matrix_memory (GB)": pd.DataFrame.from_dict(matrix_memory) / 1E9,
    "execution_time (s)": pd.DataFrame.from_dict(execution_time),
}
df = pd.concat(dfs.values(), keys=dfs.keys(), axis=1)
df = df.assign(n_items=n_items, embedding_size=embedding_size, max_memory_to_use_GB=int(max_memory_to_use / 1E9), top_k=top_k).set_index("n_items")
df

In [None]:
df.to_csv(f"metrics_embedding_size-{embedding_size}_top_k-{top_k}_max_memory_to_use-{int(max_memory_to_use / 1E9)}GB.csv")

In [None]:
# plot_memory(n_items, max_memory, matrix_memory)
# plt.show()
# plot_time(n_items, execution_time)
# plt.show()

In [None]:
# from matplotlib import pyplot as plt
# %matplotlib inline

# n_items_big = range(0, int(1E6) + 1, int(1E5))
# plt.figure(facecolor='white')
# plt.scatter(n_items_big, [8 * n**2 / 1E9 for n in n_items_big], color="green", marker="*", label="Memory in GB")
# plt.legend(loc="upper left")
# plt.show()

# plt.figure(facecolor='white')
# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)
# plt.scatter(n_items_big, [(coeff_0 + coeff_1 * n + coeff_2 * n **2) / 3600 for n in n_items_big], color="green", marker="*", label="Hours")
# plt.legend(loc="upper left")
# plt.show()

# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)


$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3
\end{bmatrix}_{M \times n_{items} }
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
c \cdot a & c \cdot b & c \cdot c & ... & c \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{items} \times n_{items}}
\leftarrow\text{Similarity Matrix}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
... & ... & ... \\
k_1 & k_2 & k_3 
\end{bmatrix}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
... & ... & ... & ... & ... \\
k \cdot a & k \cdot b & k \cdot c & ... & k \cdot z \\
\end{bmatrix}_{n_{items} \times n_{items}}
\leftarrow\text{Similarity Matrix}
\\
\begin{bmatrix}
k_1 & k_2 & k_3 \\
l_1 & l_2 & l_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}
=
\begin{bmatrix}
k \cdot a & k \cdot b & k \cdot c & ... & k \cdot z \\
l \cdot a & l \cdot b & l \cdot c & ... & l \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{items} \times n_{items}}
\leftarrow\text{Similarity Matrix}
$

In [None]:
def memory_cosine_similarity_top_k(n_items, embedding_size, top_k, chunk_size, n_threads):
    sparse_matrix_memory = (n_items * top_k) * 2 + n_items  # data + indices + indptr
    per_thread_memory = (chunk_size * n_items) * 2 * n_threads  # (chunk dot product result + argpartition output matrix) x number of threads
    return (sparse_matrix_memory + per_thread_memory) * 8 / 1E9

chunk_size = 2981
n_items = 1E6
embedding_size = 100
top_k = 100
n_threads = 1
memory_cosine_similarity_top_k(n_items, embedding_size, top_k, chunk_size, n_threads)