# Normal Numpy script

In [1]:
import logging
import sys
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from profiling import get_memory, get_time
from plotting import plot_memory, plot_time
from cosine_simiilarity_top_k import cosine_similarity_top_k, get_memory_available, warm_up_chunked_dot

In [2]:
# Plain-NumPy check of the reshape/repeat/flatten idiom:
# view the vector as a single row, repeat that row twice, then flatten.
a = np.array([1, 2, 3])
as_row = a.reshape(-1, a.size)
as_row.repeat(2, 0).flatten()

array([1, 2, 3, 1, 2, 3])

In [3]:
# Sanity check: with top_k equal to the number of rows (3), the top-k result
# converted to a dense array is expected to match sklearn's full cosine
# similarity matrix.
rng = np.random.default_rng(0)  # fixed seed so the check is reproducible on re-run
m = rng.standard_normal((3, 2))
top_k = 3

expected = cosine_similarity(m)
calculated = cosine_similarity_top_k(m, top_k)

np.testing.assert_array_almost_equal(calculated.toarray(), expected)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Failed in nopython mode pipeline (step: nopython frontend)
- Resolution failure for literal arguments:
No implementation of function Function(<function array_repeat at 0x1387780d0>) found for signature:

 >>> array_repeat(array(int64, 2d, C), int64, Literal[int](0))

There are 2 candidate implementations:
    - Of which 2 did not match due to:
    Overload in function 'array_repeat': File: numba/np/arrayobj.py: Line 2399.
      With argument(s): '(array(int64, 2d, C), int64, int64)':
     Rejected as the implementation raised a specific error:
       TypingError: too many positional arguments
  raised from /Users/agundrod/Library/Caches/pypoetry/virtualenvs/cosine-similarity-top-k-rrxFsLlH-py3.9/lib/python3.9/site-packages/numba/core/typing/templates.py:791

- Resolution failure for non-literal arguments:
No implementation of function Function(<function array_repeat at 0x1387780d0>) found for signature:

 >>> array_repeat(array(int64, 2d, C), int64, int64)

There are 2 candidate implementations:
   - Of which 2 did not match due to:
   Overload in function 'array_repeat': File: numba/np/arrayobj.py: Line 2399.
     With argument(s): '(array(int64, 2d, C), int64, int64)':
    Rejected as the implementation raised a specific error:
      TypingError: too many positional arguments
  raised from /Users/agundrod/Library/Caches/pypoetry/virtualenvs/cosine-similarity-top-k-rrxFsLlH-py3.9/lib/python3.9/site-packages/numba/core/typing/templates.py:791


During: resolving callee type: BoundFunction((<class 'numba.core.types.npytypes.Array'>, 'repeat') for array(int64, 2d, C))
During: typing of call at /Users/agundrod/personal/cosine_similarity_top_k/cosine_simiilarity_top_k.py (79)


File "cosine_simiilarity_top_k.py", line 79:
def _to_sparse(matrix, top_k):
    <source elided>
        a = np.arange(n_cols)
        indices = a.reshape(-1, a.size).repeat(n_cols, 0).flatten()
        ^

During: resolving callee type: type(CPUDispatcher(<function _to_sparse at 0x138899a60>))
During: typing of call at /Users/agundrod/personal/cosine_similarity_top_k/cosine_simiilarity_top_k.py (102)

During: resolving callee type: type(CPUDispatcher(<function _to_sparse at 0x138899a60>))
During: typing of call at /Users/agundrod/personal/cosine_similarity_top_k/cosine_simiilarity_top_k.py (102)


File "cosine_simiilarity_top_k.py", line 102:
def chunked_dot(matrix_left, matrix_right, top_k, chunk_size):
    <source elided>
        chunk_m = np.dot(matrix_left[start_row_i:end_row_i], matrix_right)
        values, indices = _to_sparse(chunk_m, top_k)
        ^


In [2]:
# Presumably triggers numba compilation of chunked_dot ahead of the benchmarks so that
# compile time is not included in the measurements — TODO confirm in cosine_simiilarity_top_k.py.
warm_up_chunked_dot()

In [3]:
# Configure root logging at DEBUG level and write the records to stdout.
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)

In [4]:
# Available memory divided by 1E9 (presumably bytes -> GB; confirm the unit returned by get_memory_available).
get_memory_available() / 1E9

21.281067008

In [9]:
import math
# M is a memory budget; sqrt(M / 8) is the side length n for which n * n * 8 == M,
# i.e. the largest square matrix that fits in M assuming 8 bytes per entry
# (presumably float64 — confirm).
M = 5E9
math.sqrt(M / 8)

25000.0

In [14]:
# --- Benchmark configuration ---
embedding_size = 100
max_n_items = 5E4
top_k = 100
max_memory_to_use = 5E9

# Problem sizes: multiples of one tenth of max_n_items, up to and including max_n_items.
step = int(max_n_items / 10)
n_items = range(step, int(max_n_items) + 1, step)
numba_function_kwargs = {
    "top_k": top_k,
    "max_memory_to_use": max_memory_to_use,
    "force_memory": True,
}

# One list per implementation; position i holds the measurement for n_items[i].
max_memory = {"numpy": [], "numba": []}
matrix_memory = {"numpy": [], "numba": []}
execution_time = {"numpy": [], "numba": []}

total_runs = len(n_items)
for run_number, n in enumerate(n_items, start=1):
    print(f"\nN: {n} - {run_number}/{total_runs}")

    # sklearn baseline: measured only up to 50000 items; larger sizes are recorded as None.
    if n <= 50000:
        print("\n\tSklean memory")
        max_size, matrix_size = get_memory(cosine_similarity, n_items=n, embedding_size=embedding_size)
        print("\n\tSklean time")
        _time = get_time(cosine_similarity, n_items=n, embedding_size=embedding_size, n_iterations=1)
    else:
        max_size = matrix_size = _time = None
    max_memory["numpy"].append(max_size)
    matrix_memory["numpy"].append(matrix_size)
    execution_time["numpy"].append(_time)

    # Top-k implementation: measured for every size, with the configured keyword arguments.
    print("\n\tCustom memory")
    max_size, matrix_size = get_memory(
        cosine_similarity_top_k,
        n_items=n,
        embedding_size=embedding_size,
        function_kwargs=numba_function_kwargs,
    )
    print("\n\tCustom time")
    _time = get_time(
        cosine_similarity_top_k,
        n_items=n,
        embedding_size=embedding_size,
        function_kwargs=numba_function_kwargs,
        n_iterations=1,
    )
    max_memory["numba"].append(max_size)
    matrix_memory["numba"].append(matrix_size)
    execution_time["numba"].append(_time)


N: 5000 - 1/10

	Sklean memory

	Sklean time

	Custom memory
DEBUG:cosine_simiilarity_top_k:Memory available: 17.05 GB
DEBUG:cosine_simiilarity_top_k:Using memory: 5.00 GB
DEBUG:cosine_simiilarity_top_k:Number of threads: 16
DEBUG:cosine_simiilarity_top_k:Chunk size per thread: 3893

	Custom time
DEBUG:cosine_simiilarity_top_k:Memory available: 17.05 GB
DEBUG:cosine_simiilarity_top_k:Using memory: 5.00 GB
DEBUG:cosine_simiilarity_top_k:Number of threads: 16
DEBUG:cosine_simiilarity_top_k:Chunk size per thread: 3893

N: 10000 - 2/10

	Sklean memory

	Sklean time

	Custom memory
DEBUG:cosine_simiilarity_top_k:Memory available: 17.05 GB
DEBUG:cosine_simiilarity_top_k:Using memory: 5.00 GB
DEBUG:cosine_simiilarity_top_k:Number of threads: 16
DEBUG:cosine_simiilarity_top_k:Chunk size per thread: 1940

	Custom time
DEBUG:cosine_simiilarity_top_k:Memory available: 17.05 GB
DEBUG:cosine_simiilarity_top_k:Using memory: 5.00 GB
DEBUG:cosine_simiilarity_top_k:Number of threads: 16
DEBUG:cosine_s

In [15]:
import pandas as pd

# One frame per metric, with one column per implementation. The two memory metrics are
# divided by 1E9 (presumably bytes -> GB, matching the "(GB)" labels); times are left as-is.
dfs = {
    "max_memory (GB)": pd.DataFrame.from_dict(max_memory) / 1E9,
    "matrix_memory (GB)": pd.DataFrame.from_dict(matrix_memory) / 1E9,
    "execution_time (s)": pd.DataFrame.from_dict(execution_time),
}

# Place the metric frames side by side under a top-level metric label, attach the
# run configuration as constant columns, and index the table by problem size.
df = (
    pd.concat(dfs.values(), keys=dfs.keys(), axis=1)
    .assign(
        n_items=n_items,
        embedding_size=embedding_size,
        max_memory_to_use_GB=int(max_memory_to_use / 1E9),
        top_k=top_k,
    )
    .set_index("n_items")
)
df

Unnamed: 0_level_0,max_memory (GB),max_memory (GB),matrix_memory (GB),matrix_memory (GB),execution_time (s),execution_time (s),embedding_size,max_memory_to_use_GB,top_k
Unnamed: 0_level_1,numpy,numba,numpy,numba,numpy,numba,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
n_items,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
5000,0.204001,0.410741,0.2,0.00804,0.119651,0.548146,100,5,100
10000,0.808004,1.57322,0.8,0.01608,0.756045,0.638195,100,5,100
15000,1.812004,3.627898,1.8,0.02412,1.765837,0.993033,100,5,100
20000,3.216004,4.895763,3.2,0.03216,3.120019,1.589477,100,5,100
25000,5.020004,4.964311,5.0,0.0402,4.917111,2.290792,100,5,100
30000,7.224004,4.955891,7.2,0.04824,7.383698,3.318514,100,5,100
35000,9.828004,4.948638,9.8,0.05628,10.315705,3.887616,100,5,100
40000,12.832004,4.93829,12.8,0.06432,14.240721,5.474714,100,5,100
45000,16.236004,4.781077,16.2,0.07236,16.134545,6.18037,100,5,100
50000,20.040004,4.930615,20.0,0.0804,20.283604,8.334299,100,5,100


In [10]:
# Persist the benchmark table; the file name encodes the run configuration.
# The memory budget is read from max_memory_to_use, the name defined in the benchmark
# configuration cell (max_memory_bytes is not defined anywhere in this notebook).
df.to_csv(f"metrics_embedding_size-{embedding_size}_top_k-{top_k}_max_memory_bytes-{int(max_memory_to_use)}.csv")

In [None]:
# plot_memory(n_items, max_memory, matrix_memory)
# plt.show()
# plot_time(n_items, execution_time)
# plt.show()

In [None]:
# from matplotlib import pyplot as plt
# %matplotlib inline

# n_items_big = range(0, int(1E6) + 1, int(1E5))
# plt.figure(facecolor='white')
# plt.scatter(n_items_big, [8 * n**2 / 1E9 for n in n_items_big], color="green", marker="*", label="Memory in GB")
# plt.legend(loc="upper left")
# plt.show()

# plt.figure(facecolor='white')
# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)
# plt.scatter(n_items_big, [(coeff_0 + coeff_1 * n + coeff_2 * n **2) / 3600 for n in n_items_big], color="green", marker="*", label="Hours")
# plt.legend(loc="upper left")
# plt.show()

# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)


$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3
\end{bmatrix}_{M \times n_{items} }
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
c \cdot a & c \cdot b & c \cdot c & ... & c \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{items} \times n_{items}}
\leftarrow\text{Similarity Matrix}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
... & ... & ... \\
k_1 & k_2 & k_3 
\end{bmatrix}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
... & ... & ... & ... & ... \\
k \cdot a & k \cdot b & k \cdot c & ... & k \cdot z \\
\end{bmatrix}_{n_{chunk} \times n_{items}}
\leftarrow\text{Similarity Matrix (first chunk of rows)}
\\
\begin{bmatrix}
k_1 & k_2 & k_3 \\
l_1 & l_2 & l_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}
=
\begin{bmatrix}
k \cdot a & k \cdot b & k \cdot c & ... & k \cdot z \\
l \cdot a & l \cdot b & l \cdot c & ... & l \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{chunk} \times n_{items}}
\leftarrow\text{Similarity Matrix (last chunk of rows)}
$

In [None]:
def memory_cosine_similarity_top_k(n_items, embedding_size, top_k, chunk_size, n_threads):
    """Estimate the memory footprint of the chunked top-k cosine similarity.

    The estimate counts elements and then multiplies by 8 and divides by 1E9
    (i.e. 8 bytes per element, reported in units of 1E9 bytes).

    Args:
        n_items: Number of rows in the input matrix.
        embedding_size: Accepted for signature completeness; not used in the estimate.
        top_k: Number of entries kept per row in the sparse result.
        chunk_size: Number of rows processed per chunk.
        n_threads: Number of threads, each holding its own chunk buffers.

    Returns:
        The estimated memory as a float.
    """
    # Sparse result: data and indices (n_items * top_k each) plus indptr (n_items).
    kept_entries = n_items * top_k
    sparse_elements = kept_entries * 2 + n_items

    # Per thread: the chunk dot-product result and the argpartition output,
    # each of chunk_size * n_items elements.
    chunk_elements = chunk_size * n_items
    working_elements = chunk_elements * 2 * n_threads

    total_elements = sparse_elements + working_elements
    return total_elements * 8 / 1E9

# Example estimate: one million items, top 100 per row, a single thread.
n_items = 1E6
embedding_size = 100
top_k = 100
chunk_size = 2981
n_threads = 1
memory_cosine_similarity_top_k(
    n_items=n_items,
    embedding_size=embedding_size,
    top_k=top_k,
    chunk_size=chunk_size,
    n_threads=n_threads,
)