# Collect data from runs

In [2]:
import logging
import sys
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from profiling import get_memory, get_time
from plotting import plot_memory, plot_time
from chunkdot.cosine_similarity_top_k import cosine_similarity_top_k
from chunkdot.utils import get_memory_available, warm_up_chunked_dot

In [3]:
# NOTE(review): presumably runs the chunked dot product once up front so that one-off
# start-up cost (e.g. JIT compilation) is not included in the timings measured
# further down — confirm against chunkdot.utils.
warm_up_chunked_dot()

In [4]:
# Send DEBUG-level log records to stdout so they appear in the cell output
# (the benchmark output below contains DEBUG lines emitted by chunkdot.utils).
logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)

In [5]:
# Memory reported as available, scaled by 1E9 (i.e. GB, assuming the helper returns bytes — TODO confirm).
get_memory_available() / 1E9

18.824343552

In [5]:
import math
# M / 8 is how many 8-byte elements fit in M (taken here to be a size in bytes);
# the square root of that count is the side of the largest square matrix of such elements.
M = 5E9
math.sqrt(M / 8)

25000.0

In [13]:
# --- Benchmark configuration -------------------------------------------------
embedding_dim = 128
max_n_items = .5E6
top_k = 100
max_memory_to_use = 20E9
n_steps = 20
# sklearn's dense similarity matrix is only measured up to this many items; in the
# recorded run it already needs 20 GB at 50000 items, so larger sizes are skipped.
sklearn_max_n_items = 50000

# Evenly spaced problem sizes: step, 2 * step, ..., max_n_items.
step = int(max_n_items / n_steps)
n_items = range(step, int(max_n_items) + 1, step)
# NOTE(review): "force_memory" presumably lets chunkdot use max_memory_to_use even when
# less memory is reported as available — confirm against chunkdot's documentation.
numba_function_kwargs = {"top_k": top_k, "max_memory_to_use": max_memory_to_use, "force_memory": True}

# One list per implementation, one entry per problem size, in the order of n_items.
# "numpy" holds the sklearn cosine_similarity results (None where skipped),
# "numba" holds the chunkdot cosine_similarity_top_k results.
max_memory = {"numpy": [], "numba": []}
matrix_memory = {"numpy": [], "numba": []}
execution_time = {"numpy": [], "numba": []}


def _measure(label, function, n, function_kwargs=None):
    """Measure memory and run time of `function` for `n` items, printing both.

    `label` prefixes the printed section headers. `function_kwargs` is forwarded
    to get_memory/get_time only when given, so the profiling helpers keep their
    own default otherwise.

    Returns a tuple (max_size, matrix_size, elapsed) as reported by get_memory
    and get_time.
    """
    extra = {} if function_kwargs is None else {"function_kwargs": function_kwargs}

    print(f"\n\t{label} memory")
    max_size, matrix_size = get_memory(function, n_items=n, embedding_dim=embedding_dim, **extra)
    print(f"\tMax:{max_size / 1E9:0.2f}GB  Matrix:{matrix_size / 1E9:0.2f}GB")

    print(f"\n\t{label} time")
    elapsed = get_time(function, n_items=n, embedding_dim=embedding_dim, n_iterations=1, **extra)
    print(f"\t{elapsed:0.2f} sec")

    return max_size, matrix_size, elapsed


def _record(key, max_size, matrix_size, elapsed):
    """Append one measurement to the three result dictionaries under `key`."""
    max_memory[key].append(max_size)
    matrix_memory[key].append(matrix_size)
    execution_time[key].append(elapsed)


for i, n in enumerate(n_items):
    print(f"\nN: {n} - {i + 1}/{len(n_items)}")

    # The "Sklean" label is kept exactly as in the recorded output of this notebook.
    if n > sklearn_max_n_items:
        print("\n\tSklean memory: Skipped")
        print("\n\tSklean time: Skipped")
        # Keep the lists aligned with n_items by recording placeholders.
        sklearn_measurement = (None, None, None)
    else:
        sklearn_measurement = _measure("Sklean", cosine_similarity, n)
    _record("numpy", *sklearn_measurement)

    _record("numba", *_measure("Custom", cosine_similarity_top_k, n, numba_function_kwargs))


N: 25000 - 1/20

	Sklean memory
	Max:5.03GB  Matrix:5.00GB

	Sklean time
	4.79 sec

	Custom memory
DEBUG:chunkdot.utils:Memory available: 20.04 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 3114
	Max:10.04GB  Matrix:0.03GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 20.05 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 3114
	4.29 sec

N: 50000 - 2/20

	Sklean memory
	Max:20.05GB  Matrix:20.00GB

	Sklean time
	23.24 sec

	Custom memory
DEBUG:chunkdot.utils:Memory available: 28.83 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 1552
	Max:15.79GB  Matrix:0.06GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 28.18 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Nu

DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 185
	921.81 sec

N: 425000 - 17/20

	Sklean memory: Skipped

	Sklean time: Skipped

	Custom memory
DEBUG:chunkdot.utils:Memory available: 19.45 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 173




	Max:15.29GB  Matrix:0.51GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 18.16 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 173




	1256.46 sec

N: 450000 - 18/20

	Sklean memory: Skipped

	Sklean time: Skipped

	Custom memory
DEBUG:chunkdot.utils:Memory available: 18.87 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 163




	Max:15.32GB  Matrix:0.54GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 18.57 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 163




	1346.96 sec

N: 475000 - 19/20

	Sklean memory: Skipped

	Sklean time: Skipped

	Custom memory
DEBUG:chunkdot.utils:Memory available: 18.50 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 154




	Max:16.53GB  Matrix:0.57GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 19.65 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 154




	1477.57 sec

N: 500000 - 20/20

	Sklean memory: Skipped

	Sklean time: Skipped

	Custom memory
DEBUG:chunkdot.utils:Memory available: 19.40 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 145




	Max:15.30GB  Matrix:0.60GB

	Custom time
DEBUG:chunkdot.utils:Memory available: 18.96 GB
DEBUG:chunkdot.utils:Maximum memory to use: 20.00 GB
DEBUG:chunkdot.utils:Number of threads: 16
DEBUG:chunkdot.utils:Chunk size per thread: 145




	1465.93 sec


In [14]:
import pandas as pd

# Memory budget in whole GB; stored as a column and embedded in the CSV file name.
max_memory_to_use_GB = int(max_memory_to_use / 1E9)

# One frame per metric, each with a "numpy" and a "numba" column; byte counts become GB.
dfs = {
    "max_memory (GB)": pd.DataFrame.from_dict(max_memory).div(1E9),
    "matrix_memory (GB)": pd.DataFrame.from_dict(matrix_memory).div(1E9),
    "execution_time (s)": pd.DataFrame.from_dict(execution_time),
}

# Place the metric frames side by side under a two-level column header, attach the
# run parameters as extra columns and index the rows by problem size.
df = (
    pd.concat(dfs.values(), keys=dfs.keys(), axis=1)
    .assign(
        n_items=n_items,
        embedding_dim=embedding_dim,
        max_memory_to_use_GB=max_memory_to_use_GB,
        top_k=top_k,
    )
    .set_index("n_items")
)

# Persist the table next to the notebook, then display it as the cell output.
df.to_csv(
    f"metrics_embedding_dim-{embedding_dim}-top_k-{top_k}-max_memory_to_use-{max_memory_to_use_GB}GB.csv"
)
df

Unnamed: 0_level_0,max_memory (GB),max_memory (GB),matrix_memory (GB),matrix_memory (GB),execution_time (s),execution_time (s),embedding_dim,max_memory_to_use_GB,top_k
Unnamed: 0_level_1,numpy,numba,numpy,numba,numpy,numba,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
n_items,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
25000,5.025604,10.03799,5.0,0.0301,4.790732,4.294329,128,20,100
50000,20.051204,15.79445,20.0,0.0602,23.236377,12.266604,128,20,100
75000,,17.532008,,0.0903,,25.867822,128,20,100
100000,,17.528808,,0.1204,,47.658096,128,20,100
125000,,16.927008,,0.1505,,73.298883,128,20,100
150000,,17.558408,,0.1806,,107.688377,128,20,100
175000,,16.356207,,0.2107,,152.810402,128,20,100
200000,,17.587208,,0.2408,,214.676524,128,20,100
225000,,16.349407,,0.2709,,268.827176,128,20,100
250000,,16.398007,,0.301,,541.011935,128,20,100


In [None]:
# NOTE(review): plotting is currently disabled. `plt` is not imported by any active cell
# (the only matplotlib import in view is itself commented out in the following cell),
# so it has to be imported before these lines are re-enabled.
# plot_memory(n_items, max_memory, matrix_memory)
# plt.show()
# plot_time(n_items, execution_time)
# plt.show()

In [None]:
# NOTE(review): disabled extrapolation sketch (memory as 8 * n**2 bytes, time as a
# quadratic fit). As written it cannot run: `execution_time` is a dict of two lists,
# so np.polyfit would need a single series such as execution_time["numba"].
# from matplotlib import pyplot as plt
# %matplotlib inline

# n_items_big = range(0, int(1E6) + 1, int(1E5))
# plt.figure(facecolor='white')
# plt.scatter(n_items_big, [8 * n**2 / 1E9 for n in n_items_big], color="green", marker="*", label="Memory in GB")
# plt.legend(loc="upper left")
# plt.show()

# plt.figure(facecolor='white')
# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)
# plt.scatter(n_items_big, [(coeff_0 + coeff_1 * n + coeff_2 * n **2) / 3600 for n in n_items_big], color="green", marker="*", label="Hours")
# plt.legend(loc="upper left")
# plt.show()

# coeff_2, coeff_1, coeff_0 = np.polyfit(n_items, execution_time, 2)


$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
c_1 & c_2 & c_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{items} \times M}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3
\end{bmatrix}_{M \times n_{items} }
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
c \cdot a & c \cdot b & c \cdot c & ... & c \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{items} \times n_{items}}
\leftarrow\text{Similarity Matrix}
$

$
\begin{bmatrix}
a_1 & a_2 & a_3 \\
b_1 & b_2 & b_3 \\
... & ... & ... \\
k_1 & k_2 & k_3 
\end{bmatrix}_{n_{chunk} \times M}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}_{M \times n_{items}}
=
\begin{bmatrix}
a \cdot a & a \cdot b & a \cdot c & ... & a \cdot z \\
b \cdot a & b \cdot b & b \cdot c & ... & b \cdot z \\
... & ... & ... & ... & ... \\
k \cdot a & k \cdot b & k \cdot c & ... & k \cdot z \\
\end{bmatrix}_{n_{chunk} \times n_{items}}
\leftarrow\text{Similarity Matrix, first chunk of rows}
\\
\begin{bmatrix}
l_1 & l_2 & l_3 \\
m_1 & m_2 & m_3 \\
... & ... & ... \\
z_1 & z_2 & z_3 
\end{bmatrix}_{n_{chunk} \times M}
\times
\begin{bmatrix}
a_1 & b_1 & c_1 & ... & z_1 \\
a_2 & b_2 & c_2 & ... & z_2 \\
a_3 & b_3 & c_3 & ... & z_3 \\
\end{bmatrix}_{M \times n_{items}}
=
\begin{bmatrix}
l \cdot a & l \cdot b & l \cdot c & ... & l \cdot z \\
m \cdot a & m \cdot b & m \cdot c & ... & m \cdot z \\
... & ... & ... & ... & ... \\
z \cdot a & z \cdot b & z \cdot c & ... & z \cdot z \\
\end{bmatrix}_{n_{chunk} \times n_{items}}
\leftarrow\text{Similarity Matrix, last chunk of rows}
$

In [None]:
def memory_cosine_similarity_top_k(n_items, embedding_size, top_k, chunk_size, n_threads):
    """Estimate, in GB, the memory used by the chunked top-k cosine similarity.

    Two contributions are counted, at 8 bytes per element:

    * the sparse result: ``n_items * top_k`` entries each for data and indices,
      plus ``n_items`` entries for indptr;
    * the per-thread buffers: two ``chunk_size x n_items`` matrices (the chunk dot
      product result and the argpartition output) for every thread.

    ``embedding_size`` is accepted but does not enter the estimate.

    Returns the total as a float number of GB (bytes divided by 1E9).
    """
    bytes_per_element = 8

    # data + indices + indptr of the sparse output
    sparse_elements = n_items + 2 * (n_items * top_k)

    # dot-product chunk and argpartition output, once per thread
    buffer_elements = n_threads * (2 * (chunk_size * n_items))

    return (sparse_elements + buffer_elements) * bytes_per_element / 1E9

# Worked example: estimated memory (GB) for one million items processed by a single thread.
# The item count and top-k use their own names so that this cell does not overwrite the
# notebook-level `n_items` range and `top_k` value that the benchmark, DataFrame and
# plotting cells depend on when cells are re-run.
chunk_size = 2981
estimate_n_items = int(1E6)
embedding_size = 100
estimate_top_k = 100
n_threads = 1
memory_cosine_similarity_top_k(estimate_n_items, embedding_size, estimate_top_k, chunk_size, n_threads)