In [1]:
# Change the kernel's working directory to the project checkout.
# NOTE(review): this path is specific to one user's home directory; presumably it is
# needed so that the `effcossim` package can be imported from the local checkout — confirm,
# and consider a configurable/relative path so the notebook runs on other machines.
%cd ~/drive/gdrive/projects/effcossim

/home/ngshya/drive/gdrive/projects/effcossim


In [2]:
from numpy import array
from time import time
from scipy.sparse import random
from effcossim.pcs import pairwise_cosine_similarity

In [3]:
# Small dense inputs for the sanity-check cells: A has 3 rows, B has 4 rows,
# and both have 3 columns. The leading 1.0 makes A a float array; B stays integer.
A = array([[1.0, 2, 3], [0, 1, 2], [5, 1, 1]])
B = array([[1, 1, 2], [0, 1, 2], [5, 0, 1], [0, 0, 4]])

In [4]:
# Toy run with efficient=False; a dense result is requested so it prints as a plain matrix
# (one row per row of A, one column per row of B).
M1 = pairwise_cosine_similarity(A=A, B=B, efficient=False, dense_output=True)
print(M1)

# Release the result once it has been shown.
del M1

[[0.98198051 0.95618289 0.41931393 0.80178373]
 [0.91287093 1.         0.1754116  0.89442719]
 [0.62853936 0.25819889 0.98130676 0.19245009]]


In [5]:
# Toy run with efficient=True, single job, dense result requested for printing.
# n_top=10 and lower_bound=0.01 are presumably a per-row match cap and a minimum
# similarity threshold — confirm against the effcossim documentation.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.01,
    n_jobs=1,
    dense_output=True,
)
print(M2)

# Release the result once it has been shown.
del M2

[[0.98198051 0.95618289 0.41931393 0.80178373]
 [0.91287093 1.         0.1754116  0.89442719]
 [0.62853936 0.25819889 0.98130676 0.19245009]]


In [6]:
%%time

# Build the benchmark inputs: two random sparse matrices in CSR format,
# each 10000 x 5000 with density 0.3. These rebind A and B, replacing the
# small dense toy matrices used by the earlier cells.
# NOTE(review): both calls pass identical arguments, including random_state=1102,
# so B is expected to have exactly the same contents as A — confirm this is
# intended; a distinct seed for B would give two independent matrices.
A = random(
    m=10000,       
    n=5000, 
    density=0.3, 
    format='csr', 
    random_state=1102
)
B = random(
    m=10000, 
    n=5000, 
    density=0.3, 
    format='csr', 
    random_state=1102
)

CPU times: user 6.17 s, sys: 230 ms, total: 6.4 s
Wall time: 6.38 s


In [7]:
%%time

# Benchmark run with efficient=False on the large sparse inputs (non-dense output requested).
M1 = pairwise_cosine_similarity(A=A, B=B, efficient=False, dense_output=False)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M1.data.nbytes)
del M1

Output size: 800000000
CPU times: user 1min 53s, sys: 140 ms, total: 1min 53s
Wall time: 1min 53s


In [8]:
%%time

# Benchmark run with efficient=True and n_jobs=1 on the large sparse inputs.
# n_top=10 / lower_bound=0.5: presumably a per-row match cap and a minimum
# similarity threshold — confirm against the effcossim documentation.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=1,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 1min 35s, sys: 121 ms, total: 1min 35s
Wall time: 1min 35s


In [9]:
%%time

# Same benchmark call as the n_jobs=1 run, with n_jobs=2.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=2,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 2min 3s, sys: 136 ms, total: 2min 4s
Wall time: 1min 2s


In [10]:
%%time

# Same benchmark call as the n_jobs=1 run, with n_jobs=4.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=4,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 3min 18s, sys: 156 ms, total: 3min 18s
Wall time: 51 s


In [11]:
%%time

# Same benchmark call as the n_jobs=1 run, with n_jobs=6.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=6,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 4min 50s, sys: 215 ms, total: 4min 50s
Wall time: 51.8 s


In [12]:
%%time

# Same benchmark call as the n_jobs=1 run, with n_jobs=8.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=8,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 6min 27s, sys: 479 ms, total: 6min 27s
Wall time: 53.3 s


In [13]:
%%time

# Same benchmark call as the n_jobs=1 run, with n_jobs=10.
M2 = pairwise_cosine_similarity(
    A=A,
    B=B,
    efficient=True,
    n_top=10,
    lower_bound=0.5,
    n_jobs=10,
    dense_output=False,
)

# Report the byte size of the result's `.data` buffer only, then free the result.
print("Output size:", M2.data.nbytes)
del M2

Output size: 80000
CPU times: user 6min 26s, sys: 483 ms, total: 6min 26s
Wall time: 53.9 s
