# S_plus similarities package guide

# Index
##### 0. Import and setup notebook
##### 1. Common parameters in all similarities
##### 2. Base similarities
##### 3. Similarities with normalization
##### 4. Stochastic similarities: p3alpha and rp3beta
##### 5. Feature weights and user weights
##### 6. S-plus similarity
##### 7. Others (dot product, s_plus)

# 0. Import and setup notebook

In [1]:
# import package
import recommenders.similarity.s_plus as s_plus
# import useful packages
import scipy.sparse as sp

In [2]:
# let's create a random sparse matrix with 5 rows and 4 columns,
# with about half of its entries non-zero (density=0.5)
# (no random_state is passed, so the values change on every run)
a = sp.random(5, 4, density=0.5)
print(a.todense())

[[0.38500117 0.         0.37435622 0.89684889]
 [0.88340468 0.         0.         0.86869212]
 [0.81418451 0.14007778 0.         0.42853505]
 [0.         0.4386712  0.78135333 0.        ]
 [0.         0.         0.         0.        ]]


# 1. Common parameters in all similarities

In [3]:
# Parameters shared by all the similarity functions shown in this guide.
# k: keep only the top k values per row
k=4
# shrink: shrink term
shrink = 1
# threshold: cut the values under this value
threshold = 0.25
# binary: if True, set every non-zero value to 1
binary = False
# target_items: calculate only the rows needed (here rows 1, 2 and 3)
target_items = [1,2,3]
# verbose: 1 prints the progress 0%->100% (rows/total_rows), 0 mutes it
verbose = 1

# all the common parameters are passed explicitly by keyword
s = s_plus.dot_product_similarity(a,a.T,
                                  k=k, shrink=shrink, threshold=threshold,
                                  binary=binary, target_items=target_items,
                                  verbose=verbose
                                 )
print(s.todense())

Start preprocessing...Allocate memory per threads...Build coo matrix and remove zeros...                          Similarity done                     
[[0.         0.         0.         0.         0.        ]
 [0.5595987  0.7675149  0.5457597  0.         0.        ]
 [0.34889656 0.5457597  0.43308023 0.         0.        ]
 [0.         0.         0.         0.40147275 0.        ]
 [0.         0.         0.         0.         0.        ]]


# 2. Base similarities

In [4]:
# dot product similarity between a and its transpose
# (no optional parameter is passed in this call)
s = s_plus.dot_product_similarity(a, a.T)
print(s.todense())

[[1.0927064  1.1191974  0.6977931  0.2925045  0.        ]
 [1.1191974  1.5350298  1.0915194  0.         0.        ]
 [0.6977931  1.0915194  0.86616045 0.06144809 0.        ]
 [0.2925045  0.         0.06144809 0.8029455  0.        ]
 [0.         0.         0.         0.         0.        ]]


# 3. Similarities with normalization

In [5]:
# cosine similarity, keeping the top k=4 values per row
# NOTE(review): no alpha argument is passed in this call, so it does not set
# an asymmetric alpha (the printed result below is symmetric) - confirm intent
s = s_plus.cosine_similarity(a, a.T, k=4)
print(s.todense())

[[1.         0.86416477 0.7172586  0.31227538 0.        ]
 [0.86416477 1.0000001  0.9466161  0.         0.        ]
 [0.7172586  0.9466161  1.         0.07368281 0.        ]
 [0.31227538 0.         0.07368281 1.         0.        ]
 [0.         0.         0.         0.         0.        ]]


In [6]:
# tversky similarity with alpha 0.3 and beta 1, keeping the top k=4 values per row
s = s_plus.tversky_similarity(a, a.T, alpha=0.3, beta=1, k=4)
print(s.todense())

[[1.         0.7328991  0.7086824  0.28044367 0.        ]
 [0.9192917  1.         1.0923784  0.         0.        ]
 [0.610377   0.7438346  1.         0.05883808 0.        ]
 [0.23478518 0.         0.05644639 1.         0.        ]
 [0.         0.         0.         0.         0.        ]]


In [7]:
# jaccard similarity between a and its transpose
# (no optional parameter is passed in this call)
s = s_plus.jaccard_similarity(a, a.T)
print(s.todense())

[[1.         0.7419082  0.5533325  0.18245637 0.        ]
 [0.7419082  1.         0.83343023 0.         0.        ]
 [0.5533325  0.83343023 1.         0.03822212 0.        ]
 [0.18245637 0.         0.03822212 1.         0.        ]
 [0.         0.         0.         0.         0.        ]]


In [8]:
# dice similarity between a and its transpose
# (no optional parameter is passed in this call)
s = s_plus.dice_similarity(a, a.T)
print(s.todense())

[[1.         0.85183394 0.7124457  0.3086057  0.        ]
 [0.85183394 1.         0.9091486  0.         0.        ]
 [0.7124457  0.9091486  1.         0.07362995 0.        ]
 [0.3086057  0.         0.07362995 1.         0.        ]
 [0.         0.         0.         0.         0.        ]]


# 4. Stochastic similarities: p3alpha and rp3beta

In [9]:
# p3alpha similarity and rp3beta eurm
# if your matrix has no probabilities already applied use 'sum' (otherwise use 'none' instead of 'sum')
urm = a
pop = urm.sum(axis=0).A1 # item popularity: column sums of the urm, flattened to a 1-D array
# step 1: p3alpha similarity between the transposed urm and the urm
s_p3alpha = s_plus.p3alpha_similarity(urm.T, urm, weight_pop_m1='sum', weight_pop_m2='sum', alpha=2)
# step 2: rp3beta eurm built from the urm and the p3alpha similarity, weighted by item popularity
eurm_rp3beta = s_plus.rp3beta_eurm(urm, s_p3alpha, weight_pop=pop, beta=0.1)
print(eurm_rp3beta.todense())

[[0.13469337 0.10958983 0.13340074 0.15112123]
 [0.19313264 0.01040466 0.01813251 0.18878499]
 [0.14378694 0.06801089 0.0465383  0.12920912]
 [0.00625346 0.40242207 0.35678828 0.01356905]
 [0.         0.         0.         0.        ]]


# 5. Feature weights and user weights similarities

In [10]:
# feature weights and user weights used together
icm = a
# 4 values, the same as the number of columns of icm
weight_feature_m1 = [1,0,2,1] # high value -> sim score higher
weight_feature_m2 = weight_feature_m1
# 5 values, the same as the number of rows of icm
weight_pop_m1 = [1,1,3,1,1] # high value -> sim score lower
weight_pop_m2 = weight_pop_m1
s = s_plus.popularity_feature_weight_similarity(icm, icm.T,
                                                weight_feature_m1=weight_feature_m1, weight_feature_m2=weight_feature_m2,
                                                weight_pop_m1=weight_pop_m1, weight_pop_m2=weight_pop_m2)
print(s.todense())

[[1.5131342  1.1191974  0.23259772 1.170018   0.        ]
 [1.1191974  1.5350298  0.3638398  0.         0.        ]
 [0.23259772 0.3638398  0.09405987 0.         0.        ]
 [1.170018   0.         0.         2.4420524  0.        ]
 [0.         0.         0.         0.         0.        ]]


In [11]:
# feature weights (like above but with the feature weights only)
icm = a
# 4 values, the same as the number of columns of icm
weight_feature_m1 = [1,0,2,1] # high value -> sim score higher
weight_feature_m2 = weight_feature_m1
s = s_plus.feature_weight_similarity(icm, icm.T,
                                     weight_feature_m1=weight_feature_m1,
                                     weight_feature_m2=weight_feature_m2)
print(s.todense())

[[1.5131342  1.1191974  0.6977931  1.170018   0.        ]
 [1.1191974  1.5350298  1.0915194  0.         0.        ]
 [0.6977931  1.0915194  0.84653866 0.         0.        ]
 [1.170018   0.         0.         2.4420524  0.        ]
 [0.         0.         0.         0.         0.        ]]


In [12]:
# user weights (like above but with the user weights only)
# only the weight_pop_* arguments are passed; no feature weights are given
icm = a
# 5 values, the same as the number of rows of icm
weight_pop_m1 = [1,1,3,1,1] # high value -> sim score lower
weight_pop_m2 = weight_pop_m1
s = s_plus.popularity_feature_weight_similarity(icm, icm.T,
                                                weight_pop_m1=weight_pop_m1,
                                                weight_pop_m2=weight_pop_m2)
print(s.todense())

[[4.7021246e+00 5.2256212e+00 1.0699002e+00 3.9068785e-01 0.0000000e+00]
 [5.2256212e+00 7.0175004e+00 1.6372035e+00 0.0000000e+00 0.0000000e+00]
 [1.0699002e+00 1.6372035e+00 4.1841373e-01 6.8606879e-03 0.0000000e+00]
 [3.9068785e-01 0.0000000e+00 6.8606879e-03 8.7989599e-01 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]


# 6. S-plus

In [13]:
# s_plus similarity
# let's use the normalization term with tversky weight 0.3 (and so cosine weight 0.7 = (1-0.3)) (l)
# (with normalization=True the values are between [0,1])
# tversky parameters: alpha=1, beta=1 (t1, t2)
# cosine asymmetric: alpha=0.4 (c)
# feature weight m1: sum
# other weight terms: none
# top k: k=3
s = s_plus.s_plus_similarity(a,a.T,
                             weight_feature_m1='sum', weight_feature_m2='none',
                             weight_pop_m1='none', weight_pop_m2='none',
                             normalization=True, l=0.3,
                             t1=1, t2=1,
                             c=0.4,
                             k=3
                            )
print(s.todense())

[[0.9165111  0.8551227  0.6196204  0.         0.        ]
 [0.72883564 0.92291987 0.7537994  0.         0.        ]
 [0.66097873 0.9508436  0.9239521  0.         0.        ]
 [0.2836523  0.         0.03165975 0.9693384  0.        ]
 [0.         0.         0.         0.         0.        ]]


# 7. Others (dot_product, s_plus)

In [14]:
# dot_product, calculate only rows 1 and 2 (target_items=[1,2])
# b has 4 rows and 3 columns, c has 3 rows and 5 columns
# (no random_state is passed, so the values change on every run)
b = sp.random(4, 3, density=0.5)
c = sp.random(3, 5, density=0.5)
d = s_plus.dot_product_similarity(b, c, target_items=[1,2])
print(d.todense())

[[0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.43268034]
 [0.39701307 0.5345665  1.3327124  0.         1.1210891 ]
 [0.         0.         0.         0.         0.        ]]


In [15]:
# s_plus (no longer a similarity)
# let's use the normalization term with tversky weight 0.3 and cosine weight 0.5 (l1, l2)
# tversky parameters: alpha=1, beta=1 (t1, t2)
# cosine asymmetric: alpha_x=0.4, alpha_y=0.5 (c1, c2)
# feature weight items: sum
# other weight terms: none
# top k: k=3
s = s_plus.s_plus(a,a.T,
                             weight_feature_items='sum', weight_feature_users='none',
                             weight_pop_items='none', weight_pop_users='none',
                             normalization=True, l1=0.3, l2=0.5,
                             t1=1, t2=1,
                             c1=0.4, c2=0.5,
                             k=3
                            )
print(s.todense())

[[1.256934   1.0226527  0.8109543  0.         0.        ]
 [1.0430963  1.2836556  1.1547375  0.         0.        ]
 [0.8004119  1.1163211  1.2387949  0.         0.        ]
 [0.30479366 0.         0.06763485 1.2329017  0.        ]
 [0.         0.         0.         0.         0.        ]]
