In [29]:
import numpy as np
import math

In [30]:
np.set_printoptions(linewidth=400)

In [31]:
item_size = 5

feature_dimension = 3

max_length = 5

epsilon = 1E-10

In [32]:
scores = np.exp(0.01 * np.random.randn(item_size) + 0.2)
print('scores:', scores)

scores: [1.22268117 1.23160628 1.21007611 1.23402012 1.21637192]


In [33]:
feature_vectors = np.random.randn(item_size, feature_dimension)
print('feature_vectors:', feature_vectors, sep='\n')

feature_vectors:
[[ 0.16086589  1.38866851 -1.08295817]
 [-1.01357357  1.81514575  0.81127292]
 [-0.74623112 -0.3560815  -0.0775322 ]
 [ 0.5665467  -1.84498868  0.34776585]
 [ 0.45257085 -0.3508284   0.42348161]]


In [34]:
print('feature_vectors: ',feature_vectors, sep='\n')
# 平方和开根号
print('按行计算范数:',np.linalg.norm(feature_vectors, axis=1, keepdims=True), sep='\n')
print('按列计算范数:',np.linalg.norm(feature_vectors, axis=0, keepdims=True), sep='\n')

feature_vectors: 
[[ 0.16086589  1.38866851 -1.08295817]
 [-1.01357357  1.81514575  0.81127292]
 [-0.74623112 -0.3560815  -0.0775322 ]
 [ 0.5665467  -1.84498868  0.34776585]
 [ 0.45257085 -0.3508284   0.42348161]]
按行计算范数:
[[1.76835417]
 [2.2316472 ]
 [0.83046141]
 [1.9610965 ]
 [0.71220616]]
按列计算范数:
[[1.46146011 2.97943152 1.46193404]]


In [35]:
# 向量的单位化，操作后向量的l2范数为1；l2归一化
feature_vectors = feature_vectors / np.linalg.norm(feature_vectors, axis=1, keepdims=True) # 行向量的范数，欧几里得范数
print('l2_norm_feature_vectors:', feature_vectors, sep='\n')
print('归一化后的l2范数:',np.linalg.norm(feature_vectors, axis=1, keepdims=True), sep='\n')

l2_norm_feature_vectors:
[[ 0.09096927  0.78528868 -0.61241022]
 [-0.45418181  0.81336591  0.36353099]
 [-0.8985741  -0.42877549 -0.09336039]
 [ 0.28889282 -0.94079444  0.17733235]
 [ 0.63544922 -0.49259389  0.59460537]]
归一化后的l2范数:
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [36]:
# 如需使用点积计算向量相似度，则必须对向量作归一化处理。处理后点积与余弦相似度等价。https://milvus.io/cn/docs/metric.md
similarities = np.dot(feature_vectors, feature_vectors.T)
print('similarities:', similarities, sep='\n')

similarities:
[[ 1.          0.37478036 -0.36128031 -0.821115   -0.69316446]
 [ 0.37478036  1.          0.02542526 -0.83195418 -0.47311107]
 [-0.36128031  0.02542526  1.          0.12724217 -0.41529862]
 [-0.821115   -0.83195418  0.12724217  1.          0.75244907]
 [-0.69316446 -0.47311107 -0.41529862  0.75244907  1.        ]]


In [37]:
kernel_matrix = scores.reshape((item_size, 1)) * similarities * scores.reshape((1, item_size))
print('reshaeped score:', scores.reshape((item_size, 1)) * scores.reshape((1, item_size)), sep='\n')

reshaeped score:
[[1.49494924 1.50586181 1.47953728 1.50881316 1.48723504]
 [1.50586181 1.51685403 1.49033734 1.51982693 1.4980913 ]
 [1.47953728 1.49033734 1.4642842  1.49325827 1.47190261]
 [1.50881316 1.51982693 1.49325827 1.52280565 1.50102742]
 [1.48723504 1.4980913  1.47190261 1.50102742 1.47956065]]


In [38]:
print('kernel_matrix:', kernel_matrix, sep='\n')

kernel_matrix:
[[ 1.49494924  0.56436743 -0.53452769 -1.23890911 -1.03089848]
 [ 0.56436743  1.51685403  0.03789221 -1.26442637 -0.70876358]
 [-0.53452769  0.03789221  1.4642842   0.19000542 -0.61127912]
 [-1.23890911 -1.26442637  0.19000542  1.52280565  1.12944669]
 [-1.03089848 -0.70876358 -0.61127912  1.12944669  1.47956065]]


In [39]:
# 全0矩阵
cis = np.zeros((max_length, item_size))
cis 

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [40]:
# 分数的平方
di2s = np.copy(np.diag(kernel_matrix))
di2s

array([1.49494924, 1.51685403, 1.4642842 , 1.52280565, 1.47956065])

In [41]:
selected_items = list()

In [42]:
selected_item = np.argmax(di2s)

In [43]:
print('selected_items BEFORE:', selected_items)
selected_items.append(selected_item)
print('selected_items AFTER:', selected_items)

selected_items BEFORE: []
selected_items AFTER: [3]


In [44]:
while len(selected_items) < max_length:
    k = len(selected_items) - 1
    print('k:', k)
    
    ci_optimal = cis[:k, selected_item]
    print('ci_optimal:', ci_optimal, sep='\n')
    
    di_optimal = math.sqrt(di2s[selected_item])
    print('di_optimal:', di_optimal, sep='\n')
    
    elements = kernel_matrix[selected_item, :]
    print('elements:', elements, sep='\n')
    
    eis = (elements - np.dot(ci_optimal, cis[:k, :])) / di_optimal
    print('eis:', eis, sep='\n')
    
    cis[k, :] = eis
    print('cis:', cis, sep='\n')
    
    di2s -= np.square(eis)
    print('di2s:', di2s, sep='\n')
    
    di2s[selected_item] = -np.inf
    print('di2s:', di2s, sep='\n')
    
    selected_item = np.argmax(di2s)
    print('selected_item:', selected_item, sep='\n')
    
    if di2s[selected_item] < epsilon:
        break
        
        
    selected_items.append(selected_item)

    print('selected_items:', selected_items, sep='\n')

k: 0
ci_optimal:
[]
di_optimal:
1.2340201180093437
elements:
[-1.23890911 -1.26442637  0.19000542  1.52280565  1.12944669]
eis:
[-1.00396184 -1.02464     0.15397271  1.23402012  0.91525793]
cis:
[[-1.00396184 -1.02464     0.15397271  1.23402012  0.91525793]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]]
di2s:
[0.48700986 0.4669669  1.44057661 0.         0.64186358]
di2s:
[0.48700986 0.4669669  1.44057661       -inf 0.64186358]
selected_item:
2
selected_items:
[3, 2]
k: 1
ci_optimal:
[0.15397271]
di_optimal:
1.2002402289275347
elements:
[-0.53452769  0.03789221  1.4642842   0.19000542 -0.61127912]
eis:
[-0.31655743  0.16301637  1.20024023  0.         -0.62671109]
cis:
[[-1.00396184 -1.02464     0.15397271  1.23402012  0.91525793]
 [-0.31655743  0.16301637  1.20024023  0.         -0.62

In [45]:
print('scores:', scores)
print('selected_items_index:', selected_items)
print("selected_items_value:", scores[selected_items])

scores: [1.22268117 1.23160628 1.21007611 1.23402012 1.21637192]
selected_items_index: [3, 2, 1]
selected_items_value: [1.23402012 1.21007611 1.23160628]
