In [2]:
import numpy as np
import math

In [3]:
# Let NumPy print up to 400 characters per line before it wraps array output.
print_line_width = 400
np.set_printoptions(linewidth=print_line_width)

In [5]:
# Problem size and stopping tolerance used by the cells below.
item_size = 5            # number of candidate items (length of `scores`)

feature_dimension = 3    # number of components in each item's feature vector

max_length = 5           # upper bound on how many items the selection loop may pick

epsilon = 1E-10          # the loop stops once the best remaining di2s value is below this

# Seed NumPy's global generator so the random scores and feature vectors, and
# therefore the selected items, are identical on every Restart & Run All.
random_seed = 42
np.random.seed(random_seed)

In [6]:
# Per-item scores: exp of Gaussian noise scaled by 0.01 and shifted by 0.2,
# so every score is positive and close to exp(0.2).
score_noise = np.random.randn(item_size)
scores = np.exp(0.01 * score_noise + 0.2)
print('scores:', scores)

scores: [1.24498373 1.22828675 1.21760082 1.22570727 1.23060499]


In [7]:
# One random feature vector per item: rows are items, columns are feature components.
feature_shape = (item_size, feature_dimension)
feature_vectors = np.random.randn(*feature_shape)
print('feature_vectors:', feature_vectors, sep='\n')

feature_vectors:
[[-0.60734231  0.62616386  0.67596723]
 [-0.29696527  0.51306072 -0.56605206]
 [ 0.52381025  0.58994059  0.70386477]
 [-0.37339468  0.8194246  -0.45269443]
 [ 1.54821436 -0.82640487 -0.39169912]]


In [15]:
print('feature_vectors: ', feature_vectors, sep='\n')
# The L2 norm is the square root of the sum of squares.
# axis=1 gives one norm per row; axis=0 gives one norm per column.
norms_per_row = np.linalg.norm(feature_vectors, axis=1, keepdims=True)
norms_per_column = np.linalg.norm(feature_vectors, axis=0, keepdims=True)
print('按行计算范数:', norms_per_row, sep='\n')       # label: "norms computed per row"
print('按列计算范数:', norms_per_column, sep='\n')    # label: "norms computed per column"

feature_vectors: 
[[-0.60734231  0.62616386  0.67596723]
 [-0.29696527  0.51306072 -0.56605206]
 [ 0.52381025  0.58994059  0.70386477]
 [-0.37339468  0.8194246  -0.45269443]
 [ 1.54821436 -0.82640487 -0.39169912]]
按行计算范数:
[[1.1035749 ]
 [0.81965518]
 [1.05727608]
 [1.00787524]
 [1.79814931]]
按列计算范数:
[[1.80771169 1.53549473 1.27715805]]


In [17]:
# L2-normalise each feature vector to unit length; afterwards every row has L2 norm 1.
# keepdims=True keeps the norms as a column so the division broadcasts row by row.
row_l2_norms = np.linalg.norm(feature_vectors, axis=1, keepdims=True)  # Euclidean norm of each row vector
feature_vectors = feature_vectors / row_l2_norms
print('l2_norm_feature_vectors:', feature_vectors, sep='\n')
# Recompute the row norms as a check on the normalisation (label: "L2 norms after normalisation").
normalized_row_norms = np.linalg.norm(feature_vectors, axis=1, keepdims=True)
print('归一化后的l2范数:', normalized_row_norms, sep='\n')

l2_norm_feature_vectors:
[[-0.55034082  0.56739589  0.61252501]
 [-0.36230512  0.62594703 -0.69059779]
 [ 0.49543375  0.55798159  0.66573414]
 [-0.37047709  0.81302185 -0.4491572 ]
 [ 0.86100435 -0.45958634 -0.21783459]]
归一化后:
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [18]:
# To use the dot product as a vector similarity, the vectors must be normalised first;
# after normalisation the dot product is equivalent to cosine similarity.
# Reference: https://milvus.io/cn/docs/metric.md
similarities = feature_vectors.dot(feature_vectors.T)
print('similarities:', similarities, sep='\n')

similarities:
[[ 1.          0.13154265  0.45171785  0.3900739  -0.86804238]
 [ 0.13154265  1.         -0.28998579  0.95332134 -0.4491869 ]
 [ 0.45171785 -0.28998579  1.         -0.02891491  0.02510997]
 [ 0.3900739   0.95332134 -0.02891491  1.         -0.59479414]
 [-0.86804238 -0.4491869   0.02510997 -0.59479414  1.        ]]


In [11]:
# Kernel entry (i, j) = scores[i] * similarities[i, j] * scores[j].
# The scores are reshaped to a column and a row so broadcasting forms every (i, j) pair.
score_column = scores.reshape((item_size, 1))
score_row = scores.reshape((1, item_size))
kernel_matrix = score_column * similarities * score_row
# Show the score-only factor scores[i] * scores[j] that scales each similarity.
print('reshaeped score:', score_column * score_row, sep='\n')

reshaeped score:
[[1.47131136 1.4869409  1.46839703 1.47591865 1.47999239]
 [1.4869409  1.50273646 1.48399561 1.49159713 1.49571415]
 [1.46839703 1.48399561 1.46548848 1.4729952  1.47706087]
 [1.47591865 1.49159713 1.4729952  1.48054036 1.48462686]
 [1.47999239 1.49571415 1.47706087 1.48462686 1.48872464]]


In [12]:
# Show the kernel matrix: label on one line, matrix starting on the next.
print('kernel_matrix:')
print(kernel_matrix)

kernel_matrix:
[[ 1.47131136 -0.37569141 -1.13395581  0.08012901  0.24786343]
 [-0.37569141  1.50273646  1.08237964  1.28092904  1.26342265]
 [-1.13395581  1.08237964  1.46548848  0.4731487   0.38833158]
 [ 0.08012901  1.28092904  0.4731487   1.48054036  1.47169537]
 [ 0.24786343  1.26342265  0.38833158  1.47169537  1.48872464]]


In [13]:
# Work buffer for the selection loop: row k stores the `eis` vector computed in pass k.
cis = np.zeros(shape=(max_length, item_size))

In [14]:
# Independent copy of the kernel diagonal; the selection loop updates it in place.
di2s = np.diag(kernel_matrix).copy()

In [15]:
# Indices of the chosen items, in the order they are picked.
selected_items = []

In [16]:
# First pick: the item with the largest diagonal value.
selected_item = di2s.argmax()

In [17]:
# Record the first pick before entering the selection loop.
selected_items.extend([selected_item])

In [18]:
# Greedy selection: each pass writes one row of `cis`, shrinks `di2s`, and then
# picks the remaining item with the largest `di2s` value.
# NOTE(review): this looks like the fast greedy MAP inference for a determinantal
# point process (incremental Cholesky-style update) — confirm against the reference.
while len(selected_items) < max_length:
    # Row of `cis` to fill in this pass; rows [0, k) were written by earlier passes.
    k = len(selected_items) - 1

    # Values already stored for the most recently selected item.
    ci_optimal = cis[:k, selected_item]
    di_optimal = math.sqrt(di2s[selected_item])

    # Kernel row of the most recently selected item.
    elements = kernel_matrix[selected_item, :]

    # New row: the kernel row minus the part explained by earlier rows, rescaled.
    eis = (elements - ci_optimal.dot(cis[:k, :])) / di_optimal
    cis[k, :] = eis

    # Reduce every item's remaining value by its squared entry in the new row.
    di2s -= eis * eis

    # Mask the item just processed so argmax can never return it again.
    di2s[selected_item] = -np.inf
    selected_item = di2s.argmax()

    # Stop early when even the best remaining candidate is below the tolerance.
    if di2s[selected_item] < epsilon:
        break

    selected_items.append(selected_item)

In [19]:
# Summary: all scores, the indices picked by the selection loop, and the scores of those picks.
print('scores:', scores)
print('selected_items_index:', selected_items)
selected_scores = scores[selected_items]
print("selected_items_value:", selected_scores)

scores: [1.21297624 1.22586152 1.21057362 1.21677457 1.22013304]
selected_items_index: [1, 0, 3]
selected_items_value: [1.22586152 1.21297624 1.21677457]
