In [2]:
import numpy as np
import math

In [3]:
# Allow up to 400 characters per printed line so wide array rows are not wrapped.
np.set_printoptions(linewidth=400)

In [4]:
# Notebook-wide configuration: problem dimensions and the numerical tolerance.
item_size = 5           # number of candidate items (one score and one feature vector each)
feature_dimension = 3   # number of components in each item's feature vector
max_length = 5          # upper bound on how many items the selection loop may pick
epsilon = 1e-10         # the loop stops once the best remaining di2s value falls below this

In [5]:
# Seed NumPy's legacy global RNG before the first random draw so that this
# cell, and the later np.random.randn call that builds feature_vectors, yield
# the same numbers on every Restart Kernel -> Run All.
np.random.seed(42)

# One strictly positive score per item: exp(0.2 + 0.01 * z) with z ~ N(0, 1),
# so all scores sit close to exp(0.2) ~= 1.22 and differ only slightly.
scores = np.exp(0.01 * np.random.randn(item_size) + 0.2)
print('scores:', scores)

scores: [1.2248457  1.21713085 1.22713702 1.24246508 1.21675492]


In [6]:
# Draw a standard-normal feature matrix: one row per item, one column per
# feature component.
feature_shape = (item_size, feature_dimension)
feature_vectors = np.random.standard_normal(feature_shape)
print('feature_vectors:', feature_vectors, sep='\n')

feature_vectors:
[[ 0.81196301  0.37418268  0.02192176]
 [ 0.52804839  0.46635943  0.5419263 ]
 [-1.96795023  0.90159699  0.10030974]
 [ 1.77949445 -0.02183738 -1.553502  ]
 [-0.70429613 -0.76561583  1.08572172]]


In [7]:
print('feature_vectors: ', feature_vectors, sep='\n')

# Euclidean (L2) norm: the square root of the sum of squares.
# axis=1 gives one norm per row, axis=0 gives one norm per column;
# keepdims=True keeps the reduced axis with length 1 in the result.
per_row_norms = np.linalg.norm(feature_vectors, axis=1, keepdims=True)
per_column_norms = np.linalg.norm(feature_vectors, axis=0, keepdims=True)
print('按行计算范数:', per_row_norms, sep='\n')
print('按列计算范数:', per_column_norms, sep='\n')

feature_vectors: 
[[ 0.81196301  0.37418268  0.02192176]
 [ 0.52804839  0.46635943  0.5419263 ]
 [-1.96795023  0.90159699  0.10030974]
 [ 1.77949445 -0.02183738 -1.553502  ]
 [-0.70429613 -0.76561583  1.08572172]]
按行计算范数:
[[0.89430261]
 [0.88882525]
 [2.16697192]
 [2.36229673]
 [1.50365964]]
按列计算范数:
[[2.91094155 1.32552835 1.97392676]]


In [8]:
# L2 normalisation: divide every row vector by its own Euclidean norm so that
# it becomes a unit vector (L2 norm equal to 1).
row_lengths = np.linalg.norm(feature_vectors, axis=1, keepdims=True)
feature_vectors = feature_vectors / row_lengths
print('l2_norm_feature_vectors:', feature_vectors, sep='\n')

# Recompute the row norms of the normalised vectors to show they are now 1.
normalized_lengths = np.linalg.norm(feature_vectors, axis=1, keepdims=True)
print('归一化后的l2范数:', normalized_lengths, sep='\n')

l2_norm_feature_vectors:
[[ 0.9079287   0.41840723  0.02451269]
 [ 0.59409697  0.52469192  0.60971074]
 [-0.90815678  0.41606307  0.04629028]
 [ 0.75328998 -0.00924413 -0.65762357]
 [-0.468388   -0.50916831  0.72205285]]
归一化后的l2范数:
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [9]:
# To use the dot product as a similarity measure the vectors must be
# normalised first; once they are, the dot product is equivalent to cosine
# similarity. See https://milvus.io/cn/docs/metric.md
# Entry (i, j) is the dot product of item i's and item j's unit vectors.
similarities = feature_vectors.dot(feature_vectors.T)
print('similarities:', similarities, sep='\n')

similarities:
[[ 1.          0.77387823 -0.6493231   0.66394566 -0.62060315]
 [ 0.77387823  1.         -0.29300457  0.04171682 -0.10518101]
 [-0.6493231  -0.29300457  1.         -0.71839312  0.24694764]
 [ 0.66394566  0.04171682 -0.71839312  1.         -0.82296414]
 [-0.62060315 -0.10518101  0.24694764 -0.82296414  1.        ]]


In [10]:
# View the score vector as a column and as a row so that broadcasting forms
# the pairwise products score[i] * score[j].
score_column = scores.reshape(item_size, 1)
score_row = scores.reshape(1, item_size)

# kernel_matrix[i, j] = score[i] * similarity[i, j] * score[j]
kernel_matrix = score_column * similarities * score_row

# Show the pure score outer product (the kernel without the similarity term).
print('reshaeped score:', score_column * score_row, sep='\n')

reshaeped score:
[[1.500247   1.49079749 1.5030535  1.52182802 1.49033704]
 [1.49079749 1.48140751 1.49358632 1.51224258 1.48094996]
 [1.5030535  1.49358632 1.50586526 1.52467489 1.49312501]
 [1.52182802 1.51224258 1.52467489 1.54371948 1.51177551]
 [1.49033704 1.48094996 1.49312501 1.51177551 1.48049255]]


In [11]:
# Show the full kernel: score[i] * similarity[i, j] * score[j] for every item pair.
print('kernel_matrix:', kernel_matrix, sep='\n')

kernel_matrix:
[[ 1.500247    1.15369572 -0.97596737  1.0104111  -0.92490786]
 [ 1.15369572  1.48140751 -0.43762762  0.06308596 -0.15576782]
 [-0.97596737 -0.43762762  1.50586526 -1.09531596  0.36872369]
 [ 1.0104111   0.06308596 -1.09531596  1.54371948 -1.24413703]
 [-0.92490786 -0.15576782  0.36872369 -1.24413703  1.48049255]]


In [12]:
# All-zero matrix with max_length rows and item_size columns; the selection
# loop further down fills in one row (cis[k, :]) per pass.
cis = np.zeros(shape=(max_length, item_size))
cis

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [13]:
# Diagonal of the kernel matrix, i.e. the squared scores (the similarity
# diagonal is 1 after L2 normalisation). A private copy is taken because the
# selection loop updates di2s in place.
di2s = np.diag(kernel_matrix).copy()
di2s

array([1.500247  , 1.48140751, 1.50586526, 1.54371948, 1.48049255])

In [14]:
# Indices of the chosen items, in the order they were picked (empty to start).
selected_items = []

In [15]:
# First pick: the index of the largest entry of di2s.
selected_item = di2s.argmax()

In [16]:
# Record the first pick, printing the list before and after the append.
# NOTE(review): re-running this cell appends the same index again, so run it
# exactly once per kernel session.
print('selected_items BEFORE:', selected_items)
selected_items.append(selected_item)
print('selected_items AFTER:', selected_items)

selected_items BEFORE: []
selected_items AFTER: [3]


In [17]:
# Greedy selection loop: each pass processes the most recently chosen item
# (selected_item), updates cis and di2s in place, then picks the next item as
# the argmax of the updated di2s. It ends when max_length items have been
# chosen or when the best remaining di2s value drops below epsilon.
# NOTE(review): this looks like the fast greedy MAP inference for a
# determinantal point process (incremental Cholesky-style update) -- confirm
# against the intended reference.
while len(selected_items) < max_length:
    # Zero-based position of the most recent pick; also the row of cis written in this pass.
    k = len(selected_items) - 1
    print('k:', k)
    
    # Column of the most recent pick, restricted to the k rows filled so far
    # (an empty array on the first pass, when k == 0).
    ci_optimal = cis[:k, selected_item]
    print('ci_optimal:', ci_optimal, sep='\n')
    
    # Square root of the current di2s value of the most recent pick.
    di_optimal = math.sqrt(di2s[selected_item])
    print('di_optimal:', di_optimal, sep='\n')
    
    # Kernel row of the most recent pick: its kernel value against every item.
    elements = kernel_matrix[selected_item, :]
    print('elements:', elements, sep='\n')
    
    # Subtract the part already accounted for by the first k rows of cis,
    # then scale by di_optimal. One value per item.
    eis = (elements - np.dot(ci_optimal, cis[:k, :])) / di_optimal
    print('eis:', eis, sep='\n')
    
    # Store the new values as row k of cis.
    cis[k, :] = eis
    print('cis:', cis, sep='\n')
    
    # Reduce every item's di2s by its squared eis value (in place).
    di2s -= np.square(eis)
    print('di2s:', di2s, sep='\n')
    
    # Mask the item just processed so that argmax can never pick it again.
    di2s[selected_item] = -np.inf
    print('di2s:', di2s, sep='\n')
    
    # Next candidate: the item with the largest remaining di2s value.
    selected_item = np.argmax(di2s)
    print('selected_item:', selected_item, sep='\n')
    
    # Stop without appending the candidate when its di2s value is below the tolerance.
    if di2s[selected_item] < epsilon:
        break
        
        
    selected_items.append(selected_item)

    print('selected_items:', selected_items, sep='\n')

k: 0
ci_optimal:
[]
di_optimal:
1.242465082431504
elements:
[ 1.0104111   0.06308596 -1.09531596  1.54371948 -1.24413703]
eis:
[ 0.81323098  0.05077483 -0.88156679  1.24246508 -1.00134567]
cis:
[[ 0.81323098  0.05077483 -0.88156679  1.24246508 -1.00134567]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]]
di2s:
[8.38902364e-01 1.47882943e+00 7.28705251e-01 2.22044605e-16 4.77799402e-01]
di2s:
[0.83890236 1.47882943 0.72870525       -inf 0.4777994 ]
selected_item:
1
selected_items:
[3, 1]
k: 1
ci_optimal:
[0.05077483]
di_optimal:
1.2160713077760978
elements:
[ 1.15369572  1.48140751 -0.43762762  0.06308596 -0.15576782]
eis:
[ 9.14752325e-01  1.21607131e+00 -3.23061825e-01  1.14119852e-17 -8.62816666e-02]
cis:
[[ 8.13230983e-01  5.07748331e-02 -8.81566791e-01  1.24246508e+00 -1.00134567e+

In [18]:
# Summary: all scores, the chosen indices in pick order, and the scores of
# the chosen items (looked up by indexing scores with the index list).
print('scores:', scores)
print('selected_items_index:', selected_items)
chosen_scores = scores[selected_items]
print("selected_items_value:", chosen_scores)

scores: [1.2248457  1.21713085 1.22713702 1.24246508 1.21675492]
selected_items_index: [3, 1, 2]
selected_items_value: [1.24246508 1.21713085 1.22713702]
