In [63]:
def cosine_similarity(v1, v2):
    """
    计算两个向量的余弦相似度
    
    参数:
        v1: 第一个向量
        v2: 第二个向量
    返回:
        两个向量的余弦相似度
    """
    # 计算点积
    dot_product = sum(a * b for a, b in zip(v1, v2))
    
    # 计算向量的模
    norm_v1 = sum(a * a for a in v1) ** 0.5
    norm_v2 = sum(a * a for a in v2) ** 0.5
    
    # 避免除零错误
    if norm_v1 == 0 or norm_v2 == 0:
        return 0
    
    # 计算余弦相似度
    return dot_product / (norm_v1 * norm_v2)

def cosine_similarity_with_average(v, vectors):
    """
    计算一个向量与一组向量的平均向量的余弦相似度
    
    参数:
        v: 待比较的向量
        vectors: 向量列表
    返回:
        余弦相似度值
    """
    # 计算平均向量
    n = len(vectors)
    if n == 0:
        return 0
        
    # 计算所有向量对应位置的和
    avg_vector = []
    vector_len = len(vectors[0])
    for i in range(vector_len):
        position_sum = sum(vec[i] for vec in vectors)
        avg_vector.append(position_sum / n)
    
    # 计算与平均向量的余弦相似度
    return cosine_similarity(v, avg_vector)

def cosine_similarity_with_all(v, vectors):
    """
    计算一个向量与其他所有向量的余弦相似度的平均值
    
    参数:
        v: 待比较的向量
        vectors: 向量列表
    返回:
        与所有向量的余弦相似度的平均值
    """
    # 计算与每个向量的余弦相似度
    similarities = []
    for vector in vectors:
        sim = cosine_similarity(v, vector)
        similarities.append(sim)
        
    # 计算平均相似度
    n = len(similarities)
    if n == 0:
        return 0
    
    return sum(similarities) / n


v1=[65,0,62,0,61]
v2=[63,68,0,65,0]
v3=[67,61,0,0,66]
v4=[0,65,68,63,0]
v5=[66,0,61,65,0]
v6=[62,0,65,0,63]
v7=[64,66,0,0,65]
v8=[61,65,0,64,0]
v9=[0,63,66,62,0]
v10=[0,67,0,61,65]
v11=[85,83,82,84,85]
v12=[84,82,85,83,84]
v13=[83,85,84,82,83]
v14=[85,84,83,85,82]
v15=[82,83,85,84,83]
v_list=[v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15]
# print(cosine_similarity(v1,v2))
# print(cosine_similarity(v1,v3))
print(cosine_similarity_with_all(v15,v_list))


0.848792038487134


In [65]:
def euclidean_distance_normalized(v1, v2):
    """
    计算两个向量的归一化欧式距离
    
    参数:
        v1: 第一个向量
        v2: 第二个向量
    返回:
        归一化后的欧式距离 (0-1之间)
    """
    # 计算欧式距离
    squared_diff = [(a - b) ** 2 for a, b in zip(v1, v2)]
    distance = (sum(squared_diff)) ** 0.5
    
    # 归一化 - 使用向量维度进行归一化
    max_possible_distance = (sum([max(v1) ** 2 for _ in range(len(v1))])) ** 0.5
    normalized_distance = distance / max_possible_distance
    
    return normalized_distance

def euclidean_distance_with_all(v, vectors):
    """
    计算一个向量与其他所有向量的归一化欧式距离的平均值
    
    参数:
        v: 待比较的向量
        vectors: 向量列表
    返回:
        与所有向量的归一化欧式距离的平均值
    """
    distances = []
    for vector in vectors:
        dist = euclidean_distance_normalized(v, vector)
        distances.append(dist)
        
    # 计算平均距离
    n = len(distances)
    if n == 0:
        return 0
        
    return sum(distances) / n

# 测试新函数
print("归一化欧式距离平均值:", euclidean_distance_with_all(v1, v_list))



归一化欧式距离平均值: 0.6992000842068907


In [67]:
def cosine_similarity_matrix(vectors):
    """
    计算所有向量两两之间的余弦相似度矩阵
    
    参数:
        vectors: 向量列表
    返回:
        相似度矩阵
    """
    n = len(vectors)
    similarity_matrix = []
    
    for i in range(n):
        row = []
        for j in range(n):
            # 计算向量点积
            dot_product = sum(a * b for a, b in zip(vectors[i], vectors[j]))
            
            # 计算向量模长
            norm_i = sum(x * x for x in vectors[i]) ** 0.5
            norm_j = sum(x * x for x in vectors[j]) ** 0.5
            
            # 计算余弦相似度
            if norm_i == 0 or norm_j == 0:
                similarity = 0
            else:
                similarity = dot_product / (norm_i * norm_j)
                
            row.append(similarity)
        similarity_matrix.append(row)
        
    return similarity_matrix








# 测试余弦相似度矩阵
similarity_matrix = cosine_similarity_matrix(v_list)
print("余弦相似度矩阵:")
for row in similarity_matrix:
    print([round(x, 3) for x in row])



余弦相似度矩阵:
[1.0, 0.333, 0.689, 0.343, 0.67, 0.999, 0.665, 0.333, 0.342, 0.327, 0.776, 0.781, 0.774, 0.77, 0.773]
[0.333, 1.0, 0.659, 0.664, 0.668, 0.314, 0.668, 1.0, 0.666, 0.675, 0.776, 0.768, 0.774, 0.782, 0.771]
[0.689, 0.659, 1.0, 0.312, 0.356, 0.676, 0.999, 0.655, 0.311, 0.67, 0.779, 0.772, 0.776, 0.773, 0.767]
[0.343, 0.664, 0.312, 1.0, 0.656, 0.356, 0.337, 0.665, 1.0, 0.649, 0.767, 0.772, 0.777, 0.776, 0.78]
[0.67, 0.668, 0.356, 0.656, 1.0, 0.662, 0.338, 0.673, 0.658, 0.321, 0.773, 0.778, 0.77, 0.779, 0.776]
[0.999, 0.314, 0.676, 0.356, 0.662, 1.0, 0.653, 0.314, 0.354, 0.335, 0.776, 0.781, 0.774, 0.77, 0.774]
[0.665, 0.668, 0.999, 0.337, 0.338, 0.653, 1.0, 0.663, 0.335, 0.689, 0.779, 0.772, 0.777, 0.773, 0.768]
[0.333, 1.0, 0.655, 0.665, 0.673, 0.314, 0.663, 1.0, 0.666, 0.675, 0.776, 0.769, 0.774, 0.782, 0.771]
[0.342, 0.666, 0.311, 1.0, 0.658, 0.354, 0.335, 0.666, 1.0, 0.651, 0.767, 0.772, 0.777, 0.776, 0.78]
[0.327, 0.675, 0.67, 0.649, 0.321, 0.335, 0.689, 0.675, 0.651, 1.0, 0.7

In [73]:
def average_similarity_from_matrix(v_idx, similarity_matrix):
    """
    从相似度矩阵中计算某个向量与其他向量的平均相似度
    
    参数:
        v_idx: 目标向量在矩阵中的索引
        similarity_matrix: 相似度矩阵
    返回:
        归一化后的平均相似度
    """
    # 获取该向量与所有向量的相似度
    similarities = similarity_matrix[v_idx]
    
    # 计算平均相似度
    n = len(similarities)
    if n == 0:
        return 0
        
    return sum(similarities) / n


print("平均相似度:", average_similarity_from_matrix(11, similarity_matrix))

平均相似度: 0.8488042240009345
