In [109]:
import torch
import torch.nn.functional as F
import numpy as np
import scipy

In [110]:
def compute_kernel_bias_svd(vecs, n_components=1):
    """
    参考文献： https://spaces.ac.cn/archives/8069
    SVD 奇异值分解：https://zhuanlan.zhihu.com/p/29846048 有误
    https://www.zhihu.com/tardis/bd/art/26306568?source_id=1001 可学
    计算kernel和bias
    vecs.shape = [num_samples, embedding_size]，
    最后的变换：y = (x + bias).dot(kernel)
    """
    vecs = np.array(vecs,dtype=np.float32)
    mu = vecs.mean(axis=0, keepdims=True)
    cov = np.cov(vecs.T)
    u, s, vh = np.linalg.svd(cov)
    W = np.dot(u, np.diag(1 / np.sqrt(s)))
    return W[:, :n_components], -mu


def torch_cov(matrix):
    """
    matrix: 512,3
    """
    # 计算矩阵每一列的均值
    mean = torch.mean(matrix, dim=0) # [3]
    # 中心化矩阵（减去均值）
    centered_matrix = matrix - mean # 广播
    # 计算协方差矩阵
    cov_matrix = torch.matmul(centered_matrix.t(), centered_matrix) / (centered_matrix.size(0) - 1)

    return cov_matrix


def compute_svd_torch(vecs, n_components=1):
    vecs = torch.tensor(vecs, dtype=torch.float32)
    mu = torch.mean(vecs, dim=1, keepdim=True)

    # cov = torch.matmul(vecs.t(), vecs) / vecs.size(0)
    cov = torch_cov(vecs.T) # 3,3

    u, s, vh = torch.svd(cov)
    sqrt_inv_s = torch.diag(1 / torch.sqrt(s))
    print(u.shape, sqrt_inv_s.shape)
    W = torch.matmul(u, sqrt_inv_s)

    W = W[:, :n_components]

    # 返回结果
    return W, -mu


def compute_corrcoef(x, y):
    """Spearman相关系数
    """
    return scipy.stats.spearmanr(x, y).correlation

In [111]:

# 定义两个张量
torch.manual_seed(42)
a = torch.randn((3,512))
b = torch.randn((3,512))

#
# # 计算cos相似度
cos_sim = F.cosine_similarity(a, b)
print(cos_sim) # tensor([0.0089, 0.4611, 0.1085])

def cosine_similarity_np_batch(a,b):
    cos_sim_ls = []
    for i, j in zip(a,b):
        print(i.shape, j.shape)
        temp = np.dot(i,j)/(norm(i)*norm(j))
        cos_sim_ls.append(temp)
    return np.array(cos_sim_ls)



tensor([-0.0486, -0.0196, -0.0228])


In [112]:
from sklearn.metrics.pairwise import cosine_similarity

from numpy.linalg import norm


# 调用compute_svd_torch函数计算结果
W_a, bias_a = compute_svd_torch(a,n_components=2)
# W_np_a,bias_np_a = compute_kernel_bias_svd(a,n_components=2)
# W_b, bias_b = compute_svd_torch(b,n_components=2)
# W_np_b,bias_np_b = compute_kernel_bias_svd(b,n_components=2)
print(W_a.shape)
# print(W_np_a.shape)
# cos_sim_torch =  F.cosine_similarity(W_a, W_b)
# cos_sim_np = cosine_similarity_np_batch(W_np_a,W_np_b)

# cos_sim_np = np.dot(W_np_a,W_np_b)/(norm(W_np_b)*norm(W_np_b))


# cos_sim_np

torch.Size([3, 3]) torch.Size([3, 3])
torch.Size([3, 2])


  vecs = torch.tensor(vecs, dtype=torch.float32)
