In [1]:
import cv2
import numpy as np

def cosine_similarity(array1, array2):
    '''
    similarity = array1 * array2 / (||array1|| * ||array2||)
    '''
    assert len(array1) == len(array2), "The shape is different."
    return np.sum(np.multiply(array1, array2)) / (np.linalg.norm(array1) * np.linalg.norm(array2))

array = cv2.imread('test1.png')
print(cosine_similarity(array, array))

0.00027125313846231567


问题：同一张图片，相似度应该为1！
这差距也太大了！

In [2]:
np.linalg.norm(array) # 看着没问题

400496.8554633107

In [3]:
np.multiply(array, array) # 为什么全是1？array有问题？

array([[[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       ...,

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        ...,
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]]], dtype=uint8)

In [4]:
array # array看着没问题

array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]]

In [5]:
# 尝试创建个numpy数组，计算multiply的值，发现没问题
test_array = np.array([255])
np.multiply(test_array, test_array)

array([65025])

In [6]:
# 观察数组元素类型，发现不一致
array.dtype, test_array.dtype

(dtype('uint8'), dtype('int32'))

In [7]:
# 验证想法，将test_array元素类型转为int8，果然重现问题
test_array = np.array([255])
test_array = test_array.astype(np.int8)
np.multiply(test_array, test_array)

array([1], dtype=int8)

定位到问题：在函数中，首先将array转换为int32或者int64，修改代码

In [8]:
import cv2
import numpy as np

def cosine_similarity(array1, array2):
    '''
    similarity = array1 * array2 / (||array1|| * ||array2||)
    '''
    array1 = array1.astype(np.int64)
    array2 = array2.astype(np.int64)
    assert array1.shape == array2.shape, "The shape is different."
    return np.sum(np.multiply(array1, array2)) / (np.linalg.norm(array1) * np.linalg.norm(array2))

array = cv2.imread('test1.png')
print(cosine_similarity(array, array))

1.0


In [9]:
# 再试试两张图片相似度
array = cv2.imread('test1.png')
array2 = cv2.imread('test2.png')
array2.resize(array.shape)
print(cosine_similarity(array, array2))

0.9286179304430939
