In [1]:
# Set explicit colours for plot text; otherwise the default black text
# cannot be seen.
import matplotlib as mpl

COLOR1 = 'red'    # general figure text
COLOR2 = 'blue'   # axis labels and tick marks/labels

# Apply all four colour overrides in one call.
mpl.rcParams.update({
    'text.color': COLOR1,
    'axes.labelcolor': COLOR2,
    'xtick.color': COLOR2,
    'ytick.color': COLOR2,
})

In [2]:
import os, sys
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
import tensorflow as tf
# Similarity matrix: every image is turned into a feature vector and the
# vectors are compared pairwise by the cosine of the angle between them.
# The closer the value is to 1, the more similar the pair (-1 <= cosine <= 1).
# sklearn.metrics.pairwise.cosine_similarity could be used instead.
def cosine_similarity(ratings):
    """Return the pairwise cosine-similarity matrix of the rows of ``ratings``.

    Args:
        ratings: 2-D array-like of shape (n_samples, n_features), or an
            object exposing ``.dot``, ``.T`` and ``.toarray()`` (e.g. a
            SciPy sparse matrix). Each row is one feature vector.

    Returns:
        numpy.ndarray of shape (n_samples, n_samples) whose entry ``[i, j]``
        is the cosine of the angle between row ``i`` and row ``j``. Any pair
        involving an all-zero row gets similarity 0 instead of NaN.
    """
    # Accept plain nested lists as well; objects with ``toarray`` are left
    # alone so that their own ``dot`` implementation is used.
    if not hasattr(ratings, "toarray"):
        ratings = np.asarray(ratings)

    # Gram matrix: entry [i, j] is the dot product of row i and row j.
    sim = ratings.dot(ratings.T)
    # Convert to a dense ndarray when the product is not one already.
    if not isinstance(sim, np.ndarray):
        sim = sim.toarray()

    # The diagonal holds the squared norms, so its square root is the norm
    # of each row.
    norms = np.sqrt(np.diagonal(sim))
    # A zero vector has no direction: dividing by its norm would give 0/0 =
    # NaN. Its dot products are all 0, so dividing by 1 yields similarity 0.
    norms[norms == 0] = 1.0
    norms = norms[np.newaxis, :]

    # Divide column-wise by the norms, then row-wise by the norms.
    return sim / norms / norms.T

# main
def main(image_dir="0.data/images", query_name="bear.jpg"):
    """Rank the images in a directory by similarity to one query image.

    Every ``.jpg`` file in ``image_dir`` is loaded at 224x224, passed through
    VGG16 (ImageNet weights, ``include_top=False``), and the flattened
    feature maps are compared pairwise with ``cosine_similarity``. The
    function prints the similarity matrix (scaled by 100 and rounded), the
    index of the query image, its row of similarities, and all file names
    ordered from most to least similar to the query.

    Args:
        image_dir: Directory scanned for ``.jpg`` files. The extension is
            matched case-insensitively.
        query_name: File name of the reference image, matched
            case-insensitively against the loaded file names.

    Raises:
        FileNotFoundError: If ``image_dir`` contains no ``.jpg`` file.
        ValueError: If ``query_name`` is not among the loaded images.
    """
    # 1. Collect the data.
    # os.listdir returns entries in arbitrary order, so sort them to keep the
    # matrix indices reproducible between runs and machines.
    y_test = sorted(                      # file names
        name for name in os.listdir(image_dir)
        if name.lower().endswith(".jpg")
    )
    if not y_test:
        raise FileNotFoundError("no .jpg images found in " + repr(image_dir))

    # Locate the query image up front so a wrong name fails before the
    # model is loaded and run.
    wanted = query_name.lower()
    matches = [i for i, name in enumerate(y_test) if name.lower() == wanted]
    if not matches:
        raise ValueError(repr(query_name) + " not found in " + repr(image_dir))
    inputNo = matches[0]

    # Load every image once and stack them in a single step instead of
    # re-concatenating the growing batch inside the loop.
    images = []
    for img_path in y_test:
        img = tf.keras.preprocessing.image.load_img(
            os.path.join(image_dir, img_path), target_size=(224, 224))
        images.append(tf.keras.preprocessing.image.img_to_array(img))
    x_test = np.stack(images)             # image batch

    # Preprocessing expected by VGG16.
    x_test = preprocess_input(x_test)

    # 2. Model: VGG16 without its classification head.
    model = VGG16(weights='imagenet', include_top=False)
    # 3. Extract features: one feature map per image.
    features = model.predict(x_test)

    # 4. Similarity matrix.
    # Flatten each feature map to one vector; -1 lets NumPy infer the length.
    features_compress = features.reshape(len(y_test), -1)
    # Scale to 0-100 and drop the decimals.
    sim = np.round(cosine_similarity(features_compress) * 100)
    print(sim)
    print('-' * 10)

    # 5. Use the query image's row to find the closest images.
    print('inputNo =', inputNo)
    print('-' * 10)
    # Similarity of the query to every image (closer to 100 = more similar).
    print(sim[inputNo])
    print('-' * 10)
    # Indices sorted by descending similarity; a stable sort keeps tied
    # scores in a deterministic order.
    top = np.argsort(-sim[inputNo], kind="stable")

    # Map the sorted indices back to file names.
    # For only the top two: recommend = [y_test[i] for i in top[:2]]
    recommend = [y_test[i] for i in top]
    print(recommend)

# Run the demo only when this code is executed as the main program
# (e.g. as a script or a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

[[100.  21.  22.  17.  10.  16.  13.   3.   4.   9.  11.  11.   7.  10.]
 [ 21. 100.  15.  11.  12.  12.   9.   2.   5.   6.  14.   8.   5.   4.]
 [ 22.  15. 100.  24.  10.  10.   7.   3.   3.   7.  10.   8.   9.   6.]
 [ 17.  11.  24. 100.  11.  12.   9.   3.   9.  13.   9.   9.   8.  11.]
 [ 10.  12.  10.  11. 100.  11.   8.   2.   4.   6.  10.   5.   4.  11.]
 [ 16.  12.  10.  12.  11. 100.  29.   2.   8.   7.  16.  19.  10.   4.]
 [ 13.   9.   7.   9.   8.  29. 100.   4.   8.   7.  10.  17.   7.   4.]
 [  3.   2.   3.   3.   2.   2.   4. 100.   5.   5.   2.   3.   4.   2.]
 [  4.   5.   3.   9.   4.   8.   8.   5. 100.  11.   9.  12.   9.   6.]
 [  9.   6.   7.  13.   6.   7.   7.   5.  11. 100.  14.  12.  13.   9.]
 [ 11.  14.  10.   9.  10.  16.  10.   2.   9.  14. 100.  35.  31.   3.]
 [ 11.   8.   8.   9.   5.  19.  17.   3.  12.  12.  35. 100.  27.   6.]
 [  7.   5.   9.   8.   4.  10.   7.   4.   9.  13.  31.  27. 100.   4.]
 [ 10.   4.   6.  11.  11.   4.   4.   2.   6.   9.