# 部分採用，擷取特徵
模型部分採用：只擷取特徵，不作辨識。

- model = VGG16(weights='imagenet', include_top=False) 的 include_top=False 表示不含頂端的三層全連接層(辨識層)。
- 使用 model.summary() 可以比較與 include_top=True 的差別：include_top=False 少了一層 Flatten 與三層 Dense (fc1、fc2、predictions)。
- 最後得到的特徵維度為 (1, 7, 7, 512)：第一維 1 是批次大小 (一張圖片)，輸入為 224×224 時最後一層的 Feature Map 寬高為 (7, 7)，共有 512 個通道 (channels，即最後一個卷積區塊的濾波器數)。

In [1]:
# Limit how much GPU memory TensorFlow may use.
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
# list_physical_devices returns an empty list on a CPU-only machine, so only
# configure the first GPU when one is actually present (gpus[0] would
# otherwise raise IndexError and abort the notebook).
if gpus:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])

In [2]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

# Pre-trained VGG16 without the top classification layers (include_top=False).
model = VGG16(weights='imagenet', include_top=False)

# Any image will do; an elephant photo is used here.
img_path = './images/elephant.jpg'
# Load the image file, resized to width/height (224, 224).
img = image.load_img(img_path, target_size=(224, 224))
# Convert to an array and prepend a batch axis, giving (1, 224, 224, 3);
# the last axis holds the colour channels.
pixels = image.img_to_array(img)
x = preprocess_input(pixels[np.newaxis, ...])

# Run the headless network to obtain the feature map.
features = model.predict(x)

In [3]:
# Print the layer summary of the model built with include_top=False.
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [4]:
# Build the full VGG16 (include_top=True) and print its summary so it can be
# compared with the include_top=False model.
model2 = VGG16(weights='imagenet', include_top=True)
model2.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [5]:
# Dump the extracted feature values returned by model.predict.
print(features)

[[[[ 0.         0.         0.        ...  0.         0.
     0.       ]
   [ 0.         0.        41.877     ...  0.         0.
     0.       ]
   [ 1.0922072  0.        22.865     ...  0.         0.
     0.       ]
   ...
   [ 0.         0.         0.        ...  0.         0.
     0.       ]
   [ 0.         0.         0.        ...  0.         0.
     0.       ]
   [ 0.         0.         0.        ...  0.         0.
     0.       ]]

  [[ 0.         0.        36.385128  ...  0.         0.
     3.2606556]
   [ 0.         0.        80.499245  ...  8.4254465  0.
     0.       ]
   [ 0.         0.        48.482693  ...  0.         0.
     0.       ]
   ...
   [ 0.         0.         0.        ...  4.3430004  0.
     0.       ]
   [ 0.         0.         0.        ...  0.         0.
     0.       ]
   [ 0.         0.         0.        ...  0.         0.
     0.       ]]

  [[ 0.         0.        10.000868  ...  0.         0.
     2.2095578]
   [ 0.         0.        25.23596   ... 42.21

In [6]:
# Show the shape of the extracted feature array.
print(features.shape)

(1, 7, 7, 512)


# 使用 cosine_similarity 比較特徵向量

In [7]:
from os import listdir
from os.path import isfile, join

# Collect the names of all JPEG files in the images directory.
img_path = './images/'
# os.listdir() returns entries in arbitrary order, so sort them
# (case-insensitively) to keep positional indexing into image_files
# reproducible across platforms. The extension test requires a real
# ".jpg" suffix and ignores letter case.
image_files = np.array(sorted(
    (f for f in listdir(img_path)
     if isfile(join(img_path, f)) and f.lower().endswith('.jpg')),
    key=str.casefold,
))
image_files

array(['bird01.jpg', 'elephant.jpg', 'elephant2.jpg', 'input.jpg',
       'style.jpg', 'Tiger.jpg', 'Tiger2.jpg', 'Tiger3.jpg',
       'with-mask.jpg', 'without-mask.jpg', '太陽花.jpg'], dtype='<U16')

In [8]:
# Run every image file through the model to obtain its feature map.
import numpy as np

# Load each file resized to (224, 224) and stack the pixel arrays once along
# a new leading batch axis. This avoids re-concatenating (and re-copying) the
# whole batch for every image and needs no empty-array sentinel.
X = np.stack([
    image.img_to_array(
        image.load_img(join(img_path, f), target_size=(224, 224)))
    for f in image_files
])

X = preprocess_input(X)

# Predict: one feature map per image.
features = model.predict(X)

features.shape, X.shape

((11, 7, 7, 512), (11, 224, 224, 3))

In [12]:
# Compare feature vectors with cosine similarity.
from sklearn.metrics.pairwise import cosine_similarity

# Flatten each image's feature map into a single row vector.
features2 = features.reshape((features.shape[0], -1))
print(features2.shape)
# Query image: Tiger3.jpg
no = -4
print(image_files[no])
# Select the query row by index and reshape it to 2-D; a slice such as
# features2[no:no+1] silently becomes empty when no == -1.
# NOTE(review): only the images positioned before the query (features2[:no])
# are used as candidates; later files are not compared.
similar_list = cosine_similarity(
    features2[no].reshape(1, -1), features2[:no], dense_output=False)
# Rank once (highest similarity first) and reuse the order for both the
# scores and the matching file names.
order = np.argsort(similar_list[0])[::-1]
print(similar_list[0][order])
image_files[:no][order]

(11, 25088)
Tiger3.jpg
[0.31442708 0.2666164  0.12536138 0.09697835 0.07170606 0.04489492
 0.04210067]


array(['Tiger.jpg', 'Tiger2.jpg', 'style.jpg', 'elephant.jpg',
       'elephant2.jpg', 'input.jpg', 'bird01.jpg'], dtype='<U16')

In [15]:
# Query image: elephant.jpg
no = 1
print(image_files[no])
# Query row (kept 2-D) and the candidate rows that follow it.
query = features2[no:no+1]
candidates = features2[no+1:]
similar_list = cosine_similarity(query, candidates, dense_output=False)
# Rank the candidates from most to least similar.
scores = similar_list[0]
ranking = np.argsort(scores)[::-1]
print(scores[ranking])
image_files[no+1:][ranking]

elephant.jpg
[0.2941918  0.1914222  0.15632682 0.09697835 0.07013359 0.04092815
 0.03696896 0.02463236 0.02058096]


array(['elephant2.jpg', 'Tiger2.jpg', 'Tiger.jpg', 'Tiger3.jpg',
       'style.jpg', 'with-mask.jpg', '太陽花.jpg', 'input.jpg',
       'without-mask.jpg'], dtype='<U16')