# 第6章 近似近傍探索

## 画像のダウンロード

In [1]:
# Install the Kaggle API token from Google Drive, then fetch and extract the
# landscape-pictures dataset into ./landscape-pictures.
!mkdir -p ~/.kaggle/
!cp drive/MyDrive/visual-search-book/landscape/kaggle.json ~/.kaggle
# Keep the API token private to the current user; the Kaggle CLI warns when
# the key file is readable by other users.
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download arnaud58/landscape-pictures
# -n: never overwrite files that already exist, so re-running this cell does
# not stop at unzip's interactive "replace?" prompt.
!unzip -n landscape-pictures.zip -d landscape-pictures

Downloading landscape-pictures.zip to /content
 97% 602M/620M [00:03<00:00, 211MB/s]
100% 620M/620M [00:04<00:00, 159MB/s]
Archive:  landscape-pictures.zip
  inflating: landscape-pictures/00000000.jpg  
  inflating: landscape-pictures/00000000_(2).jpg  
  inflating: landscape-pictures/00000000_(3).jpg  
  inflating: landscape-pictures/00000000_(4).jpg  
  inflating: landscape-pictures/00000000_(5).jpg  
  inflating: landscape-pictures/00000000_(6).jpg  
  inflating: landscape-pictures/00000000_(7).jpg  
  inflating: landscape-pictures/00000001.jpg  
  inflating: landscape-pictures/00000001_(2).jpg  
  inflating: landscape-pictures/00000001_(3).jpg  
  inflating: landscape-pictures/00000001_(4).jpg  
  inflating: landscape-pictures/00000001_(5).jpg  
  inflating: landscape-pictures/00000001_(6).jpg  
  inflating: landscape-pictures/00000001_(7).jpg  
  inflating: landscape-pictures/00000002.jpg  
  inflating: landscape-pictures/00000002_(2).jpg  
  inflating: landscape-pictures/00000002

## Annoyのインストール

In [2]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# The version is pinned so the notebook keeps building the same Annoy release.
%pip install annoy==1.17.0

Collecting annoy==1.17.0
  Downloading annoy-1.17.0.tar.gz (646 kB)
     |████████████████████████████████| 646 kB 5.0 MB/s
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... done
  Created wheel for annoy: filename=annoy-1.17.0-cp37-cp37m-linux_x86_64.whl size=391601 sha256=009b39dee106781e7de37e1c18744952f6b38e70aa25483f33cbb1d5199a7088
  Stored in directory: /root/.cache/pip/wheels/4f/e8/1e/7cc9ebbfa87a3b9f8ba79408d4d31831d67eea918b679a4c07
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.0


## ANNインデックスの作成

In [3]:
import pathlib
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import resnet
from tensorflow.keras.preprocessing import image
from annoy import AnnoyIndex

# Build an Annoy index over ResNet152 features of every image in dir_path and
# persist it to 'feature.ann'.

# Directory the dataset archive was extracted into, and the (height, width)
# every image is resized to when it is loaded.
dir_path = "./landscape-pictures/"
target_size = (224, 224)

# Sorted so that Annoy item id i always corresponds to image_path_list[i].
image_path_list = sorted(pathlib.Path(dir_path).glob("*.jpg"))
if not image_path_list:
    # Fail early with a clear message instead of an obscure shape error
    # further down in preprocessing / prediction.
    raise FileNotFoundError("No .jpg images found in {!r}".format(dir_path))

# Fill one pre-allocated array instead of collecting a Python list and then
# converting it with np.array(): the conversion copies every image while the
# list is still alive, which doubles peak memory.
# float32 is assumed to match img_to_array's default output dtype.
images = np.empty((len(image_path_list),) + target_size + (3,), dtype=np.float32)
for i, img_path in enumerate(image_path_list):
    img = image.load_img(img_path, target_size=target_size)
    images[i] = image.img_to_array(img)

# On Colab, running the augmentations below will probably exhaust memory.
# images = np.r_[images, tf.image.flip_left_right(images)]
# images = np.r_[images, tf.image.adjust_brightness(images, 0.8)]
preprocessed = resnet.preprocess_input(images)

# ImageNet-pretrained ResNet152 without the classification head; with
# pooling='avg' the model outputs one pooled feature vector per image.
model = tf.keras.applications.ResNet152(
    include_top=False, weights='imagenet', pooling='avg'
)
features = model.predict(preprocessed)

# Take the vector length from the model output rather than hard-coding it, so
# the index stays consistent if the backbone is swapped for another model.
dimensions = features.shape[1]
index = AnnoyIndex(dimensions, 'euclidean')
for i, feature in enumerate(features):
    index.add_item(i, feature)

# Build 10 trees; n_jobs=-1 lets Annoy use all available CPU cores.
index.build(10, n_jobs=-1)
index.save('feature.ann')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5


True

## インデックスのロード

In [4]:
# Load the saved file into a fresh AnnoyIndex so the search below runs against
# the index read back from disk, not the object that was built in memory.
# The vector length and metric must match the ones used when building.
index = AnnoyIndex(dimensions, 'euclidean')
index.load('feature.ann')

True

## インデックスの検索

In [5]:
# Ask the index for the approximate nearest neighbours of one stored item.
# The returned values are item ids, i.e. positions in the order in which the
# feature vectors were added to the index.
query_item = 0
n_neighbors = 10
index.get_nns_by_item(query_item, n_neighbors)

[0, 3268, 2609, 7, 1636, 3983, 3419, 3364, 3712, 3892]