# 第7章 近似近傍探索

## 画像の準備

In [None]:
# Install the Kaggle API token and fetch the landscape-pictures dataset.
# NOTE(review): assumes Google Drive is already mounted so that the relative
# path drive/MyDrive/... resolves -- confirm before running on a fresh runtime.
!mkdir -p ~/.kaggle/
!cp drive/MyDrive/visual-search-book/landscape/kaggle.json ~/.kaggle
# Keep the API token private to the current user; the Kaggle CLI warns when
# the key file is readable by other users.
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download arnaud58/landscape-pictures
# -q keeps the per-file listing out of the cell output; -n never overwrites
# existing files, so re-running the cell does not stop at an overwrite prompt.
!unzip -q -n landscape-pictures.zip -d landscape-pictures

## Annoyのインストール

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel; the version stays pinned.
%pip install -q annoy==1.17.0



## ANNインデックスを作成

In [None]:
#!/usr/bin/env python
import pathlib
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import resnet
from tensorflow.keras.preprocessing import image
from annoy import AnnoyIndex

# Directory that holds the unzipped landscape pictures.
dir_path = "./landscape-pictures/"
# Sort the paths so that Annoy item ids map to a stable file order.
image_path_list = sorted(pathlib.Path(dir_path).glob("*.jpg"))
if not image_path_list:
    # Fail early with a clear message instead of an opaque error later in
    # preprocess_input / model.predict on an empty array.
    raise FileNotFoundError(f"No .jpg images found in {dir_path!r}")

# Fill a preallocated array instead of collecting a Python list and then
# converting it with np.array, which would briefly hold two full copies of
# the image data in memory.
# NOTE(review): assumes the default Keras settings (channels_last, float32
# output from img_to_array) -- confirm if the Keras config was changed.
images = np.empty((len(image_path_list), 224, 224, 3), dtype=np.float32)
for i, img_path in enumerate(image_path_list):
    img = image.load_img(img_path, target_size=(224, 224))
    images[i] = image.img_to_array(img)

# Running the augmentation below will probably exhaust memory on Colab.
# images = np.r_[images, tf.image.flip_left_right(images)]
# images = np.r_[images, tf.image.adjust_brightness(images, 0.8)]
preprocessed = resnet.preprocess_input(images)

# Headless ResNet152 with global average pooling: one feature vector per image.
# Arguments that were left at their defaults (or that only apply when
# include_top=True) are omitted.
model = tf.keras.applications.ResNet152(
    include_top=False, weights='imagenet', pooling='avg'
)
features = model.predict(preprocessed)

# Take the vector length from the extracted features rather than hard-coding
# it, so the index always matches the model output.
dimensions = features.shape[1]
index = AnnoyIndex(dimensions, 'euclidean')
for i, feature in enumerate(features):
    index.add_item(i, feature)

# Build 10 trees using all available CPU cores, then persist the index.
index.build(10, n_jobs=-1)
index.save('feature.ann')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5


True

## インデックスのロード

In [None]:
# Load the saved file into a new AnnoyIndex, as one would when reusing the
# index later, rather than into the object that was just built.
# The dimensionality and metric must match those used when the index was built.
index = AnnoyIndex(2048, 'euclidean')
index.load('feature.ann')

True

## インデックスの検索

In [None]:
# Ask for the 10 nearest neighbours of item 0; the displayed result starts
# with the query item itself.
query_item = 0
n_neighbors = 10
index.get_nns_by_item(query_item, n_neighbors)

[0, 3268, 2609, 7, 1636, 3983, 3419, 3364, 3712, 3892]