# 抽出した特徴量のロード

右側の Data パネルの Connections から "Insert to code" を実行し、credential（以降のコードでは変数 `credentials_1` として参照します）をこのノートブックに挿入します。

次のセルの `bucket_name` に、特徴量ファイル（.npy）が格納されているバケット名を指定します。

In [None]:
from botocore.client import Config
import ibm_boto3

# Object-storage client, configured from the `credentials_1` mapping that the
# notebook text says is inserted through "Insert to code".
cos = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=credentials_1['api_key'],
    ibm_auth_endpoint=credentials_1['iam_url'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials_1['url'],
)

# Bucket to download from. It is empty here; the notebook text asks the user
# to specify it before running this cell.
bucket_name = ''

# Feature files to fetch. Each object key doubles as the local destination
# path, so one string per file is enough.
feature_files = (
    'data/train_naive_inceptionV3_features.npy',
    'data/validation_naive_inceptionV3_features.npy',
    'data/test_naive_inceptionV3_features.npy',
)
for feature_file in feature_files:
    cos.download_file(Filename=feature_file, Bucket=bucket_name, Key=feature_file)

In [None]:
import numpy as np
import pandas as pd

# Load, for every split, the saved feature array and the matching CSV table.
train_feature = np.load('./data/train_naive_inceptionV3_features.npy')
train_df = pd.read_csv('./data/train.csv')

val_feature = np.load('./data/validation_naive_inceptionV3_features.npy')
val_df = pd.read_csv('./data/validation.csv')

test_feature = np.load('./data/test_naive_inceptionV3_features.npy')
test_df = pd.read_csv('./data/test.csv')

# Print the feature-array shape next to the table shape for each split so a
# mismatch between them is easy to spot.
for split_label, split_feature, split_df in (
    ('Train:\t\t', train_feature, train_df),
    ('Validation:\t', val_feature, val_df),
    ('Test:\t\t', test_feature, test_df),
):
    print(split_label, split_feature.shape, split_df.shape)

# k-NN モデルの生成

In [None]:
# Normalize features
def _l2_normalize(features):
    """Scale every row of ``features`` to unit Euclidean length.

    Args:
        features: 2-D array with one feature vector per row.

    Returns:
        A ``(normalized, norms)`` tuple. ``norms`` is the per-row Euclidean
        norm with shape ``(n_rows, 1)``; ``normalized`` is ``features``
        divided row-wise by those norms. Rows whose norm is exactly zero are
        returned unchanged (all zeros) instead of turning into NaN.
    """
    norms = np.linalg.norm(features, axis=1, keepdims=True)
    # Dividing an all-zero row by its zero norm would give 0/0 = NaN; use a
    # divisor of 1 for those rows so they simply stay zero.
    divisor = norms.copy()
    divisor[divisor == 0] = 1
    return features / divisor, norms


train_norm_feature, train_norm = _l2_normalize(train_feature)
val_norm_feature, val_norm = _l2_normalize(val_feature)
test_norm_feature, test_norm = _l2_normalize(test_feature)

In [None]:
# Combine the training and validation splits into one search set.
# Training rows come first in both the feature array and the table, and the
# table is renumbered 0..n-1 so its index lines up with the array's rows.
train_val_norm_feature = np.concatenate([train_norm_feature, val_norm_feature], axis=0)
train_val_df = pd.concat([train_df, val_df], axis=0, ignore_index=True)

In [None]:
# Implement KNN model
import os
from sklearn.neighbors import NearestNeighbors

# Make sure the output directory exists. `exist_ok=True` replaces the separate
# exists-check and so has no window in which the directory can appear between
# the check and the creation.
os.makedirs('./result', exist_ok=True)

# Nearest-neighbour index over the merged train+validation features.
# Minkowski distance with p=2 is the Euclidean distance; 50 neighbours are
# returned per query and n_jobs=-1 asks for all available processors.
knn = NearestNeighbors(
    n_neighbors=50,
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
    p=2,
    n_jobs=-1,
)
knn.fit(train_val_norm_feature)

In [None]:
# Search the first 50 neighbors
distance, neighbor_index = knn.kneighbors(test_norm_feature, return_distance=True)

# `neighbor_index` holds row positions into the array given to `knn.fit`, and
# `train_val_df` is row-aligned with that array. Look the labels up by
# position in a single vectorised step instead of one `.loc` call per query;
# the result has one row of neighbour landmark ids per test sample.
landmark_ids = train_val_df['landmark_id'].values
predictions = landmark_ids[neighbor_index]

## Accuracyの算出

In [None]:
# Helper function
def accuracy(true_label, prediction, top=1):
    """Return the top-``top`` accuracy of ``prediction`` against ``true_label``.

    A sample counts as correct when its true label appears among the first
    ``top`` entries of the prediction row at the same position.

    Args:
        true_label: Sequence of ground-truth labels, one per sample.
        prediction: Per-sample candidate rows, indexable by sample position;
            each row must support slicing and ``in`` (a 2-D NumPy array or a
            list of lists both work).
        top: Number of leading candidates per row to consider.

    Returns:
        The fraction of samples whose label is found, as a float. Returns
        ``0.0`` when ``true_label`` is empty instead of dividing by zero.

    Raises:
        IndexError: If ``prediction`` has fewer rows than ``true_label``.
    """
    total = len(true_label)
    if total == 0:
        # No samples to score; avoid ZeroDivisionError.
        return 0.0

    # Index `prediction` by position (rather than zip) so that a prediction
    # set that is too short fails loudly instead of being silently truncated.
    hits = sum(
        1 for i, label in enumerate(true_label) if label in prediction[i][:top]
    )
    return hits / total

In [None]:
# Report top-k accuracy on the test split for several cut-offs.
# The ground-truth column is extracted once and reused for every cut-off.
test_labels = test_df['landmark_id'].values
for caption, k in (
    ('Top  1 accuracy:\t', 1),
    ('Top  5 accuracy:\t', 5),
    ('Top 10 accuracy:\t', 10),
    ('Top 20 accuracy:\t', 20),
):
    print(caption, accuracy(test_labels, predictions, top=k))

In [None]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
import glob
%matplotlib inline

# Number of query (test) images to display, capped by the rows actually
# available so a small test set does not raise, and the number of retrieved
# neighbours shown next to each query.
num_query = min(30, len(test_df))
num_result = 10
# Grid layout per row: column 1 is the query, column 2 is left empty as a
# spacer, columns 3.. hold the retrieved images.
num_cols = num_result + 2

# Blue frame drawn around a retrieved image whose landmark matches the query.
border_color = (0, 0, 255)
border_width = 20

# Header strip with the two column captions.
plt.figure(figsize=(25, 1))
plt.subplots_adjust(wspace=0.01, hspace=0.01)
plt.subplot(1, num_cols, 1)
plt.text(0, 0, "query", fontsize=20)
plt.axis('off')
plt.subplot(1, num_cols, (3, num_cols))
# NOTE(review): "Similary" looks like a typo for "Similar"; the displayed
# label is left unchanged here.
plt.text(0, 0, "Similary Images", fontsize=20)
plt.axis('off')

# Pull the needed columns out once. Rows are addressed by position because
# both the test features and `neighbor_index` are positional.
test_image_ids = test_df['image_id'].values
test_landmark_ids = test_df['landmark_id'].values
gallery_image_ids = train_val_df['image_id'].values
gallery_landmark_ids = train_val_df['landmark_id'].values

# 2.5 inches of height per query row (75 for the full 30 rows).
plt.figure(figsize=(25, 2.5 * max(num_query, 1)))
plt.subplots_adjust(wspace=0.02)
for i in range(num_query):
    row_offset = i * num_cols
    query_landmark_id = test_landmark_ids[i]

    # Query image in the first column of the row.
    plt.subplot(num_query, num_cols, row_offset + 1)
    # The context manager closes the image file once its pixels are copied
    # into an array, so file handles do not pile up across the grid.
    with Image.open('data/test/' + str(test_image_ids[i]) + '.jpg') as im:
        plt.imshow(np.asarray(im))
    plt.axis('off')

    for j in range(num_result):
        plt.subplot(num_query, num_cols, row_offset + j + 3)
        # Hide the axes up front so a cell whose image cannot be located is
        # shown as a clean blank.
        plt.axis('off')

        neighbor = neighbor_index[i][j]
        # The image may be in any sub-directory of data/; only display it
        # when exactly one file matches.
        matches = glob.glob('data/*/' + str(gallery_image_ids[neighbor]) + '.jpg')
        if len(matches) != 1:
            continue

        with Image.open(matches[0]) as im:
            if query_landmark_id == gallery_landmark_ids[neighbor]:
                # Same landmark as the query: outline the image edges.
                draw = ImageDraw.Draw(im)
                width, height = im.width, im.height
                draw.line((0, 0, 0, height), fill=border_color, width=border_width)
                draw.line((0, height, width, height), fill=border_color, width=border_width)
                draw.line((width, height, width, 0), fill=border_color, width=border_width)
                draw.line((width, 0, 0, 0), fill=border_color, width=border_width)
            plt.imshow(np.asarray(im))
        plt.axis('off')

plt.show()