In [1]:
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm, tqdm_notebook
import os
import time
from tensorflow.keras.preprocessing import image
from keras.models import load_model
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input
from sklearn.neighbors import NearestNeighbors

In [2]:
def extract_features(img_path, model):
    """Return a unit-length feature vector for a single image file.

    The image at `img_path` is loaded at 224x224, turned into a one-image
    batch, run through `preprocess_input`, and fed to `model.predict`.
    The prediction is flattened to 1-D and divided by its Euclidean norm.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    model :
        Object exposing `predict(batch)`; its output is flattened as-is.

    Returns
    -------
    numpy.ndarray
        The flattened prediction scaled by its norm.
    """
    height, width, _channels = (224, 224, 3)
    pil_img = image.load_img(img_path, target_size = (height, width))

    # Prepend a batch axis so the single image looks like a batch of one.
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    batch = preprocess_input(batch)

    embedding = model.predict(batch).flatten()
    return embedding / norm(embedding)

In [3]:
def get_file_list(root_dir):
    """Recursively collect the path of every file below `root_dir`.

    Each returned path is the containing directory (as reported by
    `os.walk`) joined with the file name. The number of files found is
    printed before returning.

    Parameters
    ----------
    root_dir : str
        Directory to walk.

    Returns
    -------
    list of str
        File paths in `os.walk` traversal order (not sorted).
    """
    collected = [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(root_dir)
        for name in names
    ]
    print(len(collected))
    return collected

In [4]:
root_dir = "./cat_test_images"

# Sort the paths so the order is stable between runs; the feature lists
# built later are matched to these paths by position.
filenames = get_file_list(root_dir)
filenames.sort()

60


In [5]:
def make_features(model_name):
    """Extract features for every image in `filenames` with one saved model.

    Loads the model stored at `model_name`, runs `extract_features` on each
    path in the notebook-level `filenames` list (in that list's order), and
    pickles the resulting list of vectors under ``./features/``.

    Parameters
    ----------
    model_name : str
        Path of the saved model, passed to `load_model`. The text between
        the last ``/`` and the first following ``.`` is used in the output
        file name.

    Returns
    -------
    str
        Path of the pickle file that was written.
    """
    # `tqdm.tqdm_notebook` is deprecated and emits a warning on every call;
    # `tqdm.notebook.tqdm` is the supported spelling of the same progress bar.
    from tqdm.notebook import tqdm as notebook_tqdm

    model = load_model(model_name)

    feature_list = [
        extract_features(img_path, model)
        for img_path in notebook_tqdm(filenames)
    ]

    feature_file_name = f"./features/cat_features_{model_name.split('/')[-1].split('.')[0]}.pickle"

    # Create the output directory on first use instead of failing in open().
    os.makedirs(os.path.dirname(feature_file_name), exist_ok = True)

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(feature_file_name, "wb") as feature_file:
        pickle.dump(feature_list, feature_file)

    return feature_file_name

In [6]:
# Persist the image paths next to the feature pickles so that saved feature
# vectors can be matched back to their files by position.
os.makedirs("./features", exist_ok = True)  # open() would fail if the directory is missing
with open("./features/cat_filenames.pickle", "wb") as filenames_file:
    pickle.dump(filenames, filenames_file)

In [7]:
def cat_name(file_name):
    """Return the label part of an image path.

    Takes the final path component and drops its last five characters
    (presumably a one-character index plus a four-character extension such
    as ``1.jpg`` -- confirm against the dataset's naming scheme).

    Parameters
    ----------
    file_name : str
        Path of an image file.

    Returns
    -------
    str
        Base name without its last five characters; empty if the base name
        has five characters or fewer.
    """
    # os.path.basename honours the platform's separators, matching the
    # os.path.join used to build these paths; splitting on '/' alone does not.
    return os.path.basename(file_name)[:-5]

In [12]:
def top3_score(feature_list):
    """Count images whose three nearest neighbours include the same cat.

    A brute-force Euclidean nearest-neighbour index is fitted on
    `feature_list`. For each vector, the three closest *other* vectors are
    looked up, and the score goes up by one if any of them has the same
    `cat_name` as the query. Names come from the notebook-level `filenames`
    list, which must be aligned with `feature_list` by position.

    Parameters
    ----------
    feature_list : sequence of array-like
        One feature vector per image.

    Returns
    -------
    int
        Number of images with at least one same-name image among their
        three nearest neighbours; 0 for an empty `feature_list`.
    """
    sample_count = len(feature_list)
    if sample_count == 0:
        return 0

    # Never ask for more neighbours than there are samples; a fixed 5 makes
    # kneighbors raise on collections smaller than that.
    neighbors = NearestNeighbors(
        n_neighbors = min(5, sample_count), algorithm = "brute", metric = "euclidean"
    ).fit(feature_list)

    score = 0
    # Iterate over every sample rather than a hard-coded count of 60.
    for image_index in range(sample_count):
        _, indices = neighbors.kneighbors([feature_list[image_index]])

        # Remove the query itself wherever it ranks: with tied distances it
        # is not guaranteed to be first, and it would always match itself.
        candidates = [idx for idx in indices[0] if idx != image_index][:3]  # top 3

        # +1 if any of the three closest images shows the same cat.
        query_cat = cat_name(filenames[image_index])
        if any(cat_name(filenames[idx]) == query_cat for idx in candidates):
            score += 1

    return score

In [13]:
def top1_score(feature_list):
    """Count images whose single nearest neighbour is the same cat.

    A brute-force Euclidean nearest-neighbour index is fitted on
    `feature_list`. For each vector, the closest *other* vector is looked
    up, and the score goes up by one if it has the same `cat_name` as the
    query. Names come from the notebook-level `filenames` list, which must
    be aligned with `feature_list` by position.

    Parameters
    ----------
    feature_list : sequence of array-like
        One feature vector per image.

    Returns
    -------
    int
        Number of images whose nearest other image has the same name;
        0 for an empty `feature_list`.
    """
    sample_count = len(feature_list)
    if sample_count == 0:
        return 0

    # Never ask for more neighbours than there are samples; a fixed 5 makes
    # kneighbors raise on collections smaller than that.
    neighbors = NearestNeighbors(
        n_neighbors = min(5, sample_count), algorithm = "brute", metric = "euclidean"
    ).fit(feature_list)

    score = 0
    # Iterate over every sample rather than a hard-coded count of 60.
    for image_index in range(sample_count):
        _, indices = neighbors.kneighbors([feature_list[image_index]])

        # Remove the query itself wherever it ranks: with tied distances it
        # is not guaranteed to be first, and it would always match itself.
        candidates = [idx for idx in indices[0] if idx != image_index]

        # +1 if the single closest image shows the same cat.
        if candidates and cat_name(filenames[image_index]) == cat_name(filenames[candidates[0]]):
            score += 1

    return score

In [10]:
# Saved checkpoints for epochs 0 through 10.
model_list = []
for epoch in range(11):
    model_list.append(f"./knn_models/model_epoch_{epoch}.h5")

# Extract and pickle features once per checkpoint, keeping the output paths
# in the same order as model_list.
feature_files = [make_features(model_path) for model_path in model_list]

2022-03-03 13:46:10.019340: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(len(filenames))):


  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]



  0%|          | 0/60 [00:00<?, ?it/s]

In [14]:
# Report top-1 and top-3 retrieval scores for each checkpoint's features.
for feature_file in feature_files:
    # NOTE: pickle.load can execute arbitrary code; only load feature files
    # that this notebook wrote itself.
    with open(feature_file, "rb") as feature_handle:  # closes the handle after loading
        feature_list = pickle.load(feature_handle)

    # File stem, e.g. "cat_features_model_epoch_0".
    label = feature_file.split('/')[-1].split('.')[0]

    print(f"{label} top 1 score : {top1_score(feature_list)}")
    print(f"{label} top 3 score : {top3_score(feature_list)}")
    print()

cat_features_model_epoch_0 top 1 score : 30
cat_features_model_epoch_0 top 3 score : 44

cat_features_model_epoch_1 top 1 score : 25
cat_features_model_epoch_1 top 3 score : 35

cat_features_model_epoch_2 top 1 score : 22
cat_features_model_epoch_2 top 3 score : 34

cat_features_model_epoch_3 top 1 score : 20
cat_features_model_epoch_3 top 3 score : 29

cat_features_model_epoch_4 top 1 score : 21
cat_features_model_epoch_4 top 3 score : 35

cat_features_model_epoch_5 top 1 score : 19
cat_features_model_epoch_5 top 3 score : 30

cat_features_model_epoch_6 top 1 score : 18
cat_features_model_epoch_6 top 3 score : 36

cat_features_model_epoch_7 top 1 score : 16
cat_features_model_epoch_7 top 3 score : 30

cat_features_model_epoch_8 top 1 score : 14
cat_features_model_epoch_8 top 3 score : 34

cat_features_model_epoch_9 top 1 score : 12
cat_features_model_epoch_9 top 3 score : 34

cat_features_model_epoch_10 top 1 score : 11
cat_features_model_epoch_10 top 3 score : 27

