### Running script on TF 2.0

In [1]:
try:
  %tensorflow_version 2.x # enable TF 2.x in Colab
except Exception:
  pass

`%tensorflow_version` only switches the major version: `1.x` or `2.x`.
You set: `2.x # enable TF 2.x in Colab`. This will be interpreted as: `2.x`.


TensorFlow 2.x selected.


### Libraries imported

In [0]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
from scipy import spatial
from os import listdir
import IPython.display as display
import matplotlib.pyplot as plt
from random import randrange

In [3]:
# Mount Google Drive under /content/gdrive so the dataset archive stored there
# can be copied into the runtime. force_remount=True is passed so the mount is
# redone when this cell is re-run.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


### Download VGG

In [4]:
# VGG16 with ImageNet weights and without its top (classifier) layers; it is
# used only as a fixed feature extractor by get_embeddings.
model = VGG16(weights='imagenet', include_top=False)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
# Copy the dataset archive from the mounted Drive into the current directory.
# The source path is relative, so this assumes the working directory is the
# one containing the `gdrive` mount point.
cp gdrive/My\ Drive/celeb.zip .

### Unzip file

In [0]:
%%capture
!unzip celeb.zip -d celeb

### Image to Embedding

In [0]:
# Root folder of the extracted dataset; it contains the 'train' and 'test'
# sub-folders used below.
base_path = '/content/celeb'

In [19]:
import pathlib

# The dataset root holds one sub-folder per split.
train_images_folder = pathlib.Path(base_path, 'train')
test_images_folder = pathlib.Path(base_path, 'test')

print(train_images_folder)

/content/celeb/train


In [0]:
# Collect every entry two levels below each split folder (<person>/<file>)
# and keep the paths as plain strings.
train_image_paths = [str(entry) for entry in train_images_folder.glob('*/*')]

test_image_paths = [str(entry) for entry in test_images_folder.glob('*/*')]

In [23]:
# Number of training images, then number of test images (one per line).
print(len(train_image_paths), len(test_image_paths), sep='\n')

1144
286


In [21]:
# Peek at the first few training paths; each one is <split>/<person>/<file>.
train_image_paths[:10]

['/content/celeb/train/Andy_Roddick/Andy_Roddick_0002.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0006.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0004.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0003.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0015.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0005.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0013.jpg',
 '/content/celeb/train/Andy_Roddick/Andy_Roddick_0010.jpg',
 '/content/celeb/train/Richard_Gephardt/Richard_Gephardt_0011.jpg',
 '/content/celeb/train/Richard_Gephardt/Richard_Gephardt_0006.jpg']

In [0]:
# An image's label is the name of the folder that directly contains it.
# The path lists hold plain strings, so each is wrapped in a path object first.
train_labels = [pathlib.PurePath(img_path).parent.name for img_path in train_image_paths]
test_labels = [pathlib.PurePath(img_path).parent.name for img_path in test_image_paths]

In [39]:
# First few training labels (parent-folder names of the training paths).
train_labels[:10]

['Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Andy_Roddick',
 'Richard_Gephardt',
 'Richard_Gephardt']

In [40]:
# First few test labels (parent-folder names of the test paths).
test_labels[:10]

['Andy_Roddick',
 'Andy_Roddick',
 'Richard_Gephardt',
 'Richard_Gephardt',
 'Ann_Veneman',
 'Ann_Veneman',
 'Tony_Blair',
 'Tony_Blair',
 'Eduardo_Duhalde',
 'Eduardo_Duhalde']

In [0]:
def get_embeddings(images_path, batch_size=32):
  """Embed images with the notebook-level `model`.

  Every image is loaded from disk at 224x224, converted to an array, run
  through `preprocess_input` and fed to `model`; the model output for each
  image is flattened into a single 1-D feature vector.

  Images are sent to the model in batches instead of one `model.predict`
  call per file, which avoids paying the per-call overhead of `predict`
  once for every image.

  Args:
    images_path: iterable of image file paths.
    batch_size: number of images loaded and predicted together (>= 1).

  Returns:
    np.ndarray with one row per input image, in input order, holding the
    flattened features. An empty input yields an empty array.

  Raises:
    ValueError: if `batch_size` is smaller than 1.
  """
  if batch_size < 1:
    raise ValueError('batch_size must be >= 1')

  images_path = list(images_path)
  feature_batches = []

  for start in range(0, len(images_path), batch_size):
    batch = np.stack([
        image.img_to_array(image.load_img(img, target_size=(224, 224)))
        for img in images_path[start:start + batch_size]
    ])
    batch = preprocess_input(batch)
    features = model.predict(batch, batch_size=batch_size, verbose=0)
    # Flatten everything except the batch axis: one feature vector per image.
    feature_batches.append(features.reshape(len(batch), -1))

  if not feature_batches:
    # Same result the per-image implementation gave for an empty input.
    return np.array([])
  return np.concatenate(feature_batches, axis=0)

In [0]:
# Embed every training image; row i corresponds to train_image_paths[i].
train_embeddings = get_embeddings(train_image_paths)

In [31]:
# Sanity check: one row per training image, one column per flattened feature.
train_embeddings.shape

(1144, 25088)

In [0]:
# Embed every test image; row i corresponds to test_image_paths[i].
test_embeddings = get_embeddings(test_image_paths)

In [33]:
# Sanity check: one row per test image, one column per flattened feature.
test_embeddings.shape

(286, 25088)

# Evaluation on the test set

In [48]:
# Nearest-neighbour evaluation: a test image counts as correctly identified
# when the training embedding closest to it (squared Euclidean distance)
# carries the same label. Each correct match is printed as "<true> <predicted>".
correct_identified = 0

for index, query_embedding in enumerate(test_embeddings):
  # Squared distance from this test embedding to every training embedding.
  squared_dist = np.square(train_embeddings - query_embedding).sum(axis=1)
  # Indices of the 10 closest training images, nearest first.
  nearest_neighbors = np.argsort(squared_dist)[:10]

  true_label = test_labels[index]
  predicted_label = train_labels[nearest_neighbors[0]]

  if true_label == predicted_label:
    print(true_label, predicted_label)
    correct_identified += 1

  # Optional visual check - show the test image:
  # display.display(display.Image(test_image_paths[index]))

  # ...followed by its nearest training images:
  # for ind in nearest_neighbors:
  #   print(train_labels[ind])
  #   display.display(display.Image(train_image_paths[ind]))


Jack_Straw Jack_Straw
John_Allen_Muhammad John_Allen_Muhammad
John_Allen_Muhammad John_Allen_Muhammad
Adrien_Brody Adrien_Brody
James_Blake James_Blake
Guillermo_Coria Guillermo_Coria
Jiang_Zemin Jiang_Zemin
Jiang_Zemin Jiang_Zemin
John_Paul_II John_Paul_II
Jennifer_Aniston Jennifer_Aniston
Atal_Bihari_Vajpayee Atal_Bihari_Vajpayee
Saddam_Hussein Saddam_Hussein
Saddam_Hussein Saddam_Hussein
Joe_Lieberman Joe_Lieberman
Lindsay_Davenport Lindsay_Davenport
Angelina_Jolie Angelina_Jolie
Venus_Williams Venus_Williams
Dominique_de_Villepin Dominique_de_Villepin
Joschka_Fischer Joschka_Fischer
Julie_Gerberding Julie_Gerberding
Dick_Cheney Dick_Cheney
Ari_Fleischer Ari_Fleischer
Mike_Weir Mike_Weir
Mike_Weir Mike_Weir
Sergey_Lavrov Sergey_Lavrov
Hu_Jintao Hu_Jintao
Lleyton_Hewitt Lleyton_Hewitt
Carlos_Moya Carlos_Moya
Howard_Dean Howard_Dean
Mark_Philippoussis Mark_Philippoussis
Britney_Spears Britney_Spears
Britney_Spears Britney_Spears
Pierce_Brosnan Pierce_Brosnan
Winona_Ryder Winona_Ryder


In [49]:
# Number of test images whose nearest training embedding had the same label.
correct_identified

37

### Similarity Matrix

In [0]:
# num = len(file_list)
# sim_matrix = np.zeros((num,num))

# for i in range(num):
#   for j in range(num):
#     sim_matrix[i][j] = 1 - spatial.distance.cosine(feature_list[i], feature_list[j])


### Validate on Sample

In [0]:
# rand_index = randrange(num)
# sort_index = np.argsort(sim_matrix[rand_index])

Top 10 similar pokemons

In [0]:
# top_sim = sort_index[::-1][1:10]
# display.display(display.Image(path+listdir(path)[rand_index] ))

# for ind in top_sim:
#   display.display(display.Image(path+listdir(path)[ind] ))

Top 10 dissimilar pokemons

In [0]:
# top_dis_sim = sort_index[1:10]
# display.display(display.Image(path+listdir(path)[rand_index] ))

# for ind in top_dis_sim:
#   display.display(display.Image(path+listdir(path)[ind] ))