In [1]:
# This program is just implemented as a help file to calculate encodings and 
# apply TSNE algorithm to see if anchor images could have been distinguished 
# from each other by the model after the training.

# The file:
# 1) re-loads the pretrained model (the one to be used in prediction)
# 2) For each anchor image, the face encoding is calculated and saved ina python dictionary.
#    This dictionary is saved in a file so that we can skip re-calculating face encodings of anchor images
#    each time we want to make a prediction later on. Face encodings can just be read from the .dat file
#    where we save anchor image paths and anchor image face encodings.
# 3) TSNE is applied to 128 dimensional face encodings of anchor images to reduce the number of dimensions
#    down to 3 and we save results of TSNE to a csv file to be able to have a closer look later on. 

In [1]:
import pandas as pd
import tensorflow as tf
#from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
#import PIL
#from PIL import Image
from keras.models import Model
from keras.layers import Dense, Input, subtract, concatenate, Lambda, add, maximum
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, RMSprop
from keras.models import load_model, model_from_json
import numpy as np
import pickle
from sklearn.manifold import TSNE

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# load the encoding_network to make predictions based on trained network

json_file = open('/home/cesncn/Desktop/github_projects/face_recognition/code/saved_model/encoding_network_arch.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
encoding_network = model_from_json(loaded_model_json)
# load weights into new model
encoding_network.load_weights('saved_model/encoding_network_weights.h5')

In [5]:
# 1) Put each anchor_img_path and anchor_encoding to a dictionary and later save all these pairs
#    in a new file that will later be used when making predictions.
# 2) Create the array of encodings that will be input to TSNE. And run TSNE.

# Note that pd.read_csv function assumes that the first row is header and skips this row when reading..
reader = pd.read_csv('../dataset/train/anchor_img_paths.csv', chunksize=1)

encodings_list = []
encodings_dict = {}

for chunk in reader:
    encoding_net_anchor_inputs = np.empty((0, 224, 224, 3))
    anchor_img = image.load_img(chunk.iloc[0, 0], target_size=(224, 224))  # chunk.iloc[0, 0] is the img_path
    anchor_img = image.img_to_array(anchor_img)
    anchor_img = np.expand_dims(anchor_img, axis=0)
    anchor_img = preprocess_input(anchor_img)
    encoding_net_anchor_inputs = np.append(encoding_net_anchor_inputs, anchor_img, axis=0)
    anchor_encoding = encoding_network.predict([encoding_net_anchor_inputs], 
                                               batch_size = 1, 
                                               verbose = 0)   
    
    # save anchor encoding in a file to later use in predictions
    encodings_dict[chunk.iloc[0, 0]] = anchor_encoding   # chunk.iloc[0, 0] is the img_path
    
    print(anchor_encoding.shape)
    encodings_list.append(anchor_encoding)

with open('anchor_encodings_dict.dat', 'wb') as f:
    pickle.dump(encodings_dict, f)

encodings_array = np.array(encodings_list)
print(encodings_array.shape)
reshaped_encodings_array = np.reshape(encodings_array, 
                                      (encodings_array.shape[0], encodings_array.shape[2]))
print(reshaped_encodings_array.shape)
encodings_embedded = TSNE(n_components=2, perplexity=10).fit_transform(reshaped_encodings_array)
print(encodings_embedded.shape)

(1, 128)
(1, 128)
(1, 128)
(1, 128)
(1, 128)
(1, 128)
(1, 128)
(1, 128)
(8, 1, 128)
(8, 128)
(8, 2)


In [7]:
# Just write the results from TSNE into a CSV file

reader = pd.read_csv('../dataset/train/anchor_img_paths.csv', chunksize=1)
i = 0
write_header = True  # Needed to get header for first chunk
for chunk in reader:
    chunk['tsne_dim_1'] = encodings_embedded[i, 0]
    chunk['tsne_dim_2'] = encodings_embedded[i, 1]
    i += 1
    chunk.to_csv('anchor_img_tsne.csv', mode='a', header=write_header, index=False)
    write_header = False