# Check GPU version.

In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

# Mount google drive.

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive; the model weights are later
# loaded from and saved to '/content/drive/My Drive/'.
drive.mount('/content/drive')

# Install TensorFlow-1.14 GPU.

In [None]:
# Select TensorFlow-1.x version.
%tensorflow_version 1.x

# Uninstall previous TensorFlow version.
# NOTE(review): stdout and stderr of every pip command below are sent to
# /dev/null, so a failed uninstall/install is invisible here; the only check
# is the `print(tf.__version__)` in the import cell.
!pip uninstall tensorflow -y 1>/dev/null 2>/dev/null 
!pip uninstall tensorflow-gpu -y 1>/dev/null 2>/dev/null 

# Install TensorFlow-1.14 and Keras-2.2.4.
# NOTE(review): both `tensorflow` and `tensorflow-gpu` are installed at the
# same version - presumably only the GPU build is needed; confirm.
!pip install --upgrade tensorflow==1.14.0 1>/dev/null 2>/dev/null 
!pip install --upgrade tensorflow-gpu==1.14.0 1>/dev/null 2>/dev/null 
!pip install --upgrade keras==2.2.4 1>/dev/null 2>/dev/null 

# Restart the runtime.

# Set the root directory.

In [None]:
import os

# Work from /content: the archives are downloaded and extracted relative to
# the current directory, and the dataset paths used later ('/content/train',
# '/content/test/') live under it.
root_dir = '/content/'
os.chdir(root_dir)

# List the working directory.
!ls -al

# Import TensorFlow-1.14.

In [None]:
try:
  %tensorflow_version 1.x
except Exception:
  pass

import tensorflow as tf
from tensorflow.keras import backend as K

import tensorflow.keras.layers as layers
import tensorflow.keras.models as models

import numpy as np
np.random.seed(7)

import matplotlib.pyplot as plot

print(tf.__version__)

# Download and extract the aligned dataset from Google Drive.

### VGG Face-2 aligned train dataset 0-1 split.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1ciyZPE_EReqa6EgzKjxWmQsDiXk_cy4y # aligned_vggface2_train-0-1.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_train-0-1.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_train-0-1.tar.gz
!ls -al

### VGG Face-2 aligned train dataset 2-3 split.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1-1enkjPi4U5RNmuMu8zIeCnua47R5p5M # aligned_vggface2_train-2-3.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_train-2-3.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_train-2-3.tar.gz
!ls -al

### VGG Face-2 aligned train dataset 4-5 split.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1-5nIciF8aTAivCkFVLbSxIEIogPX6Qvm # aligned_vggface2_train-4-5.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_train-4-5.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_train-4-5.tar.gz
!ls -al

### VGG Face-2 aligned train dataset 6-7 split.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1-SQPlQvDDvGzBha8FjynyKuRJoYILSSB # aligned_vggface2_train-6-7.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_train-6-7.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_train-6-7.tar.gz
!ls -al

### VGG Face-2 aligned train dataset 8-9 split.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1-cRXzHpq8ZvyC6cT2ERIZiC_P1y6S5WQ # aligned_vggface2_train-8-9.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_train-8-9.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_train-8-9.tar.gz
!ls -al

# Check downloaded train dataset.

In [None]:
!ls -al
# Count the sub-directories of train/ (`ls -l` lines that start with 'd');
# each sub-directory is treated as one class by the cells below.
!ls -l train/ | grep ^d | wc -l

# Create the dataset.

### Preprocess the dataset.

In [None]:
# Shape fed to the network: used as the random-crop size during training,
# as the center-crop size at evaluation, and as the model's input_shape.
image_shape = (224, 224, 3)
# Images are first resized to this shape, then cropped down to image_shape.
image_load_shape = (256, 256, 3)

In [None]:
# Number of units of the final Dense layer, i.e. the embedding length.
number_of_features = 256

In [None]:
# Number of examples per training batch.
batch_size = 32
# Number of elements held by the tf.data shuffle buffer.
buffer_size = 2048

In [None]:
# When True, weights are loaded from Google Drive before training.
load_previous_weights = True
# When True, weights are written back to Google Drive after training.
save_current_weights = True

In [None]:
# Directory whose sub-directories are the training classes.
dataset_root_dir = '/content/train'

In [None]:
# Every sub-directory of the dataset root is one class; plain files in the
# root are ignored.
directory_names = os.listdir(dataset_root_dir)
class_names = list(filter(
    lambda name: os.path.isdir(os.path.join(dataset_root_dir, name)),
    directory_names))
print('number of classes -', len(class_names))

In [None]:
# Two-way lookup between class directory names and integer labels; a class's
# label is its position in `class_names`.
names_to_identifiers = {
    class_name: identifier
    for identifier, class_name in enumerate(class_names)
}
identifiers_to_names = {
    identifier: class_name
    for identifier, class_name in enumerate(class_names)
}

### Load an image from a filename.

In [None]:
def load_image(image_filename):
  """Read the file at `image_filename` and decode it as a 3-channel JPEG.

  Args:
    image_filename: path of the image file (string or string tensor).
  Returns:
    The decoded image tensor.
  """
  encoded_bytes = tf.io.read_file(image_filename)
  decoded_image = tf.image.decode_jpeg(encoded_bytes, channels=3)
  return decoded_image

### Normalize image to [0., 1.].

In [None]:
def normalize_image(image):
  """Cast `image` to float32 and divide it by 255.

  For pixel values in the 0..255 range this yields values in [0., 1.].
  """
  as_float = tf.cast(image, tf.float32)
  return as_float / 255.0

### Random crop image.

In [None]:
def random_crop(image):
  """Return a randomly positioned crop of `image` with shape `image_shape`."""
  return tf.image.random_crop(image, size=image_shape)

### Apply random jitter to input image.

In [None]:
def random_jitter(image):
  """Augment `image`: resize, random crop, random horizontal flip.

  The image is resized to the height/width of `image_load_shape`, cropped at
  a random position to `image_shape`, and mirrored left-right at random.
  """
  load_height, load_width = image_load_shape[0], image_load_shape[1]
  resized = tf.image.resize(image, [load_height, load_width])
  cropped = random_crop(resized)
  return tf.image.random_flip_left_right(cropped)

### Define preprocessing for train dataset split.

In [None]:
def preprocess_train_dataset(image_filename, image_label):
  """Turn one (filename, label) element into an (image, label) training pair.

  The image is loaded, randomly jittered and then normalized; the label is
  passed through unchanged.
  """
  augmented = random_jitter(load_image(image_filename))
  return normalize_image(augmented), image_label

# Preprocess train dataset split.

In [None]:
# Passed as `num_parallel_calls` to map() and as the buffer size to
# prefetch() so that tf.data chooses those values itself.
auto_tune = tf.data.experimental.AUTOTUNE

In [None]:
def create_dataset(dataset_root_dir, class_names):
  """Build a dataset of (image filename, integer label) pairs.

  Each name in `class_names` is a sub-directory of `dataset_root_dir`; every
  regular file inside it becomes one element labelled with the identifier
  from the module-level `names_to_identifiers`. Elements are emitted class
  by class, in the order of `class_names`.

  Args:
    dataset_root_dir: directory that contains one sub-directory per class.
    class_names: names of the class sub-directories to include.
  Returns:
    A tuple (dataset, number_of_batches): the tf.data.Dataset of
    (filename, label) elements and the number of full batches of the
    module-level `batch_size` that fit into it.
  """
  image_filenames = []
  image_labels = []

  for class_name in class_names:
    class_identifier = names_to_identifiers[class_name]
    class_root_dir = os.path.join(dataset_root_dir, class_name)

    for class_image_filename in os.listdir(class_root_dir):
      current_filename = os.path.join(class_root_dir, class_image_filename)
      # Skip anything that is not a regular file (e.g. a nested directory);
      # it would otherwise only fail later, inside the JPEG decoder.
      if not os.path.isfile(current_filename):
        continue

      image_filenames.append(current_filename)
      image_labels.append(class_identifier)

  number_of_batches = len(image_filenames) // batch_size
  memory_data = (image_filenames, image_labels)
  dataset = tf.data.Dataset.from_tensor_slices(memory_data)

  return(dataset, number_of_batches)

In [None]:
# Build the (filename, label) dataset for every training class and report
# how many full batches it contains.
train_dataset, number_of_batches = create_dataset(dataset_root_dir, class_names)
print('number of batches -', number_of_batches)

In [None]:
# Decode and augment the images in parallel.
train_dataset = train_dataset.map(preprocess_train_dataset, num_parallel_calls=auto_tune)
# shuffle() runs after map(), so the buffer holds `buffer_size` decoded
# images rather than filenames. The source emits elements class by class, so
# each batch is drawn from a limited window of neighbouring classes.
# NOTE(review): the triplet loss needs same-label pairs inside a batch; a
# wider (or pre-map, full-dataset) shuffle would presumably make such pairs
# rare - confirm before changing the order or the buffer size.
train_dataset = train_dataset.shuffle(buffer_size)
# Drop the last partial batch so every batch has exactly `batch_size` items.
train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
train_dataset = train_dataset.prefetch(auto_tune)

# Create the loss function.

In [None]:
def pairwise_distance(feature, squared=False):
    """Return the matrix of pairwise Euclidean distances between rows.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2
    (or its square when `squared` is True).

    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether to return squared distances.
    Returns:
      2-D Tensor of size [number of data, number of data] whose diagonal is
      zero.
    """
    feature_transposed = tf.transpose(feature)

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * <a, b>
    row_square_norms = tf.math.reduce_sum(
        tf.math.square(feature), axis=[1], keepdims=True)
    column_square_norms = tf.math.reduce_sum(
        tf.math.square(feature_transposed), axis=[0], keepdims=True)
    inner_products = tf.matmul(feature, feature_transposed)
    squared_distances = tf.math.add(
        row_square_norms, column_square_norms) - 2.0 * inner_products

    # Rounding can leave tiny negative values; clamp them to zero.
    squared_distances = tf.math.maximum(squared_distances, 0.0)
    # Entries that are exactly zero after clamping.
    zero_mask = tf.math.less_equal(squared_distances, 0.0)

    if squared:
        distances = squared_distances
    else:
        # Add a tiny epsilon at the zero entries before taking the root.
        epsilon = tf.cast(zero_mask, dtype=tf.dtypes.float32) * 1e-16
        distances = tf.math.sqrt(squared_distances + epsilon)

    # Force the zero entries back to exactly zero (undoes the epsilon).
    non_zero = tf.cast(tf.math.logical_not(zero_mask), dtype=tf.dtypes.float32)
    distances = tf.math.multiply(distances, non_zero)

    # Explicitly zero the diagonal.
    num_data = tf.shape(feature)[0]
    off_diagonal = tf.ones_like(distances) - tf.linalg.diag(
        tf.ones([num_data]))
    return tf.math.multiply(distances, off_diagonal)

In [None]:
def _masked_maximum(data, mask, dim=1):
    """Return the maximum along `dim` over the elements selected by `mask`.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D `Tensor` of size [n, m] selecting the elements to consider.
        It is multiplied with `data`, so it must be numeric (1 = selected,
        0 = ignored).
      dim: The dimension over which to compute the maximum.
    Returns:
      `Tensor` whose `dim` dimension has size 1.
    """
    # Shift so every value is >= 0; masked-out entries (turned into 0 by the
    # multiplication) then cannot exceed a selected one.
    floor = tf.math.reduce_min(data, dim, keepdims=True)
    shifted = tf.math.multiply(data - floor, mask)
    return tf.math.reduce_max(shifted, dim, keepdims=True) + floor

In [None]:
def _masked_minimum(data, mask, dim=1):
    """Return the minimum along `dim` over the elements selected by `mask`.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D `Tensor` of size [n, m] selecting the elements to consider.
        It is multiplied with `data`, so it must be numeric (1 = selected,
        0 = ignored).
      dim: The dimension over which to compute the minimum.
    Returns:
      `Tensor` whose `dim` dimension has size 1.
    """
    # Shift so every value is <= 0; masked-out entries (turned into 0 by the
    # multiplication) then cannot undercut a selected one.
    ceiling = tf.math.reduce_max(data, dim, keepdims=True)
    shifted = tf.math.multiply(data - ceiling, mask)
    return tf.math.reduce_min(shifted, dim, keepdims=True) + ceiling

In [None]:
def triplet_semihard_loss(y_true, y_pred, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

    For every (anchor, positive) pair in the batch the negative is the
    closest one that is still farther from the anchor than the positive; if
    no such negative exists, the farthest negative is used instead.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
    Returns:
      Scalar float `Tensor`: the hinge loss averaged over all positive
      pairs, or 0 when the batch contains no positive pair.
    """
    labels, embeddings = y_true, y_pred
    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    # Number of examples in this batch (named so that it does not shadow the
    # module-level `batch_size`).
    num_examples = tf.size(labels)

    # Compute the mask: row (p * n + a), column k is True when k is a
    # negative of a and D(a, k) > D(a, p).
    pdist_matrix_tile = tf.tile(pdist_matrix, [num_examples, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [num_examples, 1]),
        tf.math.greater(pdist_matrix_tile,
                        tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
    # True where at least one such negative exists for the (a, p) pair.
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True),
            0.0), [num_examples, num_examples])
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask),
        [num_examples, num_examples])
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, num_examples])
    semi_hard_negatives = tf.where(mask_final, negatives_outside,
                                   negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    # All same-label pairs except the diagonal (an example with itself).
    mask_positives = tf.cast(
        adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
            tf.ones([num_examples]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    # A batch without any positive pair has a zero numerator as well; clamp
    # the denominator so the loss is 0 instead of 0 / 0 = NaN, which would
    # otherwise propagate into the weights.
    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)),
        tf.math.maximum(num_positives, 1.0))

    return triplet_loss

# Create the model.

In [None]:
from tensorflow.compat.v1.keras.applications import ResNet50

### Create the base model.

In [None]:
# ResNet50 backbone without the classification head (include_top=False) and
# without pretrained weights (weights=None); pooling='avg' applies global
# average pooling to the output of the last convolutional block.
base_model = ResNet50(include_top=False, weights=None, input_shape=image_shape, pooling='avg')

### Create the main model.

In [None]:
# Embedding head on top of the backbone output.
image_features = base_model.output
image_features = layers.Flatten(name='flatten')(image_features)
# Linear projection (no activation) to `number_of_features` dimensions.
image_features = layers.Dense(number_of_features, name='image_features')(image_features)
# L2-normalize each embedding, as the triplet loss docstring asks for.
image_features = layers.Lambda(lambda  x: K.l2_normalize(x, axis=1))(image_features)

In [None]:
# Full network: backbone input -> normalized embedding.
model = models.Model(inputs=base_model.input, outputs=image_features, name='model')

### Show the model summary.

In [None]:
#model.summary()

# Train the model.

### Compile the model.

In [None]:
# Adam optimizer; beta_1 is set to 0.5 here.
learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5, beta_2=0.999)

In [None]:
# Train against the semi-hard triplet loss; the integer class labels are
# passed to it as y_true and the embeddings as y_pred.
model.compile(optimizer=optimizer, loss=triplet_semihard_loss)

### Train the model.

In [None]:
import os

def model_filename():
  """Return the file name under which the model weights are stored."""
  filename = 'model.h5'
  return filename

def weight_root_dir():
  """Return the Google Drive directory that holds the weight file."""
  directory = '/content/drive/My Drive/'
  return directory

def model_gdrive_filename():
  """Return the full path of the weight file on Google Drive."""
  directory = weight_root_dir()
  filename = model_filename()
  return os.path.join(directory, filename)

In [None]:
# Resume from the weights stored on Google Drive.
# NOTE(review): this raises if the file is missing (first run, or Drive not
# mounted). That is deliberate protection: training from scratch and then
# saving below would overwrite the stored weights. Set
# load_previous_weights = False for a fresh start.
if(load_previous_weights):
  model.load_weights(model_gdrive_filename())

In [None]:
# Number of passes over the training dataset in this session.
epochs = 1

In [None]:
history = model.fit(train_dataset, epochs=epochs)

In [None]:
# Write the updated weights back to the same file on Google Drive.
if(save_current_weights):
  model.save_weights(model_gdrive_filename())

# Evaluate the model.

### Download and extract the aligned test dataset from Google Drive.

In [None]:
# Download the archive by its Google Drive file id, then list the directory.
!gdown --id 1WEftISRMb-8v9iIFomzCzSAzbEvxOKFu # aligned_vggface2_test.tar.gz
!ls -al

In [None]:
# Extract the gzip-compressed tarball into the current directory.
!tar -xzf aligned_vggface2_test.tar.gz
!ls -al

In [None]:
# Remove the archive now that it has been extracted.
!rm -rf aligned_vggface2_test.tar.gz
!ls -al

### Check downloaded test dataset.

In [None]:
!ls -al
# Count the sub-directories of test/ (`ls -l` lines that start with 'd');
# the evaluation loop treats each sub-directory as one class.
!ls -l test/ | grep ^d | wc -l

# Preprocess input image.

In [None]:
def center_crop(input_image, target_size):
  """Cut a centered window of `target_size` out of `input_image`.

  Args:
    input_image: array whose first two axes are height and width.
    target_size: (height, width) of the window to extract.
  Returns:
    The slice of `input_image` covering the centered window.
  """
  target_height, target_width = target_size

  top = (input_image.shape[0] - target_height) // 2
  left = (input_image.shape[1] - target_width) // 2
  bottom = top + target_height
  right = left + target_width

  return input_image[top:bottom, left:right]

In [None]:
import cv2

def preprocess_image(image_filename):
  """Load an image file and turn it into a single-image network input batch.

  The image is resized to the height/width of `image_load_shape`,
  center-cropped to the height/width of `image_shape`, converted from
  OpenCV's BGR channel order to RGB and divided by 255.

  Args:
    image_filename: path of the image file to load.
  Returns:
    Float array with a leading batch axis of size 1.
  Raises:
    ValueError: if OpenCV cannot read or decode the file.
  """
  input_image = cv2.imread(image_filename)
  # cv2.imread signals failure by returning None instead of raising.
  if input_image is None:
    raise ValueError('could not read image: {}'.format(image_filename))

  # cv2.resize expects the target size as (width, height), while the shape
  # tuples in this notebook are (height, width, channels).
  load_height, load_width = image_load_shape[0], image_load_shape[1]
  input_image = cv2.resize(input_image, (load_width, load_height))
  input_image = center_crop(input_image, (image_shape[0], image_shape[1]))
  input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
  input_image = input_image / 255.0
  # Add the batch axis expected by model.predict.
  input_image = np.expand_dims(input_image, axis=0)
  return( input_image )

# Compute image features.

In [None]:
def compute_image_features(model, image_filename):
  """Return the embedding `model` predicts for the image at `image_filename`.

  Args:
    model: object whose `predict` method maps an image batch to features.
    image_filename: path of the image file.
  Returns:
    The first (and only) row of the predicted feature batch.
  """
  image_batch = preprocess_image(image_filename)
  feature_batch = model.predict(image_batch)
  return feature_batch[0]

### Evaluate the model on test dataset.

In [None]:
# Same-identity verification check: for each test class, one randomly chosen
# image is the reference and every image of that class (including the
# reference itself) is compared against it. An image counts as a match when
# its Euclidean distance to the reference is below `distance_threshold`, or
# when the dot product with the reference is above `similarity_threshold`.
# Only same-class pairs are tested, so the printed "accuracy" is the share
# of genuine pairs that are accepted.
test_image_dir = '/content/test/'
# Kept separate from `class_names`, which holds the training classes and is
# needed again if the dataset cells are re-run.
test_class_names = os.listdir(test_image_dir)
distance_threshold = 0.5
similarity_threshold = 0.5
number_of_images = 0
positive_distance_images = 0
positive_similarity_images = 0

for class_name in test_class_names:
  class_root_dir = os.path.join(test_image_dir, class_name)
  if(not os.path.isdir(class_root_dir)):
    continue

  # Keep regular files only, so neither the reference nor a compared image
  # can be a directory.
  image_filenames = [
      os.path.join(class_root_dir, entry_name)
      for entry_name in os.listdir(class_root_dir)
  ]
  image_filenames = [
      filename for filename in image_filenames if os.path.isfile(filename)
  ]
  # An empty class has no reference image (np.random.randint(0, 0) raises).
  if(not image_filenames):
    continue

  image_index = np.random.randint(0, len(image_filenames))
  base_features = compute_image_features(model, image_filenames[image_index])

  for image_filename in image_filenames:
    number_of_images = number_of_images + 1

    current_features = compute_image_features(model, image_filename)

    current_distance = np.linalg.norm(base_features - current_features)
    distance_status = current_distance < distance_threshold
    positive_distance_images = positive_distance_images + int(distance_status)

    current_similarity = np.dot(base_features, current_features)
    similarity_status = current_similarity > similarity_threshold
    positive_similarity_images = positive_similarity_images + int(similarity_status)

    # Progress report every 1000 images.
    if(number_of_images % 1000 == 0):
      print('accuracy (distance) - ', positive_distance_images/number_of_images)
      print('accuracy (similarity) - ', positive_similarity_images/number_of_images)

# Guard against an empty test directory (division by zero).
if(number_of_images):
  print('accuracy (distance) - ', positive_distance_images/number_of_images)
  print('accuracy (similarity) - ', positive_similarity_images/number_of_images)
else:
  print('no test images found in', test_image_dir)