#### Library Import

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras import Sequential
from tensorflow.keras.layers import Flatten, Dense
from numpy.linalg import norm
from sklearn.decomposition import PCA
from glob import glob

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# notebook can reach files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/MyDrive/Work_to_do

/content/drive/MyDrive/Work_to_do


In [None]:
# !unzip Face\ Dataset.zip

### 1. Creating the model

In [None]:
# Build VGG16 with ImageNet weights and without its top (classifier) layers;
# `extract_features` calls `model.predict` on this object.
model = VGG16(weights="imagenet", include_top=False)        #Only the feature extraction layers are required.

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


### 2. Auxiliary Functions

#### <p style = "color:red">Extracting image features and resizing the tensors. </p>

In [None]:
import random

import numpy as np
import torch
from PIL import Image
import cv2
from imgaug import augmenters as iaa
from PIL import Image, ImageEnhance, ImageOps
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# which also hides NumPy runtime warnings (e.g. mean of an empty list or an
# invalid division) that the scoring functions below can trigger.
warnings.filterwarnings('ignore')
def KeepOriginalAug(img):
    """Paste a rescaled copy of the salient patch into a RandAugment-ed image.

    The image is resized to 512x512 and a box around the saliency peak is
    obtained from `saliency_bbox`. One of the (up to) eight rectangles that
    surround that box is picked at random. The full image and the salient
    patch (rescaled to the picked rectangle) are each passed through
    `augment`, the patch is written into the picked rectangle, and the result
    is resized back to the input's height and width.

    Args:
        img: H x W x C image array (the caller in this notebook passes uint8).

    Returns:
        An image array with the same height and width as `img`. If the
        saliency box is empty, or if it leaves no free rectangle around it,
        the image is returned without augmentation (after the 512x512 round
        trip) so callers always receive the same shape.
    """
    ori_h1, ori_w1, _ = img.shape
    img = cv2.resize(img, (512, 512))
    h1, w1, _ = img.shape

    beta = 1
    lam = np.random.beta(beta, beta)
    # x* are row indices and y* are column indices of the salient box.
    x1, y1, x2, y2 = saliency_bbox(img, lam)

    # No saliency detected: hand the image back untouched. cv2.resize expects
    # dsize as (width, height), hence (ori_w1, ori_h1).
    if x1 == x2 or y1 == y2:
        return cv2.resize(img, (ori_w1, ori_h1))

    # The eight rectangles around the salient box, each given as
    # (row_start, row_end, col_start, col_end).
    boxes = [
        [0, x1, 0, y1],
        [x1, x2, 0, y1],
        [x2, h1, 0, y1],
        [0, x1, y1, y2],
        [x2, h1, y1, y2],
        [0, x1, y2, w1],
        [x1, x2, y2, w1],
        [x2, h1, y2, w1],
    ]
    # Only rectangles with a non-zero area can receive the patch.
    candidates = [b for b in boxes if (b[1] - b[0]) * (b[3] - b[2]) != 0]
    if not candidates:
        # The salient box covers the whole image, so there is nowhere to paste.
        return cv2.resize(img, (ori_w1, ori_h1))

    x12, x22, y12, y22 = candidates[np.random.choice(len(candidates))]
    target_w = int(y22 - y12)
    target_h = int(x22 - x12)

    # Rescale the salient patch so it exactly fills the chosen rectangle.
    resized = cv2.resize(img[x1:x2, y1:y2, :], (target_w, target_h))

    # `augment` works on a batch of one; index 0 drops the batch axis again.
    img = augment(img.copy())[0]
    resized = augment(resized.copy())[0]
    img[x12:x22, y12:y22] = resized

    return cv2.resize(img, (ori_w1, ori_h1))

def saliency_bbox(img, lam):
    """Return a box centred on the most salient pixel of `img`.

    The box side lengths are the image height/width scaled by
    sqrt(1 - lam), and the box is clipped to the image bounds.

    Args:
        img: H x W x C image array passed to OpenCV's fine-grained static
            saliency detector.
        lam: Mixing ratio; larger values give a smaller box.

    Returns:
        (row_start, col_start, row_end, col_end). All four values are 0 when
        the detector reports failure, which callers can treat as an empty box.
    """
    height, width = img.shape[0], img.shape[1]
    cut_rat = np.sqrt(1. - lam)
    # Built-in int: the `np.int` alias no longer exists in current NumPy.
    cut_w = int(width * cut_rat)
    cut_h = int(height * cut_rat)

    # Compute the saliency map on a copy so the caller's array is untouched.
    saliency = cv2.saliency.StaticSaliencyFineGrained_create()
    success, saliency_map = saliency.computeSaliency(img.copy())
    if not success or saliency_map is None:
        return 0, 0, 0, 0
    saliency_map = (saliency_map * 255).astype("uint8")

    # argmax over the flattened map, converted back to (row, column).
    row, col = np.unravel_index(np.argmax(saliency_map, axis=None), saliency_map.shape)

    # Rows are bounded by the height and columns by the width.
    bbx1 = np.clip(row - cut_h // 2, 0, height)
    bby1 = np.clip(col - cut_w // 2, 0, width)
    bbx2 = np.clip(row + cut_h // 2, 0, height)
    bby2 = np.clip(col + cut_w // 2, 0, width)

    return bbx1, bby1, bbx2, bby2



def augment(images):
    """Run imgaug's RandAugment (n=3, m=7) on one H x W x C array.

    `images` is reshaped to a batch of one before being handed to the
    augmenter, and the augmenter's batched result is returned as-is, so the
    caller has to strip the leading batch axis.
    """
    augmenter = iaa.RandAugment(n=3, m=7)
    height, width, channels = images.shape[0], images.shape[1], images.shape[2]
    batch = np.reshape(images, (1, height, width, channels))
    return augmenter(images=batch)



def extract_features(img_path):
    """Load an image, augment it and return the model's prediction for it.

    The image is loaded with target_size=(650,500), converted to a uint8
    array, passed through `KeepOriginalAug`, given a leading batch axis,
    run through `preprocess_input` and finally through `model.predict`.
    """
    loaded = image.load_img(img_path, target_size=(650,500))
    pixels = image.img_to_array(loaded).astype('uint8')
    batch = np.expand_dims(KeepOriginalAug(pixels), axis=0)
    return model.predict(preprocess_input(batch))


def reduce_dimensions(features):
    """Compress one image's feature tensor into a flat vector of length 400.

    The features are reshaped to 20 rows of 15*512 values, a default PCA is
    fitted on those rows and applied to them, and the projected values are
    flattened to 400 entries.
    """
    reducer = PCA()
    rows = features.reshape(20,15*512)
    reducer.fit(rows)
    projected = reducer.transform(rows)
    flat = projected.reshape(1,400)
    return np.squeeze(flat)

#### Calculating Image Similarity Scores

<li>The image similarity score is based on the cosine similarity of the extracted and resized features of two images; the reported value is the cosine distance, i.e. 1 − cosine similarity.</li>
<li>The higher the score, the more different the images. Two identical feature vectors have a score of 0.</li>
<li>Two types of image similarity scores are computed: <ul><li><b>Image similarity intra</b> is the mean of the image similarity scores of every image with all other images in the same image set.</li> <li><b>Image similarity cross</b> is the mean of the image similarity scores between the images across all the given image sets.</li></ul></li>
<li>Image similarity intra will compare one image set while cross will assess multiple image sets. </li>

In [None]:
def img_sim_score(features_1, features_2):
    """Return 1 minus the cosine similarity of two feature vectors.

    Vectors pointing the same way give 0; orthogonal vectors give 1.
    """
    scale = norm(features_1,2)*norm(features_2,2)
    cosine = np.dot(features_1,features_2)/scale
    return 1-cosine

def img_sim_iterator(image_set_1, image_set_2, img_sim_type):
    """Collect pairwise `img_sim_score` values between two feature sets.

    Args:
        image_set_1: Sequence of feature vectors.
        image_set_2: Sequence of feature vectors.
        img_sim_type: 'intra' scores every vector against every *other*
            vector and requires both arguments to hold the same set;
            'cross' scores every pair and requires the sets to differ.

    Returns:
        A list of scores. The list is empty when `img_sim_type` does not fit
        the sets (e.g. 'cross' on identical sets) or is not recognised.
    """
    # Sets are the same only if they have equal length and equal members;
    # zip alone would treat a prefix of a longer set as equal.
    same_sets = len(image_set_1) == len(image_set_2) and all(
        np.array_equal(m, n) for m, n in zip(image_set_1, image_set_2)
    )

    if img_sim_type == 'intra' and same_sets:
        # Skip self-comparisons by position; comparing the arrays' `.all`
        # methods does not compare their contents.
        return [
            img_sim_score(first, second)
            for i, first in enumerate(image_set_1)
            for j, second in enumerate(image_set_2)
            if i != j
        ]

    if img_sim_type == 'cross' and not same_sets:
        return [
            img_sim_score(first, second)
            for first in image_set_1
            for second in image_set_2
        ]

    return []

def img_sim_intra_calculator(image_list):
    """Mean 'intra' score of one image set.

    Each path in `image_list` is turned into a reduced feature vector, and
    the mean of the scores from `img_sim_iterator(..., 'intra')` over that
    set is returned.
    """
    vectors = [reduce_dimensions(extract_features(path)) for path in image_list]
    scores = img_sim_iterator(vectors, vectors, 'intra')
    return np.mean(scores)

def img_sim_cross_calculator(image_list):
    """Mean 'cross' score over every pair of image sets.

    `image_list` is a list of image-path lists. Each path is converted to a
    reduced feature vector; then, for every unordered pair of sets, the mean
    of `img_sim_iterator(..., 'cross')` is taken, and the mean of those
    per-pair means is returned.
    """
    feature_sets = [
        [reduce_dimensions(extract_features(path)) for path in paths]
        for paths in image_list
    ]

    pair_means = []
    # Walk from the last set backwards, pairing it with every earlier set.
    for later in range(len(feature_sets) - 1, -1, -1):
        for earlier in range(later):
            pair_scores = img_sim_iterator(feature_sets[later], feature_sets[earlier], 'cross')
            pair_means.append(np.mean(pair_scores))
    return np.mean(pair_means)

### 3. Calculating Image Similarity Scores

In [None]:
cd Face\ Dataset

/content/drive/MyDrive/Work_to_do/Face Dataset


### <b style="color:red">General Function to calculate the scores</b>

In [None]:
def Calculate_intra_cross_score(big_list, profession, n_runs=5):
    """Score one profession's image sets repeatedly and save each run as CSV.

    For every run, the intra score of each image set and one cross score
    over all sets are computed, combined into a table with one row per
    language/locale, and written to
    './Results/<profession>_KeepOriginalAug_<run>.csv'.

    Args:
        big_list: One list of image paths per language/locale, in the order
            of the locale labels defined below.
        profession: Query label written to the 'Query' column and used in
            the output file name.
        n_runs: Number of repetitions (each produces its own CSV file).

    Raises:
        ValueError: If `big_list` does not hold exactly one image set per
            locale label; rows would otherwise be dropped or mislabelled.
    """
    lang_locale_list = ['Arabic-West Asia & North Africa','English-North America','English-West Europe','Hindi-South Asia','Indonesian-SE Asia','Mandarin-East Asia','Russian-East Europe','Spanish-Latin America','Swahili-Sub Saharan Africa']
    column_list = ['Query', 'Lang&Locale','Image_List','ImgSim_Intra','ImgSim_Cross']

    n_sets = len(big_list)
    # Fail before any scoring starts instead of letting zip() truncate rows.
    if n_sets != len(lang_locale_list):
        raise ValueError(
            'expected ' + str(len(lang_locale_list)) + ' image sets (one per locale), got ' + str(n_sets)
        )

    # The output folder must exist before to_csv is called.
    os.makedirs('./Results', exist_ok=True)

    for rn in range(n_runs):
        intra_scores = [img_sim_intra_calculator(ls) for ls in big_list]
        cross_score = img_sim_cross_calculator(big_list)

        # The single cross score and the query label repeat on every row.
        rows = list(zip([profession] * n_sets, lang_locale_list, big_list,
                        intra_scores, [cross_score] * n_sets))
        scores_df = pd.DataFrame(rows, columns=column_list)
        scores_df.to_csv('./Results/'+profession+'_KeepOriginalAug_'+str(rn)+'.csv')

### <b style="color:red">CEO</b>

#### Loading the images

In [None]:
# Image paths for the CEO query, one list per language/locale folder.
ceo_ar = glob('Faces/CEO/Arabic/*')
ceo_engna = glob('Faces/CEO/English_NA/*')
ceo_engwe = glob('Faces/CEO/English_WE/*')
ceo_hi = glob('Faces/CEO/Hindi/*')
ceo_id = glob('Faces/CEO/Indonesian/*')
ceo_mnd = glob('Faces/CEO/Mandarin/*')
ceo_ru = glob('Faces/CEO/Russian/*')
ceo_es = glob('Faces/CEO/Spanish/*')
ceo_sw = glob('Faces/CEO/Swahili/*')

# The order of the sets has to follow the locale labels used by
# Calculate_intra_cross_score.
ceo_list = [
    ceo_ar, ceo_engna, ceo_engwe,
    ceo_hi, ceo_id, ceo_mnd,
    ceo_ru, ceo_es, ceo_sw,
]
Calculate_intra_cross_score(ceo_list, 'CEO')



### <b style="color:red">Engineer</b>

#### Loading the images

In [None]:
# Image paths for the Engineer query, one list per language/locale folder.
engr_ar = glob('Faces/Engineer/Arabic/*')
engr_engna = glob('Faces/Engineer/English_NA/*')
engr_engwe = glob('Faces/Engineer/English_WE/*')
engr_hi = glob('Faces/Engineer/Hindi/*')
engr_id = glob('Faces/Engineer/Indonesian/*')
engr_mnd = glob('Faces/Engineer/Mandarin/*')
engr_ru = glob('Faces/Engineer/Russian/*')
engr_es = glob('Faces/Engineer/Spanish/*')
engr_sw = glob('Faces/Engineer/Swahili/*')

# The order of the sets has to follow the locale labels used by
# Calculate_intra_cross_score.
engr_list = [
    engr_ar, engr_engna, engr_engwe,
    engr_hi, engr_id, engr_mnd,
    engr_ru, engr_es, engr_sw,
]
Calculate_intra_cross_score(engr_list, 'Engineer')



### <b style="color:red">Nurse</b>

#### Loading the images

In [None]:
# Image paths for the Nurse query, one list per language/locale folder.
nur_ar = glob('Faces/Nurse/Arabic/*')
nur_engna = glob('Faces/Nurse/English_NA/*')
nur_engwe = glob('Faces/Nurse/English_WE/*')
nur_hi = glob('Faces/Nurse/Hindi/*')
nur_id = glob('Faces/Nurse/Indonesian/*')
nur_mnd = glob('Faces/Nurse/Mandarin/*')
nur_ru = glob('Faces/Nurse/Russian/*')
nur_es = glob('Faces/Nurse/Spanish/*')
nur_sw = glob('Faces/Nurse/Swahili/*')

# The order of the sets has to follow the locale labels used by
# Calculate_intra_cross_score.
nur_list = [
    nur_ar, nur_engna, nur_engwe,
    nur_hi, nur_id, nur_mnd,
    nur_ru, nur_es, nur_sw,
]
Calculate_intra_cross_score(nur_list, 'Nurse')




### <b style="color:red">Politician</b>

#### Loading the images

In [None]:
# Image paths for the Politician query, one list per language/locale folder.
pol_ar = glob('Faces/Politician/Arabic/*')
pol_engna = glob('Faces/Politician/English_NA/*')
pol_engwe = glob('Faces/Politician/English_WE/*')
pol_hi = glob('Faces/Politician/Hindi/*')
pol_id = glob('Faces/Politician/Indonesian/*')
pol_mnd = glob('Faces/Politician/Mandarin/*')
pol_ru = glob('Faces/Politician/Russian/*')
pol_es = glob('Faces/Politician/Spanish/*')
pol_sw = glob('Faces/Politician/Swahili/*')

# The order of the sets has to follow the locale labels used by
# Calculate_intra_cross_score.
pol_list = [
    pol_ar, pol_engna, pol_engwe,
    pol_hi, pol_id, pol_mnd,
    pol_ru, pol_es, pol_sw,
]
Calculate_intra_cross_score(pol_list, 'Politician')




### <b style="color:red">School Teacher</b>

#### Loading the images

In [None]:
# Image paths for the School Teacher query, one list per language/locale folder.
st_ar = glob('Faces/School Teacher/Arabic/*')
st_engna = glob('Faces/School Teacher/English_NA/*')
st_engwe = glob('Faces/School Teacher/English_WE/*')
st_hi = glob('Faces/School Teacher/Hindi/*')
st_id = glob('Faces/School Teacher/Indonesian/*')
st_mnd = glob('Faces/School Teacher/Mandarin/*')
st_ru = glob('Faces/School Teacher/Russian/*')
st_es = glob('Faces/School Teacher/Spanish/*')
st_sw = glob('Faces/School Teacher/Swahili/*')
# The order of the sets has to follow the locale labels used by
# Calculate_intra_cross_score.
st_list = [
    st_ar, st_engna, st_engwe,
    st_hi, st_id, st_mnd,
    st_ru, st_es, st_sw,
]
Calculate_intra_cross_score(st_list, 'School Teacher')

