In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
pip install --quiet --upgrade deepface

In [None]:
import ast
import contextlib
import glob
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from deepface import DeepFace
from pinecone import Pinecone, ServerlessSpec
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tqdm import tqdm

from UDCUtils import UDCUtils

In [None]:
utils = UDCUtils()

pinecone_api_key = utils.get_pinecone_api_key()

# Never echo the key itself: cell outputs are saved with the notebook and
# would leak the secret. Only report whether a key was obtained.
print(f"Pinecone API key loaded: {bool(pinecone_api_key)}")

In [None]:
# One-time dataset download, left disabled. Uncomment to fetch the archive to
# images/family_photos.zip and unpack it.
# NOTE(review): the images/ directory presumably has to exist before wget can
# write into it, and unzip normally takes its destination via `-d` — confirm
# both before re-enabling.
#!wget -q --show-progress -O "images/family_photos.zip" "https://www.dropbox.com/scl/fi/yg0f2ynbzzd2q4nsweti5/family_photos.zip?rlkey=00oeuiii3jgapz2b1bfj0vzys&dl=0"

#!unzip -q images/family_photos.zip images/

In [None]:
def show_img(img):
    """Read the image file at path ``img`` and draw it in a new 4x4 figure."""
    pixels = plt.imread(img)
    plt.figure(figsize=(4, 4))
    plt.imshow(pixels)

In [None]:
# Sanity check: render one sample face from the "mom" folder.
show_img("images/family/mom/P04407_face2.jpg")

## Set up the Pinecone client and index

In [None]:
# Client object used for the index management calls in the following cells.
pinecone = Pinecone(api_key=pinecone_api_key)

# Index name returned by the UDCUtils helper for the "idx-img-" argument.
index_name = utils.create_dlai_index_name("idx-img-")

In [None]:
# Create the index only when it does not exist yet, so re-running this cell
# (or the whole notebook) does not fail on an "already exists" conflict.
# `dimension` must equal the length of the embeddings that get upserted later.
if index_name not in pinecone.list_indexes().names():
    pinecone.create_index(
        name=index_name,
        dimension=128,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle used by the later cells for upsert / query / fetch.
INDEX = pinecone.Index(index_name)

## Create Embeddings using DeepFace

In [None]:
# DeepFace model used for every embedding in this notebook.
MODEL = "Facenet"


def generate_vectors(folder_list: list, vector_file: str = "vectors/FACIAL_SEARCH_MONARCHS.vec"):
    """Embed every image of the given people and write the results to a text file.

    For each name in ``folder_list`` the files under ``images/family/<name>/``
    are embedded with DeepFace (model ``MODEL``) and written as one line per
    image in the form ``<person>:<file name>:<embedding list>``.

    Args:
        folder_list: sub-folder names under ``images/family`` to process, in
            the order their records should appear in the output file.
        vector_file: output path. Any existing file is overwritten and a
            missing parent directory is created.
    """
    out_dir = os.path.dirname(vector_file)
    if out_dir:
        # Without this, opening the file fails on a fresh checkout.
        os.makedirs(out_dir, exist_ok=True)

    # Mode "w" truncates a previous file, so no separate remove step is needed.
    with open(vector_file, "w") as f:
        for person in folder_list:
            # Sorted so the line order is reproducible between runs.
            files = sorted(glob.glob(f'images/family/{person}/*'))
            for file in tqdm(files):
                try:
                    embedding = DeepFace.represent(
                        img_path=file, model_name=MODEL, enforce_detection=False
                    )[0]['embedding']
                except (ValueError, UnboundLocalError, AttributeError) as e:
                    # Best effort: report which image failed and keep going.
                    print(f'{file}: {e}')
                    continue
                f.write(f'{person}:{os.path.basename(file)}:{embedding}\n')


In [None]:
# Embed the images of all three family members; the folder order given here is
# the order in which records are written to the vector file.
generate_vectors(['dad','mom','child'])

In [None]:
# Peek at the first five records (one "person:filename:embedding" line each).
!head -5 "vectors/FACIAL_SEARCH_MONARCHS.vec"

## Plot the face embeddings with t-SNE

In [None]:
def gen_tsne_dataframe(person, perplexity, vector_file):
    """Project one person's embeddings to 2-D using PCA followed by t-SNE.

    Args:
        person: label to select; only lines whose first field equals it are used.
        perplexity: value passed to ``TSNE(perplexity=...)``.
        vector_file: path to a text file of ``person:filename:embedding`` lines.

    Returns:
        pandas.DataFrame with columns ``x`` and ``y``, one row per embedding.

    Raises:
        ValueError: if the file holds no embedding for ``person``.
    """
    vectors = []
    with open(vector_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split on the first and the last colon so a colon inside the
            # file name cannot shift the fields.
            label, _, rest = line.partition(":")
            if label != person:
                continue
            # literal_eval parses the list literal without executing code.
            vectors.append(ast.literal_eval(rest.rpartition(":")[2]))

    print(f"Transforming {len(vectors)} vectors")
    if not vectors:
        raise ValueError(f"no embeddings found for {person!r} in {vector_file}")

    pca = PCA(n_components=8)
    # The iteration-count keyword is omitted on purpose: 1000 is the library
    # default, and the keyword name differs between scikit-learn releases.
    tsne = TSNE(2, perplexity=perplexity, random_state=0, verbose=0, metric="euclidean", learning_rate=75)
    pca_transform_vectors = pca.fit_transform(vectors)
    embeddings2d = tsne.fit_transform(pca_transform_vectors)

    return pd.DataFrame({'x':embeddings2d[:,0],'y':embeddings2d[:,1]})

In [None]:
def plot_tsne(perplexity, model, persons, vector_file):
    """Scatter-plot the 2-D t-SNE projection of each person's embeddings.

    Args:
        perplexity: t-SNE perplexity, forwarded to ``gen_tsne_dataframe``.
        model: model name; used only in the plot title.
        persons: labels to plot. Any number of labels is accepted; colours are
            reused cyclically when there are more labels than palette entries.
        vector_file: path of the vector file read by ``gen_tsne_dataframe``.
    """
    palette = ['#ee8933', '#4fad5b', '#4c93db']

    fig, ax = plt.subplots(figsize=(8,5))
    ax.grid(color='#EAEAEB', linewidth=0.5)
    # Hide the top/right frame lines and tint the remaining two.
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_color('#2B2F30')

    # dict.fromkeys drops repeated labels while keeping their first-seen order.
    labels = dict.fromkeys(str(p) for p in persons)
    for pos, person in enumerate(labels):
        embeddingsdf = gen_tsne_dataframe(person, perplexity, vector_file)
        ax.scatter(embeddingsdf.x, embeddingsdf.y, alpha=.5,
                   label=person, color=palette[pos % len(palette)])

    ax.set_title(f'Scatter plot of faces using {model}', fontsize=16, fontweight='bold', pad=20)
    fig.suptitle(f't-SNE [perplexity={perplexity}]', y=0.92, fontsize=13)
    ax.legend(loc='best', frameon=True)
    plt.show()

In [None]:
# Plot all three people with perplexity 44.
# NOTE(review): t-SNE presumably needs more samples per person than the
# perplexity value — confirm every folder holds enough images.
plot_tsne(44, 'facenet', persons=['dad','child','mom'], vector_file="vectors/FACIAL_SEARCH_MONARCHS.vec")

## Upsert vectors to Pinecone

In [None]:
# Remove all records from the default namespace before loading new ones.
# NOTE(review): on a freshly created index this call may fail because the
# namespace does not exist yet — confirm, and guard it if so.
INDEX.delete(delete_all=True, namespace='')

In [None]:
vector_file="vectors/FACIAL_SEARCH_MONARCHS.vec"
prepped=[]
cnt=0
with open(vector_file, "r") as f:
    for line in tqdm(f):
        line = line.strip()
        if not line:
            continue
        # Split on the first and the last colon so a colon inside the file
        # name cannot shift the fields.
        person, _, rest = line.partition(":")
        imgfile, _, embeddings = rest.rpartition(":")
        cnt = cnt + 1
        prepped.append(
            {'id':f'{person}-{cnt}',
            # literal_eval parses the list literal without executing code.
            'values':ast.literal_eval(embeddings),
            'metadata':{'person':person, 'file':imgfile}}
        )

# Send the records once, after the whole file is read. Upserting inside the
# loop re-sent the entire growing list on every line.
if prepped:
    INDEX.upsert(vectors=prepped, batch_size=100)

In [None]:
# Drop the in-memory copy of the records built in the upsert cell.
prepped.clear()

In [None]:
# Number of records read from the vector file in the upsert cell.
print(cnt)

In [None]:
# Show the index statistics to check what the index now contains.
INDEX.describe_index_stats()

In [None]:
# Spot-check one stored record; ids follow the "<person>-<counter>" pattern
# assigned in the upsert cell.
val = INDEX.fetch(ids=['dad-1'])
print(val.to_dict())

In [None]:
# Test which parent most resembles the child.
def compute_match_scores(vec_groups, parent, child, index=None, top_k=10, sample_size=10):
    """Average the similarity between a parent's faces and the child's indexed faces.

    The first ``sample_size`` embeddings of ``parent`` are each used to query
    the index, restricted to records whose ``person`` metadata equals
    ``child``. The scores of all returned matches are averaged.

    Args:
        vec_groups: mapping of person label -> list of embedding vectors.
        parent: key in ``vec_groups`` whose vectors are used as queries.
        child: ``person`` metadata value the query results are filtered on.
        index: object exposing ``query(...)``; defaults to
            ``pinecone.Index(index_name)``.
        top_k: number of matches requested per query.
        sample_size: maximum number of parent vectors to query with.

    Returns:
        The mean score over every match returned (0.0 if there were none).
        The same value is printed.
    """
    if index is None:
        index = pinecone.Index(index_name)
    parent_vecs = vec_groups[parent]
    print(len(parent_vecs))

    total = 0.0
    n_matches = 0
    # Slicing tolerates a parent with fewer than `sample_size` vectors.
    for vec in tqdm(parent_vecs[:sample_size]):
        response = index.query(
            vector=vec,
            filter={
                "person":{"$eq": child}
            },
            top_k=top_k,
            include_metadata=True
        )
        # Accumulate inside the query loop so every response contributes,
        # not only the last one.
        for match in response["matches"]:
            total += match["score"]
            n_matches += 1

    # Divide by the matches actually returned; a query can yield fewer than
    # `top_k` results.
    average = total / n_matches if n_matches else 0.0
    print(f"Average match score for {parent} is {average}")
    return average

In [None]:
def test():
    """Load the embeddings per person and score each parent against the child.

    Reads the file named by the notebook-level ``vector_file``, groups the
    embeddings by their person label, then calls ``compute_match_scores`` for
    "dad" and for "mom", each compared with "child".
    """
    vector_grps = {"dad":[], "mom":[], "child":[]}
    with open(vector_file, "r") as f:
        for line in tqdm(f):
            line = line.strip()
            if not line:
                continue
            # Split on the first and the last colon so a colon inside the
            # file name cannot shift the fields.
            person, _, rest = line.partition(":")
            emb = rest.rpartition(":")[2]
            # literal_eval parses the list literal without executing code;
            # setdefault keeps an unexpected label from raising KeyError.
            vector_grps.setdefault(person, []).append(ast.literal_eval(emb))

    for heading, parent in (("DAD", "dad"), ("MOM", "mom")):
        print(f"{heading} \n{'-' * 20}")
        compute_match_scores(vector_grps, parent, "child")

In [None]:
# Run the dad-vs-child and mom-vs-child comparisons.
test()

## Checking the matching images

In [None]:
# Child photo used as the query image in the next cell.
child_base = 'images/family/child/P06310_face1.jpg'
show_img(child_base)

In [None]:
# Find the dad images closest to the child photo (chosen because dad scored
# higher than mom in the comparison above).
# enforce_detection=False mirrors how the indexed embeddings were generated,
# so the query does not raise when the face detector finds nothing.
embedding = DeepFace.represent(img_path=child_base, model_name=MODEL, enforce_detection=False)[0]['embedding']

response = INDEX.query(
    vector=embedding,
    filter={
        'person':{"$eq":"dad"}
    },
    top_k=10,
    include_metadata=True
)

print(response["matches"])

In [None]:
# Show the top-ranked dad photo; the "file" metadata holds the image file name
# stored at upsert time.
show_img("images/family/dad/"+response["matches"][0]["metadata"]["file"])