In [16]:
from elasticsearch import Elasticsearch
from pysolr import Solr
from pathlib import Path
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cosine
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from collections import defaultdict

In [2]:
embeddings_path = Path('/media/discoD/Mestrado/NoLeak/Gait/features-all/stored-features')

In [3]:
def get_gait_embeddings(embeddings_path):
    embeddings = defaultdict(dict)
    for npy in embeddings_path.iterdir():
        if npy.name.endswith('.npy'):
            embedding = np.load(npy, allow_pickle=True)[0][0]
            assert len(embedding) == 15872
            uuid = npy.name.split('_')[0]
            embeddings[uuid] = {
                'uuid': uuid,
                'gait_embedding_15872': np.array(embedding).reshape(-1, 1),
                'gait_embedding_15872_str': ','.join([str(value) for value in embedding[:8192]])
            }
    return embeddings

In [4]:
gait_embeddings = get_gait_embeddings(embeddings_path)

In [5]:
references = [
    '1a6ce47f-d707-46d6-a7d9-3580e5897ca1', 
    '377f9fdd-b334-4d4c-a1b8-033fa8f9d575', 
    'e97d8728-4f62-454b-b361-669c3b9e2bd0', 
    'c5487d96-26d0-48ba-b64a-02590d1a0241', 
    '9aa93eee-3980-49c5-bed0-89ada3c35859'
]

In [6]:
reference_gait_embedding = gait_embeddings[references[0]]['gait_embedding_15872']

In [17]:
for embedding in gait_embeddings.values():
    similarity = cosine_similarity(reference_gait_embedding, embedding['gait_embedding_15872'])
    print(similarity.shape)
    print(similarity)
    print(cosine(reference_gait_embedding, embedding['gait_embedding_15872']))
    break

(15872, 15872)
[[-1.  1. -1. ... -1. -1. -1.]
 [ 1. -1.  1. ...  1.  1.  1.]
 [ 1. -1.  1. ...  1.  1.  1.]
 ...
 [ 1. -1.  1. ...  1.  1.  1.]
 [ 1. -1.  1. ...  1.  1.  1.]
 [ 1. -1.  1. ...  1.  1.  1.]]
0.203316330909729


In [None]:
cosine_similarity(np.array(gait_embedding).reshape(-1,1), np.array(embedding.get('gait_embedding_15872')).reshape(-1,1))

In [20]:
similarities = defaultdict(list)
for reference in references:
    gait_embedding = gait_embeddings.get(reference).get('gait_embedding_15872')
    similarities[reference].extend([{embedding.get('uuid'): 1. - cosine(reference_gait_embedding, embedding['gait_embedding_15872'])} for embedding in gait_embeddings.values()])    

In [21]:
similarities

defaultdict(list,
            {'1a6ce47f-d707-46d6-a7d9-3580e5897ca1': [{'00d30e4e-23b7-493e-a74f-37e0a74d2937': 0.796683669090271},
              {'01851c58-f92b-4c19-85f8-2878dd1e4d6a': 0.8190782070159912},
              {'025d6f7e-5605-4284-861f-77513b240de9': 0.897533655166626},
              {'04e753ab-5c53-4fdb-bc8d-4000a47e5244': 0.7834763526916504},
              {'0552add4-8598-4644-b79a-e20058bcbd08': 0.7996620535850525},
              {'06ff673d-5592-4331-a9f4-06d4b63bd2ee': 0.897533655166626},
              {'08071d5b-9cb5-44c2-b880-ac46172704bd': 0.7874904870986938},
              {'0fe34481-e50b-4de0-81b4-901a7cf1d464': 0.8947687149047852},
              {'1bee19cf-597c-4256-b035-f7cc1b7a8e96': 1.0},
              {'0840bb85-c306-4113-9941-60dfcb2ad035': 0.897533655166626},
              {'0866bee2-55a1-4698-b48f-5209631a7a23': 0.8866150379180908},
              {'0b1c9e9f-f166-43bc-88db-94507580f106': 0.987451434135437},
              {'0db46d31-d243-472a-b13e-5b6e58002e

In [3]:
client = Elasticsearch('localhost', port=9200)

In [None]:
client.index(index='gait', doc_type='gait_embedding', body={})

In [None]:
def index_gait_embeddings(file_name, base_path = Path('/media/discoD/Mestrado/NoLeak/gait/')):
    file_path = base_path / file_name
    embedding = np.load(file_path, allow_pickle=True)[0][0]
    assert len(embedding) == 15872
    uuid = file_name.split('_')[0]
    print(uuid)
    solr_client.add([
        {
            'uuid': uuid,
            'template_8192': ','.join([str(value) for value in embedding[:8192]])
        }
    ])