# Similarity Score

In [4]:
import os

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.graphs import Neo4jGraph

In [5]:
# Connection settings for the Neo4j instance. Each value can be overridden via
# an environment variable so real credentials do not have to live in the
# notebook; the fallbacks are the original local-development values.
url = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jgraph")

# Embedding model used further down to embed the free-text profile when the
# vector index is queried.
embedding_model = OllamaEmbeddings(model="nomic-embed-text")

# Graph handle used to run Cypher queries against the database.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
    enhanced_schema=True,
)



In [6]:
# Free-text description of the target persona. It is embedded later and
# compared against the embeddings stored on the Person nodes.
profile = 'a rich young man with 1 vehicles'

# Structured counterpart of `profile`, passed to Neo4j as query parameters so
# the thresholds can be tuned here without editing the Cypher text.
# Lower bounds are inclusive, upper bounds are exclusive.
person_filter = {
    "income_min": 60000,
    "income_max": 90000,
    "age_min": 18,
    "age_max": 30,
    "vehicles": '1',      # compared against p.vehicles as a string
    "sample_size": 50,    # number of randomly sampled Person nodes
    "desire": 'Eat',
}

# 1. Pick a random sample of Person nodes matching the filter
#    (ORDER BY rand() means the sample differs from run to run).
# 2. Expand each sampled person to their matching Desire and the Intention
#    nodes linked to it.
# 3. Collapse everything into de-duplicated lists.
cypher = '''
MATCH (p:Person)
WHERE p.income >= $income_min AND p.income < $income_max AND
      p.age >= $age_min AND p.age < $age_max AND
      p.vehicles = $vehicles
WITH p ORDER BY rand() LIMIT $sample_size
MATCH (p)-[r1:WANT_TO]-(d:Desire {desire: $desire})-[r2:GO_TO]-(i:Intention)
RETURN COLLECT(DISTINCT p) AS person,
    COLLECT(DISTINCT d) AS desire,
    COLLECT(DISTINCT i) AS intention,
    COLLECT(DISTINCT r1) AS want_to,
    COLLECT(DISTINCT r2) AS go_to
'''

# The RETURN clause contains only aggregates, so the result is a single row;
# [0] unwraps that row into a dict keyed by the aliases above.
query_results = graph.query(cypher, params=person_filter)[0]

In [7]:
# Pull the de-duplicated Person entries out of the aggregated result row and
# dump them for inspection (each entry carries its full embedding vector, so
# the output is long).
people = query_results["person"]
print(people)

[{'income': 69774, 'household_size': '2_person', 'name': 'Person', 'description': 'A 26 year old person, living in a nonfamily_single family with 2_person members. The person has 1 vehicles and an annual income of 69774 dollars.', 'vehicles': '1', 'embedding': [-0.3102651536464691, 1.031312108039856, -3.404232978820801, -0.4400428235530853, -1.0102461576461792, 0.09375612437725067, 0.24129998683929443, -0.11295327544212341, 0.2624088227748871, -0.440420001745224, -1.0628021955490112, -0.8955872654914856, 0.0013450905680656433, 0.14530807733535767, -0.016412053257226944, 0.424463152885437, -0.02059897780418396, -0.14924539625644684, 0.35562625527381897, 0.6938388347625732, -0.6926067471504211, 0.45424437522888184, -1.227227807044983, 0.2425413429737091, 2.5772712230682373, 0.9266771078109741, 0.31028181314468384, 0.9778851270675659, -1.3810807466506958, -0.2591446340084076, 1.0411412715911865, 0.08703000098466873, -0.057282038033008575, -0.1962922364473343, -2.5346479415893555, -0.21601

In [8]:
# Pair every sampled person's id with the embedding already stored on the
# node. The id is used as the "text" of each vector entry, so a search hit can
# be traced back to its person through `page_content`.
people = query_results['person']
description_embedding_pairs = [
    (person['id'], person['embedding']) for person in people
]

# Build a vector index holding only the sampled people.
# NOTE(review): pre_delete_collection=True presumably clears data left in the
# index by a previous run - confirm against the LangChain documentation.
sub_person_index = Neo4jVector.from_embeddings(
    text_embeddings=description_embedding_pairs,
    embedding=embedding_model,
    url=url,
    username=username,
    password=password,
    pre_delete_collection=True,
)

# Score every indexed person against the free-text profile; k is set to the
# number of people so that nobody is cut off from the result list.
scored_documents = sub_person_index.similarity_search_with_score(
    query=profile,
    k=len(people),
)
results = [
    {"id": document.page_content, "similarity_score": similarity}
    for document, similarity in scored_documents
]
print(results)

[{'id': 'Person_3.8426937351904113e+18', 'similarity_score': 0.8204120993614197}, {'id': 'Person_1.1499958489440956e+19', 'similarity_score': 0.8197864294052124}, {'id': 'Person_8.451599414608053e+18', 'similarity_score': 0.8165066242218018}, {'id': 'Person_1.518803603034496e+19', 'similarity_score': 0.815579354763031}, {'id': 'Person_8.239046909831888e+18', 'similarity_score': 0.8154996037483215}, {'id': 'Person_1.6054756723242437e+19', 'similarity_score': 0.8109298944473267}, {'id': 'Person_8.066272395491427e+18', 'similarity_score': 0.8098921179771423}, {'id': 'Person_5.217925380016289e+18', 'similarity_score': 0.8080804944038391}]


In [9]:
# Index the scores by person id once, so each person is matched with a single
# dictionary lookup instead of rescanning `results` for every person.
# If an id occurs more than once in `results`, the last entry wins.
score_by_id = {r['id']: r['similarity_score'] for r in results}

# Annotate the person dicts in place. A person whose id has no entry in
# `results` is left without a 'similarity_score' key.
for person in people:
    person_id = person['id']
    if person_id in score_by_id:
        person['similarity_score'] = score_by_id[person_id]

print(people)

[{'income': 69774, 'household_size': '2_person', 'name': 'Person', 'description': 'A 26 year old person, living in a nonfamily_single family with 2_person members. The person has 1 vehicles and an annual income of 69774 dollars.', 'vehicles': '1', 'embedding': [-0.3102651536464691, 1.031312108039856, -3.404232978820801, -0.4400428235530853, -1.0102461576461792, 0.09375612437725067, 0.24129998683929443, -0.11295327544212341, 0.2624088227748871, -0.440420001745224, -1.0628021955490112, -0.8955872654914856, 0.0013450905680656433, 0.14530807733535767, -0.016412053257226944, 0.424463152885437, -0.02059897780418396, -0.14924539625644684, 0.35562625527381897, 0.6938388347625732, -0.6926067471504211, 0.45424437522888184, -1.227227807044983, 0.2425413429737091, 2.5772712230682373, 0.9266771078109741, 0.31028181314468384, 0.9778851270675659, -1.3810807466506958, -0.2591446340084076, 1.0411412715911865, 0.08703000098466873, -0.057282038033008575, -0.1962922364473343, -2.5346479415893555, -0.21601

In [10]:
# Spot-check: show the similarity score attached to the first person.
first_person = people[0]
first_person["similarity_score"]

0.8197864294052124