In [None]:
'''
Create a Neo4j Docker container with the following command:

```
docker run \
    -it \
    --restart always \
    --publish=7474:7474 --publish=7687:7687 \
    --env NEO4J_AUTH=neo4j/neo4jgraph \
    --volume=$HOME/Documents/neo4j/data:/data \
    --volume=$HOME/Documents/neo4j/logs:/logs \
    --volume=$HOME/Documents/neo4j/conf:/conf \
    --env NEO4J_dbms_memory_pagecache_size=4G \
    --env NEO4J_PLUGINS='["apoc", "graph-data-science"]' \
    --name neo4j \
    neo4j:5.20.0

```

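If the container cannot write to the mounted host directories, relax their
permissions (note that 777 makes them world-writable):

sudo chmod 777 $HOME/Documents/neo4j/data
sudo chmod 777 $HOME/Documents/neo4j/logs
sudo chmod 777 $HOME/Documents/neo4j/conf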
'''

In [1]:
import os

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.graphs import Neo4jGraph
# Embedding model handed to the Neo4jVector constructors later in the notebook.
# NOTE(review): presumably requires a reachable Ollama server with the
# "nomic-embed-text" model already pulled — confirm before running.
embedding_model = OllamaEmbeddings(model="nomic-embed-text")


In [2]:
# Connection settings for the Neo4j instance. Each value can be overridden via
# an environment variable so real credentials never need to be written into the
# notebook; the fallbacks match the local Docker setup described in the first cell.
url = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jgraph")

# LangChain wrapper around the database, constructed with enhanced_schema=True.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
    enhanced_schema=True,
)



# Create Dataset

In [3]:
from neo4j import GraphDatabase

# Connect to Neo4j with the official driver. Settings come from the environment
# when present; the fallbacks target the local development instance.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jgraph")
driver = GraphDatabase.driver(uri, auth=(username, password))

In [4]:
# Clear the existing data

def clear_database(tx):
    """Remove every node and relationship from the database.

    Args:
        tx: Object exposing ``run(query)``. Its return value is ignored.
    """
    # DETACH DELETE removes each node together with its relationships, so a
    # separate relationship-deletion statement is not needed.
    tx.run("MATCH (n) DETACH DELETE n")

# Wipe the graph: clear_database is handed to execute_write, which supplies the
# transaction object it runs its Cypher against.
with driver.session() as session:
    session.execute_write(clear_database)

In [5]:
import pandas as pd
import numpy as np

# Directory holding the training CSVs; change this one value to load the same
# five tables from a different dataset directory.
DATA_DIR = 'data/train/1K'

person_df = pd.read_csv(f'{DATA_DIR}/person.csv')
desire_df = pd.read_csv(f'{DATA_DIR}/desire.csv')
# NOTE: `itention_df` is misspelled, but the load cell further down refers to it
# by this exact name, so the name is kept.
itention_df = pd.read_csv(f'{DATA_DIR}/intention.csv')
go_to_edge_df = pd.read_csv(f'{DATA_DIR}/go_to_edge.csv')
want_to_edge_df = pd.read_csv(f'{DATA_DIR}/want_to_edge.csv')

In [6]:
def create_person(tx, id, age, income, household_size, family_structure, vehicles, name, description):
    """Create one ``Person`` node whose properties are the given arguments.

    Args:
        tx: Object exposing ``run(query, **params)``.
        id, age, income, household_size, family_structure, vehicles, name,
        description: Stored on the node under properties of the same names.

    Returns:
        None; the result of ``tx.run`` is discarded.
    """
    cypher = (
        "CREATE (a:Person {id: $id, age: $age, income: $income, "
        "household_size: $household_size, family_structure: $family_structure, "
        "vehicles: $vehicles, name: $name, description: $description})"
    )
    properties = dict(
        id=id,
        age=age,
        income=income,
        household_size=household_size,
        family_structure=family_structure,
        vehicles=vehicles,
        name=name,
        description=description,
    )
    tx.run(cypher, **properties)

def create_desire(tx, id, desire, description):
    """Create one ``Desire`` node with ``id``, ``desire`` and ``description`` properties.

    Args:
        tx: Object exposing ``run(query, **params)``.
        id, desire, description: Stored on the node under the same names.

    Returns:
        None; the result of ``tx.run`` is discarded.
    """
    cypher = "CREATE (a:Desire {id: $id, desire: $desire, description: $description})"
    properties = {"id": id, "desire": desire, "description": description}
    tx.run(cypher, **properties)

def create_intention(tx, id, target_amenity, mode, distance_miles, duration_minutes, location_name, description):
    """Create one ``Intention`` node whose properties are the given arguments.

    Args:
        tx: Object exposing ``run(query, **params)``.
        id, target_amenity, mode, distance_miles, duration_minutes,
        location_name, description: Stored on the node under the same names.

    Returns:
        None; the result of ``tx.run`` is discarded.
    """
    cypher = (
        "CREATE (a:Intention {id: $id, target_amenity: $target_amenity, mode: $mode, "
        "distance_miles: $distance_miles, duration_minutes: $duration_minutes, "
        "location_name: $location_name, description: $description})"
    )
    properties = dict(
        id=id,
        target_amenity=target_amenity,
        mode=mode,
        distance_miles=distance_miles,
        duration_minutes=duration_minutes,
        location_name=location_name,
        description=description,
    )
    tx.run(cypher, **properties)
    
def create_want_to_edge(tx, person_id, desire_id):
    """Create a ``WANT_TO`` relationship from matching ``Person`` to matching ``Desire`` nodes.

    Args:
        tx: Object exposing ``run(query, **params)``.
        person_id: Value compared against ``Person.id``.
        desire_id: Value compared against ``Desire.id``.

    Returns:
        None; the result of ``tx.run`` is discarded.
    """
    cypher = (
        "MATCH (a:Person),(b:Desire) "
        "WHERE a.id = $person_id AND b.id = $desire_id "
        "CREATE (a)-[r:WANT_TO]->(b)"
    )
    endpoints = {"person_id": person_id, "desire_id": desire_id}
    tx.run(cypher, **endpoints)

def create_go_to_edge(tx, desire_id, intention_id):
    """Create a ``GO_TO`` relationship from matching ``Desire`` to matching ``Intention`` nodes.

    Args:
        tx: Object exposing ``run(query, **params)``.
        desire_id: Value compared against ``Desire.id``.
        intention_id: Value compared against ``Intention.id``.

    Returns:
        None; the result of ``tx.run`` is discarded.
    """
    cypher = (
        "MATCH (a:Desire),(b:Intention) "
        "WHERE a.id = $desire_id AND b.id = $intention_id "
        "CREATE (a)-[r:GO_TO]->(b)"
    )
    endpoints = {"desire_id": desire_id, "intention_id": intention_id}
    tx.run(cypher, **endpoints)

In [7]:
def _create_rows(tx, create_fn, rows):
    """Call ``create_fn(tx, *args)`` once for each argument tuple in ``rows``."""
    for args in rows:
        create_fn(tx, *args)


# Build the argument tuples up front as lists: execute_write may invoke the
# transaction function again on a retry, and a list can be iterated twice.
person_rows = [
    (row['id'], row['age'], row['individual_income'], row['household_size'],
     row['family_structure'], row['vehicles'], 'Person', row['description'])
    for _, row in person_df.iterrows()
]
desire_rows = [
    (row['id'], row['desire'], row['description'])
    for _, row in desire_df.iterrows()
]
intention_rows = [
    (row['id'], row['target_amenity'], row['mode'], row['distance_miles'],
     row['duration_minutes'], row['location_name'], row['description'])
    for _, row in itention_df.iterrows()
]
want_to_rows = [(row['source'], row['target']) for _, row in want_to_edge_df.iterrows()]
go_to_rows = [(row['source'], row['target']) for _, row in go_to_edge_df.iterrows()]

with driver.session() as session:
    # One write transaction per table instead of one per row: the same
    # statements run in the same order, with far fewer commits. Node tables are
    # written before the edge tables whose MATCH clauses look those nodes up.
    for create_fn, rows in (
        (create_person, person_rows),
        (create_desire, desire_rows),
        (create_intention, intention_rows),
        (create_want_to_edge, want_to_rows),
        (create_go_to_edge, go_to_rows),
    ):
        session.execute_write(_create_rows, create_fn, rows)

# Create Index

In [3]:
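# Optional reset: uncomment to drop the `person` index and remove the stored
# `embedding` property from every Person node before rebuilding the index.
# graph.query(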
# "DROP INDEX person;"
# )
# graph.query(
# '''
# MATCH (p:Person)
# REMOVE p.embedding
# '''
# )

In [8]:
# Cypher tail that shapes each search hit into text / score / metadata.
# Person nodes are created with the income stored under the property `income`,
# so `.individual_income` would always project null; read `node.income`
# explicitly while keeping the metadata key `individual_income`.
retrieval_query = """
RETURN node.description AS text, score, node {.age, individual_income: node.income, .household_size,.family_structure,.vehicles} AS metadata
"""

# Vector store over Person nodes: the `description` property is the text that is
# embedded, the vector is kept in the `embedding` property, and the index is
# named `person`.
person_index = Neo4jVector.from_existing_graph(
    embedding_model,
    url=url,
    username=username,
    password=password,
    index_name='person',
    node_label="Person",
    text_node_properties=['description'],
    embedding_node_property='embedding',
    retrieval_query=retrieval_query,
)

# Load Existing Index

In [9]:
# Cypher tail that shapes each search hit into text / score / metadata.
# `text` is returned as a one-key map here, which is why the printed results
# below start with "description:". Person nodes are created with the income
# stored under the property `income`, so `.individual_income` would always
# project null; read `node.income` explicitly while keeping the metadata key.
retrieval_query = """
RETURN node {.description} AS text, score, node {.age, individual_income: node.income, .household_size,.family_structure,.vehicles} AS metadata
"""

# Attach to the already-built `person` vector index instead of rebuilding it.
person_index = Neo4jVector.from_existing_index(
    embedding=embedding_model,
    url=url,
    username=username,
    password=password,
    index_name='person',
    retrieval_query=retrieval_query,
)

In [10]:

# Free-text profile used as the similarity query against the `person` index.
profile_query = " A 20 year old person, living in a nonfamily_single family with 3_person members. The person has 2 vehicles and an annual income of 2020 dollars"
results = person_index.similarity_search_with_score(query=profile_query, k=5)

# Each hit is a (document, score) pair; print the text, its score, and a divider.
for document, score in results:
    print(document.page_content, f"Score:{score}", sep="\n")
    print("-" * 20)



description: A 20 year old person, living in a nonfamily_single family with 3_person members. The person has 0 vehicles and an annual income of 0 dollars.

Score:0.9059115648269653
--------------------
description: A 20 year old person, living in a nonfamily_single family with 3_person members. The person has 0 vehicles and an annual income of 0 dollars.

Score:0.9059115648269653
--------------------
description: A 20 year old person, living in a nonfamily_single family with 3_person members. The person has 0 vehicles and an annual income of 0 dollars.

Score:0.9059115648269653
--------------------
description: A 20 year old person, living in a nonfamily_single family with 3_person members. The person has 0 vehicles and an annual income of 7426 dollars.

Score:0.9058500528335571
--------------------
description: A 20 year old person, living in a nonfamily_single family with 2_person members. The person has 0 vehicles and an annual income of 0 dollars.

Score:0.9033349752426147
--------