# Convert data to a Neo4j graph

In [1]:
import sys
import os

sys.path.append('..')

from model import ChoiceModel

# Build the model. On the very first run, set skip_init=False so that the data
# gets prepared; that preparation takes a while.
# SAMPLE_NUM is the total number of samples used to build the dataset; it is
# passed both to the constructor and to the training-data preparation step.
SAMPLE_NUM = 1000

choice_model = ChoiceModel(
    data_dir='../data',
    desire='Eat',
    sample_num=SAMPLE_NUM,
    skip_init=True,
    skip_test=True,
)

# Prepare the training data, then hand the resulting path to the Neo4j loader.
# NOTE(review): desire=None here differs from desire='Eat' above -- presumably
# it means "do not restrict to one desire"; confirm against ChoiceModel.
train_data_path = choice_model._prepare_train_data(
    sample_num=SAMPLE_NUM,
    desire=None,
)
choice_model._prepare_neo4j(train_data_path)

preparing train data...: 100%|██████████| 1000/1000 [00:02<00:00, 441.97it/s]
adding person nodes...: 100%|██████████| 997/997 [00:01<00:00, 799.08it/s]
adding desire nodes...: 100%|██████████| 999/999 [00:01<00:00, 971.85it/s]
adding itention nodes...: 100%|██████████| 1000/1000 [00:01<00:00, 878.64it/s]
adding wan_to edges...: 100%|██████████| 999/999 [00:01<00:00, 617.48it/s]
adding go_to edges...: 100%|██████████| 1000/1000 [00:01<00:00, 577.82it/s]


creating neo4j index...
done!


# Add `SIMILAR_TO` links

In [2]:
from langchain_community.graphs import Neo4jGraph

# Connection settings are read from the environment so that credentials do not
# have to be edited into (and committed with) the notebook. The fallbacks are
# the values this notebook previously hard-coded, so a local setup keeps
# working without any environment configuration.
# NOTE(review): drop the password fallback once NEO4J_PASSWORD is set wherever
# this notebook is run.
url = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jgraph")

# Shared Neo4j handle used by the query cells below.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
    enhanced_schema=True,
)

### Clear existing `SIMILAR_TO` links

In [3]:
# Delete every SIMILAR_TO relationship (in any direction, between any nodes);
# the nodes themselves are left in place.
clear_similar_to_query = '''
MATCH ()-[r:SIMILAR_TO]->()
DELETE r
'''
graph.query(clear_similar_to_query)

[]

### Create `SIMILAR_TO` links

In [4]:
# How many most-similar Person nodes each Person is linked to. This used to be
# a literal inside the query text; it is now passed as the $top_k parameter.
TOP_K_SIMILAR = 5

# For each Person p, the subquery:
#   1. pairs p with every other Person (different `id`),
#   2. scores each pair with the cosine similarity of their `embedding` properties,
#   3. keeps the $top_k highest-scoring candidates,
#   4. MERGEs a directed (p)-[:SIMILAR_TO]->(similar) relationship and stores
#      the score (plus a `type` property) on it.
# MERGE makes re-running the cell update existing relationships rather than
# duplicate them. The outer RETURN reports the number of Person rows processed.
cypher = '''
MATCH (p:Person)
CALL {
  WITH p
  MATCH (similar:Person)
  WHERE p.id <> similar.id
  WITH p, similar, gds.similarity.cosine(p.embedding, similar.embedding) AS similarityScore
  ORDER BY similarityScore DESC
  LIMIT $top_k
  MERGE (p)-[r:SIMILAR_TO]->(similar)
  SET r.score = similarityScore, r.type = 'SIMILAR_TO'
  RETURN count(*) as count
}
RETURN count(p) as processedNodes
'''
graph.query(cypher, params={"top_k": TOP_K_SIMILAR})

[{'processedNodes': 997}]