In [None]:
!pip install -U neo4j spacy python-decouple
!python3 -m spacy download en_core_web_sm

In [None]:
from neo4j import GraphDatabase

import pandas as pd
import spacy
from decouple import config

# Connection settings are looked up through python-decouple's `config`, so no
# credentials are hard-coded in the notebook.
uri = config('NEO4J_URI')
username = config('NEO4J_USERNAME')
password = config('NEO4J_PASSWORD')

# Shared driver used by every database cell below; closed in the final cell.
driver = GraphDatabase.driver(uri, auth=(username, password))
# Creating the driver does not by itself prove the server is reachable or the
# credentials are valid; check now so a misconfiguration fails here instead of
# in the middle of a later cell.
driver.verify_connectivity()

# spaCy pipeline used by `generate_embeddings` to turn text into a vector.
nlp = spacy.load('en_core_web_sm')

In [None]:
def generate_embeddings(text):
    """Return the spaCy document vector for ``text``.

    The text is processed with the module-level ``nlp`` pipeline and the
    ``vector`` attribute of the resulting document is returned as-is.
    """
    return nlp(text).vector

In [None]:
# Fetch every Person node that has no embedding yet and compute an embedding
# from a plain-text rendering of its email / firstName / lastName properties.
query = "MATCH (p:Person) WHERE p.embedding IS NULL RETURN p.email AS email, p.firstName as firstName, p.lastName as lastName"
text_columns = ['email', 'firstName', 'lastName']

# Only the read needs the session: materialise the rows, then release the
# session before the (comparatively slow) embedding step runs.
with driver.session() as session:
    result = session.run(query)
    personNode = [tuple(record[column] for column in text_columns) for record in result]

df = pd.DataFrame(personNode, columns=text_columns)

# A property that is missing on a node comes back as None. Concatenating it
# would produce a non-string value that the spaCy pipeline rejects, aborting
# the whole cell. Substitute '' only while building the text; the stored
# columns keep their original values.
df['plainText'] = (
    df['email'].fillna('') + ' ' + df['firstName'].fillna('') + ' ' + df['lastName'].fillna('')
)
df['embedding'] = df['plainText'].apply(generate_embeddings)

In [None]:
# Write the computed embeddings back onto the matching Person nodes.
# Each row of `df` is matched on email + firstName + lastName.
# NOTE(review): Cypher `=` against a null value never evaluates to true, so
# rows whose email/firstName/lastName is null will not match any node here —
# confirm whether such nodes should be matched on a different key.
query = """
        UNWIND $data AS data
        MATCH (p:Person)
        WHERE p.email = data.email AND p.firstName = data.firstName AND p.lastName = data.lastName
        SET p.embedding = data.embedding
        """

# Convert each row to a plain dict and each embedding array to a Python list
# so the records can be sent as query parameters.
data_to_write = df.to_dict(orient='records')
for item in data_to_write:
    item['embedding'] = item['embedding'].tolist()

# Send the rows in fixed-size batches; a single session is enough for all of
# them (the original opened a second, unused session around this loop).
batch_size = 500
with driver.session() as session:
    for batch_start in range(0, len(data_to_write), batch_size):
        batch_end = batch_start + batch_size
        batch_data = data_to_write[batch_start:batch_end]
        # consume() drains the result so the write has completed (and any
        # server-side error has surfaced) before the next batch is sent.
        session.run(query, data=batch_data).consume()

In [None]:
# Close the shared Neo4j driver created in the setup cell; cells that use
# `driver` cannot be re-run after this without re-creating it.
driver.close()