In [None]:

# importing os module for environment variables
import os
# importing necessary functions from dotenv library
from dotenv import load_dotenv, dotenv_values 
# loading variables from .env file
load_dotenv() 

from classes import Neo4jConnection
from rdflib import Dataset
from rdflib.namespace import RDF

def _cypher_string(value):
    """Render ``value`` as a single-quoted Cypher string literal.

    Backslashes are escaped before single quotes: escaping only the quote
    would let a backslash already present in the data start its own escape
    sequence (or swallow the closing quote) inside the literal.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{text}'"


# Connection settings come from the environment (populated by load_dotenv()).
conn = Neo4jConnection(
    uri=os.getenv('NEO4J_URI'),
    user=os.getenv('NEO4J_USERNAME'),
    pwd=os.getenv('NEO4J_PASSWORD'),
)

try:

    # Clean the database. DETACH DELETE removes every node together with its
    # relationships in a single statement.
    conn.query("MATCH (n) DETACH DELETE n")
    conn.query("DROP INDEX entities IF EXISTS")
    conn.query("DROP INDEX entity_index IF EXISTS")

    dataset = Dataset()
    # `format` must be passed by keyword: the second positional parameter of
    # parse() is the public ID, so a positional "format=trig" never selected
    # the parser and was used as the base IRI instead.
    dataset.parse("data/model.trig", format="trig")

    # @prefix geo: <http://www.opengis.net/ont/geosparql#> .
    # @prefix lpgv: <https://dev-georegistry.geoprism.net/lpg/deliverable2024/0#> .
    # @prefix lpg: <https://dev-georegistry.geoprism.net/lpg#> .
    # @prefix lpgs: <https://dev-georegistry.geoprism.net/lpg/rdfs#> .
    # @prefix sf: <http://www.opengis.net/ont/sf#> .
    # @prefix lpgvs: <https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#> .
    # @prefix dcterms: <http://purl.org/dc/terms/> .
    # @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

    # Each result row describes one chain:
    # school -> school zone, and school -> flood area -> levee -> channel,
    # with the code and label of every member.
    school_query = """
    PREFIX lpgvs: <https://dev-georegistry.geoprism.net/lpg/deliverable2024/0/rdfs#>
    PREFIX lpgs: <https://dev-georegistry.geoprism.net/lpg/rdfs#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?schoolCode ?schoolLabel ?zoneCode ?zoneLabel ?areaCode ?areaLabel ?leveeCode ?leveeLabel ?channelCode ?channelLabel
    WHERE {
        ?school a lpgvs:School .
        ?school lpgs:GeoObject-code ?schoolCode .
        ?school rdfs:label ?schoolLabel .   
        ?school lpgvs:HasSchoolZone ?zone .
        ?school lpgvs:HasFloodRisk ?area .    
        ?area lpgvs:HasFloodZone ?levee .
        ?levee lpgvs:HasFloodZone ?channel .        
        ?zone lpgs:GeoObject-code ?zoneCode .
        ?zone rdfs:label ?zoneLabel .      
        ?area lpgs:GeoObject-code ?areaCode .
        ?area rdfs:label ?areaLabel .   
        ?levee lpgs:GeoObject-code ?leveeCode .
        ?levee rdfs:label ?leveeLabel .           
        ?channel lpgs:GeoObject-code ?channelCode .
        ?channel rdfs:label ?channelLabel .           
    }
    """

    for graph in dataset.graphs():
        # Only named graphs whose identifier mentions this deliverable are loaded.
        if 'deliverable2024' not in str(graph.identifier):
            continue

        for row in graph.query(school_query):

            # Names and codes are both embedded in the Cypher text, so both
            # go through the same literal escaping.
            area_name = _cypher_string(row.areaLabel)
            area_code = _cypher_string(row.areaCode)
            school_name = _cypher_string(row.schoolLabel)
            school_code = _cypher_string(row.schoolCode)
            zone_name = _cypher_string(row.zoneLabel)
            zone_code = _cypher_string(row.zoneCode)
            levee_name = _cypher_string(row.leveeLabel)
            levee_code = _cypher_string(row.leveeCode)
            channel_name = _cypher_string(row.channelLabel)
            channel_code = _cypher_string(row.channelCode)

            # MERGE keeps the load idempotent across rows that repeat the
            # same zone / area / levee / channel.
            statement = (
                f"MERGE (area:Entity:FloodArea {{name: {area_name}, code: {area_code}}})"
                f"MERGE (school:Entity:School {{name: {school_name}, code: {school_code}}})"
                f"MERGE (zone:Entity:SchoolZone {{name: {zone_name}, code: {zone_code}}})"
                f"MERGE (levee:Entity:Levee {{name: {levee_name}, code: {levee_code}}})"
                f"MERGE (channel:Entity:Channel {{name: {channel_name}, code: {channel_code}}})"
                "MERGE (zone)-[:HAS_SCHOOL]->(school)"
                "MERGE (school)-[:HAS_FLOOD_AREA]->(area)"
                "MERGE (area)-[:PROTECTED_BY]->(levee)"
                "MERGE (levee)-[:PROTECTS_AGAINST]->(channel)"
            )

            conn.query(statement)

    # Recreated on every run; the index of the same name was dropped above.
    conn.query("CREATE FULLTEXT INDEX entities FOR (n:Entity) ON EACH [n.name]")
finally:
    # Always release the connection, even if loading failed part-way.
    conn.close()

In [8]:
# from langchain_community.vectorstores import Neo4jVector

# existing_graph = Neo4jVector.from_existing_graph(
#     embedding=embeddings,
#     url="bolt://localhost:7687", 
#     username="neo4j",              
#     password="test_password",
#     index_name="entity_index",
#     node_label="Entity",
#     text_node_properties=["name", "code"],
#     embedding_node_property="embedding"
# )
# result = existing_graph.similarity_search("Georgetown", k=15)

# print(result)