In [1]:
from langchain.chains import GraphCypherQAChain

import embedding_handling
import file_handling
import graph_handling

In [2]:
graph = graph_handling.get_graph_connector()

In [3]:
graph.refresh_schema()

In [4]:
# Build the question-answering chain over `graph`.
# The same model (graph_handling.GPT4) is passed for both roles: writing the
# Cypher query (cypher_llm) and phrasing the final answer (qa_llm).
# verbose=True makes each run print the generated Cypher and the raw query
# result ("Full Context") before the answer.
cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=graph_handling.GPT4,
    qa_llm=graph_handling.GPT4,
    validate_cypher=True, # Validate relationship directions
    verbose=True
)

In [7]:
cypher_chain.run("Where did Heinrich Schliemann discover?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person {name: "Heinrich Schliemann"})-[:DISCOVERED]->(pl:Place) RETURN pl.name[0m
Full Context:
[32;1m[1;3m[{'pl.name': 'Troy'}, {'pl.name': 'Mycenae'}][0m

[1m> Finished chain.[0m


'Heinrich Schliemann made his discoveries in Troy and Mycenae.'

In [11]:
print(graph.schema)


        Node properties are the following:
        [{'labels': 'Place', 'properties': [{'property': 'discoverydate', 'type': 'STRING'}, {'property': 'rebuildings', 'type': 'STRING'}, {'property': 'importance', 'type': 'STRING'}, {'property': 'id', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}, {'property': 'importantfeature', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}, {'property': 'statehalves', 'type': 'STRING'}, {'property': 'regions', 'type': 'STRING'}, {'property': 'possiblestructure', 'type': 'STRING'}, {'property': 'destructiondate', 'type': 'STRING'}, {'property': 'alias', 'type': 'STRING'}, {'property': 'inhabitationperiod', 'type': 'STRING'}, {'property': 'significantchange', 'type': 'STRING'}, {'property': 'startofrecordedhistory', 'type': 'STRING'}, {'property': 'rapidexpansion', 'type': 'STRING'}, {'property': 'burntandlooteddate', 'type': 'STRING'}, {'property': 'lostrulerstatus', 'type': 'STRING'}, {'property': 'rulearea', 'type': 'STRI

In [12]:
cypher_chain.run("What are the IDs of the ObsidianNoteChunk nodes that have a relationship with the Mycenaeans?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:People {name: 'Mycenaeans'})-[:REFERENCES_NODE]-(o:ObsidianNoteChunk) RETURN o.id[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't know the answer."

In [1]:
import graph_handling

In [2]:
names = graph_handling.get_all_node_names()

In [3]:
names

['Greece',
 'Mycenae',
 'Absurdly Huge Stones And Giant Gravesites',
 'Tholos Tomb',
 'Knossos',
 'Wanax',
 'Mother Goddess',
 'Mycenaeanpalaces',
 'Heavilyarmoredsoldiers',
 'Kingsandcommanders',
 'Chariots',
 'Collapse',
 'Heinrich Schliemann',
 'Troy',
 'The Treasure Of Priam',
 'Mycenae',
 'Grave Circle A',
 'Minoans',
 'Crete',
 'Minoan Palaces',
 'Cretan Hieroglyphic Script',
 'Hieroglyphic Script From Phaistos',
 'Linear A',
 'Akrotiri',
 'Mycenaeans',
 'Kingdom Of Pylos',
 'Mycenaean Sparta',
 'Linear B Script Tablets',
 'Linear A',
 'Linear B',
 'Odyssey',
 'Near East',
 'Egypt',
 'Lerna',
 'House Of Tiles',
 'Minoan Civilization',
 'Crete',
 'Sir Arthur Evans',
 'Cnossus',
 'Anonymous Greek Historian',
 'The Parian Marble',
 'Franchthi Cave',
 'Greece',
 'Iliad']

In [4]:
relevant_names = graph_handling.get_relevant_nodes_from_question(graph_handling.GPT4, names, "Where did Heinrich Schliemann discover?", True)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: # Prompt for GPT-4:
Question:
"Where did Heinrich Schliemann discover?"

List of Node Names from the Knowledge Graph:
Greece, Mycenae, Absurdly Huge Stones And Giant Gravesites, Tholos Tomb, Knossos, Wanax, Mother Goddess, Mycenaeanpalaces, Heavilyarmoredsoldiers, Kingsandcommanders, Chariots, Collapse, Heinrich Schliemann, Troy, The Treasure Of Priam, Mycenae, Grave Circle A, Minoans, Crete, Minoan Palaces, Cretan Hieroglyphic Script, Hieroglyphic Script From Phaistos, Linear A, Akrotiri, Mycenaeans, Kingdom Of Pylos, Mycenaean Sparta, Linear B Script Tablets, Linear A, Linear B, Odyssey, Near East, Egypt, Lerna, House Of Tiles, Minoan Civilization, Crete, Sir Arthur Evans, Cnossus, Anonymous Greek Historian, The Parian Marble, Franchthi Cave, Greece, Iliad

# Task for GPT-4:
Analyze the provided list of names from the knowledge graph in the context of the question. Identify and list the names tha

In [5]:
relevant_names

NodeNameList(names=['Heinrich Schliemann', 'Troy', 'The Treasure Of Priam', 'Mycenae', 'Grave Circle A', 'Greece'])

In [31]:
# Collect the ids of the ObsidianNoteChunk nodes connected (in either
# direction, by any relationship type) to every node named 'Mycenae'.
# The result is a single row whose 'relatedNodes' value is a list of {id: ...} maps.
# NOTE(review): because the match is OPTIONAL, a node with no chunk neighbour
# would presumably contribute an {id: null} entry -- confirm before relying on
# every entry having an id.
graph_handling.get_graph_connector().query("""
    MATCH (n)
    WHERE n.name IN ['Mycenae']
    OPTIONAL MATCH (n)-[r]-(related:ObsidianNoteChunk)
    RETURN collect({id: related.id}) as relatedNodes
""")

[{'relatedNodes': [{'id': 'f9453227-664b-41bb-9b9e-21fe3d5acfce'},
   {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'},
   {'id': '139bea43-a0a8-4a4e-8a4d-6e8d8a7c954e'}]}]

In [6]:
# Fetch every node named 'Troy' together with all of its relationships and the
# nodes on the other end of them (any direction, any relationship type).
# One result row per matched node, with keys 'n', 'relationships' and 'relatedNodes'.
connected_to_troy = graph_handling.get_graph_connector().query("""MATCH (n)
WHERE n.name = 'Troy'
OPTIONAL MATCH (n)-[r]-(related)
RETURN n, collect(r) as relationships, collect(related) as relatedNodes""")

In [17]:
# Fetch every node named 'Troy' with its relationships and neighbours.
# Neighbours labelled ObsidianNoteChunk are reduced to a small map
# (label, file_name, chunk_number, id) instead of the full node; all other
# neighbours are returned whole.
connected_to_troy2 = graph_handling.get_graph_connector().query("""
MATCH (n)
WHERE n.name = 'Troy'
OPTIONAL MATCH (n)-[r]-(related)
RETURN n, 
       collect(r) as relationships, 
       collect(
           CASE WHEN 'ObsidianNoteChunk' IN labels(related) 
                THEN {label: 'ObsidianNoteChunk', file_name: related.file_name, chunk_number: related.chunk_number, id: related.id} 
                ELSE related 
           END
       ) as relatedNodes
""")

In [22]:
# Alternative view: the nodes that have a relationship with the Troy node.
# The entries with label 'ObsidianNoteChunk' in that list carry the chunk `id`.
# connected_to_troy2[0]['relatedNodes']

# The 'relationships' key holds (start node, relationship type, end node) triples.
connected_to_troy2[0]['relationships']

# These triples can also be handed to GPT so it can reason about how the Troy
# node relates to the other nodes (e.g. Heinrich Schliemann).

[({'profession': 'archaeologist',
   'characteristics': 'brilliant linguist, questionable, problematic',
   'name': 'Heinrich Schliemann',
   'id': 'Heinrich Schliemann'},
  'DISCOVERED',
  {'discoverydate': '1870s',
   'rebuildings': 'several layers of different rebuildings',
   'importance': 'strategic location at mouth of water trade route',
   'name': 'Troy',
   'id': 'Troy'}),
 ({},
  'REFERENCES_NODE',
  {'discoverydate': '1870s',
   'rebuildings': 'several layers of different rebuildings',
   'importance': 'strategic location at mouth of water trade route',
   'name': 'Troy',
   'id': 'Troy'})]

In [43]:
# Prototype of a query that masks neighbours before returning them:
#   * ObsidianNoteChunk neighbours become {label: 'ObsidianNoteChunk'};
#   * neighbours whose name is not in the allow-list become {label: 'Unrelated'};
#   * everything else is returned as the full node.
# The allow-list here is the empty list [], so every named non-chunk neighbour
# is masked as 'Unrelated' (see 'relatedNodes' in the output).
# In the recorded output the masked neighbours also show up as empty dicts
# inside the 'relationships' triples.
graph_handling.get_graph_connector().query("""
MATCH (n)
WHERE n.name = 'Mycenae'
OPTIONAL MATCH (n)-[r]-(related)
RETURN n, 
        collect(r) as relationships, 
        collect(
            CASE
                WHEN 'ObsidianNoteChunk' IN labels(related) 
                    THEN {label: 'ObsidianNoteChunk'}
                WHEN NOT related.name IN []
                    THEN {label: 'Unrelated'}
                ELSE related
            END
        ) as relatedNodes
""")

[{'n': {'appearance': 'Unknown',
   'adaptationfromminoans': 'gravesites',
   'wealthincrease': 'Yes',
   'name': 'Mycenae',
   'id': 'Mycenae'},
  'relationships': [({'appearance': 'Unknown',
     'adaptationfromminoans': 'gravesites',
     'wealthincrease': 'Yes',
     'name': 'Mycenae',
     'id': 'Mycenae'},
    'ADAPTED_FROM',
    {}),
   ({},
    'REFERENCES_NODE',
    {'appearance': 'Unknown',
     'adaptationfromminoans': 'gravesites',
     'wealthincrease': 'Yes',
     'name': 'Mycenae',
     'id': 'Mycenae'})],
  'relatedNodes': [{'label': 'Unrelated'}, {'label': 'ObsidianNoteChunk'}]},
 {'n': {'discoverydate': 'after Troy',
   'importance': 'burial sites, palace of Agamemnon',
   'name': 'Mycenae',
   'id': 'Mycenae'},
  'relationships': [({},
    'INFLUENCED',
    {'discoverydate': 'after Troy',
     'importance': 'burial sites, palace of Agamemnon',
     'name': 'Mycenae',
     'id': 'Mycenae'}),
   ({},
    'DISCOVERED',
    {'discoverydate': 'after Troy',
     'importanc

In [56]:
def get_non_housekeeping_relationships_from_node_name(
        node_name: str,
        allowed_names: list[str] | None = None,
    ) -> list[tuple[dict,str,dict]]:

    allowed_node_syntax = ""
    
    if allowed_names:
        allowed_node_syntax += "WHEN NOT related.name IN ["
        allowed_node_syntax += ",".join([f"'{name}'" for name in allowed_names])
        allowed_node_syntax += "]\nTHEN {label: 'Unrelated'}"
    
    query_results: list[dict] = graph_handling.get_graph_connector().query(f"""
                MATCH (n)
                WHERE n.name = '{node_name}'"""+"""
                OPTIONAL MATCH (n)-[r]-(related)
                RETURN n, 
                    collect(r) as relationships, 
                    collect(
                        CASE
                            WHEN 'ObsidianNoteChunk' IN labels(related) 
                                THEN {label: 'ObsidianNoteChunk'}""" +
                                allowed_node_syntax + """
                            ELSE related 
                        END
                    ) as relatedNodes
                """)
    
    results: list[tuple[dict,str,dict]] = []

    # could be more than one node with the same name
    for node in query_results:
        for relationship in node['relationships']:
            # second item is edge type
            # len checks are to make sure the node didn't get filtered out
            if relationship[1] != 'REFERENCES_NODE' and len(relationship[0]) != 0 and len(relationship[2]) != 0:
                results.append(relationship)

    return results


In [60]:
get_non_housekeeping_relationships_from_node_name('Mycenae')

[({'appearance': 'Unknown',
   'adaptationfromminoans': 'gravesites',
   'wealthincrease': 'Yes',
   'name': 'Mycenae',
   'id': 'Mycenae'},
  'ADAPTED_FROM',
  {'name': 'Minoan Civilization', 'id': 'Minoan Civilization'}),
 ({'name': 'Minoans',
   'location': 'Crete',
   'id': 'Minoans',
   'timeperiod': '1700 - 1500 BCE',
   'influence': 'earliest influences on Mycenaean Greeks'},
  'INFLUENCED',
  {'discoverydate': 'after Troy',
   'importance': 'burial sites, palace of Agamemnon',
   'name': 'Mycenae',
   'id': 'Mycenae'}),
 ({'profession': 'archaeologist',
   'characteristics': 'brilliant linguist, questionable, problematic',
   'name': 'Heinrich Schliemann',
   'id': 'Heinrich Schliemann'},
  'DISCOVERED',
  {'discoverydate': 'after Troy',
   'importance': 'burial sites, palace of Agamemnon',
   'name': 'Mycenae',
   'id': 'Mycenae'})]

In [61]:
# Draft GPT instruction for summarizing one relationship triple into a paragraph.
# NOTE(review): nothing visible in this notebook reads this variable;
# summarize_relationship embeds the same sentence in its own template.
prompt = "This is a relationship between two nodes in a Neo4j graph. Please use this information to give a summary of this relationship in a succinct paragraph that does not mention anything about a graph or nodes:"

In [62]:
def summarize_relationship(
    llm,
    relationship: tuple[dict, str, dict],
    verbose: bool = False,
):
    """Ask ``llm`` for a short prose summary of one relationship triple.

    Args:
        llm: Language model handed to the structured-output chain.
        relationship: ``(start_node, relationship_type, end_node)`` triple; it
            is substituted into the prompt as-is.
        verbose: Forwarded to the chain; when true the formatted prompt is
            printed during the run.

    Returns:
        Whatever the chain's ``run`` call returns (a plain string in the run
        recorded in this notebook).
    """
    # Single system message; `{relationship}` is the only template variable.
    system_template = (
        '# Prompt for GPT-4:\n'
        'Relationship between two nodes:\n'
        '"{relationship}"\n'
        '\n'
        '# Task for GPT-4:\n'
        'This is a relationship between two nodes in a Neo4j graph. '
        'Please use this information to give a summary of this relationship '
        'in a succinct paragraph that does not mention anything about a graph or nodes.\n'
    )
    messages = [("system", system_template)]
    summary_prompt = graph_handling.ChatPromptTemplate.from_messages(messages)

    return graph_handling.create_structured_output_chain(
        str, llm, summary_prompt, verbose=verbose
    ).run(relationship=relationship)

In [63]:
summarize_relationship(graph_handling.GPT4, get_non_housekeeping_relationships_from_node_name('Mycenae')[0], True)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: # Prompt for GPT-4:
Relationship between two nodes:
"({'appearance': 'Unknown', 'adaptationfromminoans': 'gravesites', 'wealthincrease': 'Yes', 'name': 'Mycenae', 'id': 'Mycenae'}, 'ADAPTED_FROM', {'name': 'Minoan Civilization', 'id': 'Minoan Civilization'})"

# Task for GPT-4:
This is a relationship between two nodes in a Neo4j graph. Please use this information to give a summary of this relationship in a succinct paragraph that does not mention anything about a graph or nodes.
[0m

[1m> Finished chain.[0m


'The Mycenae civilization, which is known for its increase in wealth but has an unknown appearance, adapted from the Minoan Civilization. This adaptation is evident from the gravesites of the Mycenae civilization.'

In [12]:
def get_chunk_ids_by_node_names(node_names: list[str]) -> list[str]:
    """Given a list of node names, returns the ids of the chunks that reference them.

    A chunk is any ``ObsidianNoteChunk`` node connected, in either direction
    and by any relationship type, to a node whose ``name`` is in
    ``node_names``.

    Args:
        node_names: Names to look up. An empty list returns ``[]`` without
            querying the graph.

    Returns:
        The chunk ids. May contain duplicates (a chunk connected to several
        of the named nodes appears once per connection); wrap in ``set`` to
        de-duplicate. Never contains ``None``.
    """
    if not node_names:
        return []

    # The names are sent as a query parameter instead of being spliced into
    # the Cypher text, so names containing quotes are handled safely.
    # A plain MATCH (rather than OPTIONAL MATCH) means named nodes without
    # any chunk neighbour contribute nothing instead of a null id.
    rows: list[dict] = graph_handling.get_graph_connector().query(
        """
        MATCH (n)
        WHERE n.name IN $node_names
        MATCH (n)--(related:ObsidianNoteChunk)
        RETURN collect(related.id) AS chunkIds
        """,
        params={"node_names": list(node_names)},
    )

    if not rows:
        return []

    # Defensive: drop any null id so the declared list[str] contract holds.
    return [chunk_id for chunk_id in rows[0]["chunkIds"] if chunk_id is not None]

In [13]:
chunks_from_question = get_chunk_ids_by_node_names(relevant_names.names)

[{'relatedNodes': [{'id': 'f9453227-664b-41bb-9b9e-21fe3d5acfce'}, {'id': 'f9453227-664b-41bb-9b9e-21fe3d5acfce'}, {'id': 'bb0ab99e-debc-43ff-92f5-acca4167e99d'}, {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'}, {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'}, {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'}, {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'}, {'id': '139bea43-a0a8-4a4e-8a4d-6e8d8a7c954e'}, {'id': 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3'}, {'id': 'c82fdc35-043c-4b58-8c20-0e97641a25ee'}]}]


In [15]:
set(chunks_from_question)

{'139bea43-a0a8-4a4e-8a4d-6e8d8a7c954e',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'bb0ab99e-debc-43ff-92f5-acca4167e99d',
 'c82fdc35-043c-4b58-8c20-0e97641a25ee',
 'f9453227-664b-41bb-9b9e-21fe3d5acfce'}