In [1]:
import graph_handling
import embedding_handling

In [2]:
query = "Where did Heinrich Schliemann discover?"

In [3]:
query_result = embedding_handling.user_query_to_chromadb_query_result(query)
chunk_ids_from_embedding = embedding_handling.chroma_query_result_to_chunk_ids(query_result)

Number of requested results 10 is greater than number of elements in index 8, updating n_results = 8


In [4]:
chunk_ids_from_embedding

['b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'c82fdc35-043c-4b58-8c20-0e97641a25ee',
 'bb0ab99e-debc-43ff-92f5-acca4167e99d']

In [5]:
node_names = graph_handling.get_relevant_nodes_from_question(
    graph_handling.GPT4,
    graph_handling.get_all_node_names(),
    query,
    True)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: # Prompt for GPT-4:
Question:
"Where did Heinrich Schliemann discover?"

List of Node Names from the Knowledge Graph:
Greece, Mycenae, Absurdly Huge Stones And Giant Gravesites, Tholos Tomb, Knossos, Wanax, Mother Goddess, Mycenaeanpalaces, Heavilyarmoredsoldiers, Kingsandcommanders, Chariots, Collapse, Heinrich Schliemann, Troy, The Treasure Of Priam, Mycenae, Grave Circle A, Minoans, Crete, Minoan Palaces, Cretan Hieroglyphic Script, Hieroglyphic Script From Phaistos, Linear A, Akrotiri, Mycenaeans, Kingdom Of Pylos, Mycenaean Sparta, Linear B Script Tablets, Linear A, Linear B, Odyssey, Near East, Egypt, Lerna, House Of Tiles, Minoan Civilization, Crete, Sir Arthur Evans, Cnossus, Anonymous Greek Historian, The Parian Marble, Franchthi Cave, Greece, Iliad

# Task for GPT-4:
Analyze the provided list of names from the knowledge graph in the context of the question. Identify and list the names tha

In [6]:
node_names

NodeNameList(names=['Heinrich Schliemann', 'Troy', 'The Treasure Of Priam', 'Mycenae', 'Grave Circle A', 'Greece'])

In [7]:
chunk_ids_from_graph = graph_handling.get_chunk_ids_by_node_names(node_names.names)

In [8]:
chunk_ids_from_graph
# note the duplication and also overlap from the embeddings

['f9453227-664b-41bb-9b9e-21fe3d5acfce',
 'f9453227-664b-41bb-9b9e-21fe3d5acfce',
 'bb0ab99e-debc-43ff-92f5-acca4167e99d',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 '139bea43-a0a8-4a4e-8a4d-6e8d8a7c954e',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'c82fdc35-043c-4b58-8c20-0e97641a25ee']

In [9]:
combined_chunk_ids = list(set(chunk_ids_from_embedding + chunk_ids_from_graph))

In [10]:
combined_chunk_ids

['c82fdc35-043c-4b58-8c20-0e97641a25ee',
 'bb0ab99e-debc-43ff-92f5-acca4167e99d',
 '139bea43-a0a8-4a4e-8a4d-6e8d8a7c954e',
 'b3c1a0e4-8eaf-47ed-b3d5-bf9953129af3',
 'f9453227-664b-41bb-9b9e-21fe3d5acfce']

In [11]:
collection = embedding_handling.get_vector_store_collection()

In [12]:
docs = collection.get(ids=combined_chunk_ids,include=['documents'])['documents']

In [13]:
docs

['Heinrich Schliemann\ndiscovers Troy in the 1870s - it’s now known to be a real place! Archaeology is having a revolution. Period of romanticism and philhellenism - Greece gaining independence from Ottoman empire.\n\nfound several layers of different rebuildings of Troy\nTroy - very strategic location at mouth of water trade route\n\nvery destructive digging and some looting - dug a giant hole\n\n“The Treasure of Priam” - lots of wealth - gold & copper items\n\nbro liked to lie, was brilliant linguist but generally questionable & problematic\n\nFueled by success of Troy, Schliemann finds Mycenae\n- again digs straight down, into burial sites “Grave Circle A”\n- finds again lots of gold - remember no gold found in Greece, had to have come from somewhere else via war / trade\n- “palace of Agamemnon”\n\nStill scholarship being conducted on these findings with new information, techniques, and technology. DNA analysis\n\nMinoans 1700 - 1500 BCE\n- don’t know what language they spoke or wha

In [14]:
graph_handling.get_graph_connector().query("""UNWIND ['Heinrich Schliemann', 'Mycenae', 'Minoans', 'Sir Arthur Evans', 'Cnossus', 'Crete'] AS nodeName1
UNWIND ['Heinrich Schliemann', 'Mycenae', 'Minoans', 'Sir Arthur Evans', 'Cnossus', 'Crete'] AS nodeName2
MATCH (n1)-[r]->(n2)
WHERE n1.name = nodeName1 AND n2.name = nodeName2
RETURN n1, r, n2""")[1]['r']

({'name': 'Minoans',
  'location': 'Crete',
  'id': 'Minoans',
  'timeperiod': '1700 - 1500 BCE',
  'influence': 'earliest influences on Mycenaean Greeks'},
 'INFLUENCED',
 {'discoverydate': 'after Troy',
  'importance': 'burial sites, palace of Agamemnon',
  'name': 'Mycenae',
  'id': 'Mycenae'})

In [16]:
node_relationships = graph_handling.get_interrelationships_between_nodes(node_names.names)

In [17]:
node_relationships

[({'profession': 'archaeologist',
   'characteristics': 'brilliant linguist, questionable, problematic',
   'name': 'Heinrich Schliemann',
   'id': 'Heinrich Schliemann'},
  'DISCOVERED',
  {'discoverydate': '1870s',
   'rebuildings': 'several layers of different rebuildings',
   'importance': 'strategic location at mouth of water trade route',
   'name': 'Troy',
   'id': 'Troy'}),
 ({'profession': 'archaeologist',
   'characteristics': 'brilliant linguist, questionable, problematic',
   'name': 'Heinrich Schliemann',
   'id': 'Heinrich Schliemann'},
  'DISCOVERED',
  {'discoverydate': 'after Troy',
   'importance': 'burial sites, palace of Agamemnon',
   'name': 'Mycenae',
   'id': 'Mycenae'})]

In [32]:
node_relationship_summaries = list(map(lambda r : graph_handling.summarize_relationship(graph_handling.GPT4_TURBO, r, True), node_relationships))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: # Prompt for GPT-4:
Relationship between two nodes:
"({'profession': 'archaeologist', 'characteristics': 'brilliant linguist, questionable, problematic', 'name': 'Heinrich Schliemann', 'id': 'Heinrich Schliemann'}, 'DISCOVERED', {'discoverydate': '1870s', 'rebuildings': 'several layers of different rebuildings', 'importance': 'strategic location at mouth of water trade route', 'name': 'Troy', 'id': 'Troy'})"

# Task for GPT-4:
This is a relationship between two nodes in a Neo4j graph. Please use this information to give a summary of this relationship in a succinct paragraph that does not mention anything about a graph or nodes.
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: # Prompt for GPT-4:
Relationship between two nodes:
"({'profession': 'archaeologist', 'characteristics': 'brilliant linguist, questionable, problematic', 

In [33]:
node_relationship_summaries

["Heinrich Schliemann, an archaeologist renowned for his skills as a brilliant linguist yet also known for being questionable and problematic, made a significant discovery in the 1870s. He uncovered the ancient city of Troy, which was notable for its strategic location at the mouth of a water trade route and had undergone several layers of rebuildings over time. Schliemann's discovery of Troy was a milestone in the field of archaeology due to the city's historical and cultural importance.",
 'Heinrich Schliemann, an archaeologist renowned for his skills as a brilliant linguist yet also known for his questionable and problematic methods, made a significant discovery after his work in Troy. He uncovered the ancient burial sites and the palace of Agamemnon at Mycenae, which marked a notable contribution to the field of archaeology and our understanding of ancient civilizations.']

In [34]:
from langchain.schema import HumanMessage, SystemMessage

messages = [
        SystemMessage(
            content=f"""You are a helpful assistant that responds to user queries given the following context. You are given both a series of facts from a knowledge graph, and raw chunks of documents. Combine both sources to provide a truthful answer.
            
            Here is a collection of facts pulled from a knowledge graph:

            {node_relationship_summaries[0]}

            {node_relationship_summaries[1]}

            Here is a collection of relevants chunks of notes, pulled from a collection of the user's documents:

            {docs[0]}

            {docs[1]}

            {docs[2]}

            {docs[3]}

            {docs[4]}

            Use all of the above resources to answer the user's question in as much detail as the context can provide.
            """
        ),
        HumanMessage(
            content=query
        ),
    ]

In [35]:
result = graph_handling.GPT4_TURBO(messages)

In [36]:
result.content

"Heinrich Schliemann made two major archaeological discoveries:\n\n1. **Troy**: Schliemann discovered the ancient city of Troy in the 1870s. His work confirmed that Troy was a real place, which until then was known mainly from Homer's epic poems, the Iliad and the Odyssey. The city was found to have undergone several rebuildings over time and was located at a strategic position at the mouth of a water trade route.\n\n2. **Mycenae**: After his success at Troy, Schliemann went on to uncover the ancient burial sites and the palace of Agamemnon at Mycenae. This discovery was made in 1876 and was significant for the field of archaeology and our understanding of ancient civilizations. Mycenae was a massive Bronze Age walled city with habitation dating back to Neolithic times. Schliemann's excavation revealed immense wealth, including gold, which indicated that the Mycenaeans had engaged in extensive trade or warfare, as gold was not native to Greece."