In [1]:
%load_ext dotenv
%dotenv

In [2]:
import logging
import sys
import os

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)

## 1. Preparation
### 1.1 Prepare LLM

In [3]:
from llama_index.llms import Ollama

OLLAMA_HOST = 'localhost'
OLLAMA_MODEL = 'mistral'
llm = Ollama(model=OLLAMA_MODEL, base_url="http://"+OLLAMA_HOST+":11434")

In [11]:
from llama_index import ServiceContext

service_context = ServiceContext.from_defaults(
    llm=llm, 
    # To save costs, we use a local model.
    # This will use a well-performing and fast default from Hugging Face.
    # this model has dim of 384 https://huggingface.co/BAAI/bge-small-en
    embed_model="local:BAAI/bge-small-en",
)

### 1.2 Prepare Graph Store
`Neo4j` is supported as a graph store integration. You can persist, visualize, and query graphs using LlamaIndex and Neo4j. Furthermore, existing Neo4j graphs are directly supported using `text2cypher` and the `KnowledgeGraphQueryEngine`.

If you’ve never used Neo4j before, you can download the desktop client [here](https://neo4j.com/download/).

Once you open the client, create a new project and install the `apoc` integration. Full instructions here. Just click on your project, select `Plugins` on the left side menu, install APOC and restart your server.

In [5]:
username = "neo4j"
password = os.environ["NEO4J_PASSWORD"]
# Neo4j cloud has a generous free tier, so I use that instead of localhost
url = "bolt://localhost:7687"
# url = "bolt+s://3b2530f1.databases.neo4j.io:7687"
database = "neo4j"

In [6]:
from llama_index.graph_stores import Neo4jGraphStore

graph_store = Neo4jGraphStore(
    username=username,
    password=password,
    url=url,
    database=database,
)

In [7]:
from llama_index.storage.storage_context import StorageContext

storage_context = StorageContext.from_defaults(graph_store=graph_store)

## 2. Build the Knowledge Graph
### 2.1 Preprocess Data

In [8]:
from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False)

### 2.2 Extract Triplets and Save to Graph
reference:
- [KnowledgeGraphIndex](https://docs.llamaindex.ai/en/stable/api_reference/indices/kg.html#llama_index.indices.knowledge_graph.KnowledgeGraphIndex)

In [12]:
from llama_index import KnowledgeGraphIndex

index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    max_triplets_per_chunk=10,
    include_embeddings=True,
    show_progress=True
    # max_object_length: int = 128,
    
    # to extract triplets, kg_triplet_extract_fn is used if not None,
    # kg_triplet_extract_fn: Optional[Callable] = None, 
    # else, the LLM from the service context is used with the kg_triple_extract_template if not None else the default triplet extract prompt
    # kg_triple_extract_template: Optional[BasePromptTemplate] = None,
)

Parsing nodes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.75it/s]
Processing nodes:   0%|                                                                                                                                               | 0/16 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                          | 0/12 [00:00<?, ?it/s][A
Generating embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 37.97it/s][A
Processing nodes:   6%|████████▍                                                                                                                              | 1/16 [00:15<03:46, 15.08s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/4 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 27.65it/s][A
Processing nodes:  12%|████████████████▉                                                                                                                      | 2/16 [00:29<03:28, 14.88s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                          | 0/11 [00:00<?, ?it/s][A
Generating embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 44.77it/s][A
Processing nodes:  19%|█████████████████████████▎                                                                                                             | 3/16 [00:41<02:56, 13.61s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                          | 0/11 [00:00<?, ?it/s][A
Generating embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 62.60it/s][A
Processing nodes:  25%|█████████████████████████████████▊                                                                                                     | 4/16 [00:53<02:32, 12.69s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/6 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 49.73it/s][A
Processing nodes:  31%|██████████████████████████████████████████▏                                                                                            | 5/16 [01:12<02:46, 15.10s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/5 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 41.95it/s][A
Processing nodes:  38%|██████████████████████████████████████████████████▋                                                                                    | 6/16 [01:26<02:27, 14.72s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/7 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 61.01it/s][A
Processing nodes:  44%|███████████████████████████████████████████████████████████                                                                            | 7/16 [01:39<02:08, 14.24s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 172.88it/s][A
Processing nodes:  50%|███████████████████████████████████████████████████████████████████▌                                                                   | 8/16 [01:48<01:39, 12.45s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                          | 0/11 [00:00<?, ?it/s][A
Generating embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 48.68it/s][A
Processing nodes:  56%|███████████████████████████████████████████████████████████████████████████▉                                                           | 9/16 [02:08<01:44, 14.87s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 165.04it/s][A
Processing nodes:  62%|███████████████████████████████████████████████████████████████████████████████████▊                                                  | 10/16 [02:23<01:28, 14.74s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/9 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 79.52it/s][A
Processing nodes:  69%|████████████████████████████████████████████████████████████████████████████████████████████▏                                         | 11/16 [02:38<01:14, 14.85s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 162.62it/s][A
Processing nodes:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████▌                                 | 12/16 [02:49<00:55, 13.90s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/9 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 80.65it/s][A
Processing nodes:  81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 13/16 [03:03<00:41, 13.86s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/1 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.14it/s][A
Processing nodes:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                | 14/16 [03:21<00:30, 15.13s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings:   0%|                                                                                                                                           | 0/2 [00:00<?, ?it/s][A
Generating embeddings: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 19.02it/s][A
Processing nodes:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 15/16 [03:41<00:16, 16.50s/it]

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



Generating embeddings: 0it [00:00, ?it/s][A
Processing nodes: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [03:52<00:00, 14.51s/it]


### Conclusions from building the KG

- we can visualize the graph in Neo4j Bloom directly on top of Neo4j local or Neo4j AuraDB
- we have 1 Node type: Entity
    ```cypher
    MATCH (n)
    RETURN distinct labels(n)[0] as label, count(n) as node_count
    ```
- 80 Relationship type
    ```cypher
    MATCH p=()-->() RETURN count(p)
    ```
- Entities only have 1 field called `id`, we don't have entity type like "Person" etc... NER would be needed


## 3. Create VectorStoreIndex for RAG

References:
- [Neo4jVectorDemo](https://docs.llamaindex.ai/en/latest/examples/vector_stores/Neo4jVectorDemo.html#)
- [Neo4jVectorStore](https://docs.llamaindex.ai/en/stable/api/llama_index.vector_stores.Neo4jVectorStore.html#llama_index.vector_stores.Neo4jVectorStore)

In [23]:
from llama_index.vector_stores import Neo4jVectorStore

neo4j_vector = Neo4jVectorStore(
    username=username, 
    password=password, 
    url=url, 
    # "local:BAAI/bge-small-en" has dim of 384 https://huggingface.co/BAAI/bge-small-en
    embedding_dimension=384,
    database=database,
    index_name='vector'
)

In [24]:
from llama_index.storage.storage_context import StorageContext
from llama_index import VectorStoreIndex

storage_context_vector = StorageContext.from_defaults(vector_store=neo4j_vector)
vector_index = VectorStoreIndex.from_documents(
    documents, 
    storage_context=storage_context_vector, 
    service_context=service_context,
)

At this stage we can check with a cypher query that the stored embeddings have the right dimension 384
```cypher
MATCH (n:Chunk) RETURN size(n.embedding)
```

### Conclusions from building the Vector DB
- there are 16 chunks for 1 wikipedia page, they contain the text, the embedding, and ids
- there are no relationships
- the chunks are not related to entities at all
- the chunks get dumped into the same Graph database than the entity graph (unless you pass a different parameter), but for simplicity with Bloom viz, I chose to use the same database
- the embedding is the embedding for the chunk which can be more or less long so more or less "averaged"

## 4. (Optional) Persist and Load from disk Llama Indexes
### 4.1. Persist

### 4.2. Restore
In Llama Index, there are two scenarios we could apply Graph RAG:

- Build Knowledge Graph from documents with Llama Index, with LLM or even local models, to do this, we should go for `KnowledgeGraphIndex`.

- Leveraging existing Knowledge Graph, in this case, we should use `KnowledgeGraphRAGQueryEngine`.



In [31]:
existing_vector = Neo4jVectorStore(
    username,
    password,
    url,
    384,
    index_name="vector",
    text_node_property="text",
    embedding_node_property="embedding"
)
vector_index = VectorStoreIndex.from_vector_store(
    existing_vector,
    service_context=service_context,
)

## 5. Prepare for different query approaches

### 5.1 Graph RAG query engine

There are issues with the llama_index docs as I describe [here](https://github.com/run-llama/llama_index/issues/10474)

reference:

- [KGTableRetriever](https://docs.llamaindex.ai/en/stable/api_reference/query/retrievers/kg.html#llama_index.indices.knowledge_graph.retrievers.KGTableRetriever) ??
- [KnowledgeGraphRAGRetriever](https://docs.llamaindex.ai/en/stable/api_reference/query/retrievers/kg.html#llama_index.indices.knowledge_graph.retrievers.KnowledgeGraphRAGRetriever) "Retriever that perform SubGraph RAG towards knowledge graph."
- [RetrieverQueryEngine](https://docs.llamaindex.ai/en/stable/api_reference/query/query_engines/retriever_query_engine.html)

In [16]:
# in case we just built the index and we have it available
kg_rag_query_engine = index.as_query_engine(
    include_text=False, 
    retriever_mode="keyword",
    response_mode="tree_summarize"
)

In [17]:
from llama_index.indices.knowledge_graph.retrievers import KGTableRetriever

assert type(kg_rag_query_engine.retriever) == KGTableRetriever

In [10]:
# in case we want to restore it
# from llama_index.query_engine import RetrieverQueryEngine
# from llama_index.retrievers import KnowledgeGraphRAGRetriever

# # here I use a KnowledgeGraphRAGRetriever instead of a KGTableRetriever
# graph_rag_retriever = KnowledgeGraphRAGRetriever(
#     storage_context=storage_context,
#     service_context=service_context,
#     llm=llm,
#     verbose=True,
# )

# kg_rag_query_engine = RetrieverQueryEngine.from_args(
#     graph_rag_retriever,
#     service_context=service_context,
# )

### 5.2. Vector RAG query engine

In [18]:
vector_rag_query_engine = vector_index.as_query_engine()

In [19]:
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine

assert type(vector_rag_query_engine) == RetrieverQueryEngine

### 5.3 Graph+Vector RAG query engine

In [28]:
# import QueryBundle
from llama_index import QueryBundle

# import NodeWithScore
from llama_index.schema import NodeWithScore

# Retrievers
from llama_index.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever

from typing import List


class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search"""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        kg_retriever: KGTableRetriever,
        mode: str = "OR",
    ) -> None:
        """Init params."""

        self._vector_retriever = vector_retriever
        self._kg_retriever = kg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query."""

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        kg_nodes = self._kg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        kg_ids = {n.node.node_id for n in kg_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in kg_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(kg_ids)
        else:
            retrieve_ids = vector_ids.union(kg_ids)

        retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids]
        return retrieve_nodes

In [29]:
from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine

# create custom retriever
vector_retriever = VectorIndexRetriever(index=vector_index)
kg_retriever = KGTableRetriever(
    index=kg_index, retriever_mode="keyword", include_text=False
)
custom_retriever = CustomRetriever(vector_retriever, kg_retriever)

# create response synthesizer
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="tree_summarize",
)

In [30]:
graph_vector_rag_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

## 6. Query with the engines

In [20]:
from IPython.display import Markdown, display

In [21]:
response_graph_rag = kg_rag_query_engine.query("Tell me about Peter Quill.")

display(Markdown(f"<b>{response_graph_rag}</b>"))

INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


<b> Peter Quill is the leader of the Guardians of the Galaxy. He is known to have a half-sister named Mantis, who is also a member of the Guardians. The Guardians have traveled to various locations including Orgocorp's headquarters and Counter-earth, and have been attacked by Adam warlock. Quill has repaid a favor to James Gunn and has stated intentions for future projects in the Marvel Cosmic Universe. He has also been known to use offensive language and has expressed reluctance to return to the franchise without certain characters. Quill was confirmed to return to write and direct Guardians of the Galaxy vol. 3 in April 2017.</b>

In [22]:
response_vector_rag = vector_rag_query_engine.query("Tell me about Peter Quill.")

display(Markdown(f"<b>{response_vector_rag}</b>"))

ClientError: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 384 dimensions, but indexed vectors have 1536.}

In [39]:
response_graph_vector_rag = graph_vector_rag_query_engine.query("Tell me about Peter Quill.")

display(Markdown(f"<b>{response_graph_vector_rag}</b>"))

NameError: name 'graph_vector_rag_query_engine' is not defined

## 7. Comparison and Conclusion

In [32]:
analysis = llm.complete(f"""
Compare the QA results on "Tell me about Peter Quill.", list the knowledge facts between them, to help evalute them. Output in markdown table.

Result from Graph: {response_graph_rag}
---
Result from Vector: {response_vector_rag}
---
Result Graph+Vector: {response_graph_vector_rag}
---

""")

In [33]:
display(Markdown(analysis.text))

 | Fact | Graph Result | Vector Result | Both Results |
| --- | --- | --- | --- |
| Character Name | Peter Quill / Star-Lord | Peter Quill / Star-Lord | Peter Quill / Star-Lord |
| Universe | Marvel Comics | Marvel Cinematic Universe | Marvel Universe |
| Creation | Created by Dan Abnett and Andy Lanning, first appeared in "Annihilators" #1 in July 2006 | Portrayed by Chris Pratt in films since 2014 | Created, first appearance in respective universes |
| Species | Human from Earth | Human who was abducted from Earth as a child and grew up among extraterrestrial beings | Human, abducted from Earth as a child, raised among aliens |
| Birth Year | Born in 1982 | N/A | Born in 1982 |
| Backstory | Raised among the Ravagers, skilled mercenary and thief, leads Guardians of the Galaxy | Skilled in combat and piloting spaceships, quirky sense of humor, love for classic Earth music, depicted as more traditional superhero compared to the Guardians | Raised among the Ravagers, became a space adventurer, joined the Guardians of the Galaxy |
| Weaponry | Star-Lord gun, Orb, Milano spaceship | N/A | Advanced weaponry, including Star-Lord gun and Orb, Milano spaceship |
| Personality | Charming, rebellious, sarcastic, strong sense of loyalty to those he cares about, romantically involved with Gamora and Rocket Raccoon | Quirky sense of humor, love for classic Earth music, depicted as a more traditional superhero compared to the Guardians | Charming, rebellious, sarcastic, romantic relationships with Gamora and Rocket Raccoon |
| Films | N/A | Appeared in "Guardians of the Galaxy" (2014), "Guardians of the Galaxy Vol. 2" (2017), "Avengers: Infinity War" (2018), and "Avengers: Endgame" (2018) | N/A, appears in films |
| Director | N/A | James Gunn was fired from the franchise over old tweets, but negotiations to return ultimately failed | N/A, no mention of director |
| Depiction in "Vol. 3" | In a state of depression following Gamora's appearance | N/A | In a state of depression following Gamora's appearance in "Guardians of the Galaxy Vol. 3" |