In [1]:
import os
from dotenv import load_dotenv, find_dotenv
import warnings
import nest_asyncio

# Notebook-wide setup: mute warnings, patch asyncio via nest_asyncio, and
# load variables from the nearest `.env` file.
warnings.filterwarnings(action="ignore")
nest_asyncio.apply()
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)  # bind the result so the cell displays nothing

## Setup neo4j

In the terminal, type:
```
docker run \
    -p 7474:7474 -p 7687:7687 \
    -v "$PWD/data:/data" -v "$PWD/plugins:/plugins" \
    --name neo4j-apoc \
    -e NEO4J_apoc_export_file_enabled=true \
    -e NEO4J_apoc_import_file_enabled=true \
    -e NEO4J_apoc_import_file_use__neo4j__config=true \
    -e NEO4JLABS_PLUGINS=\[\"apoc\"\] \
    neo4j:latest
```

From here, you can open the database UI at http://localhost:7474/. On this page you will be asked to sign in; use the default credentials (username `neo4j`, password `neo4j`).

Once you log in for the first time, you will be asked to change the password. Use the new password in the connection cell below.

After this, you are ready to create your first property graph!

In [2]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Note: used to be `Neo4jPGStore`
# Connection settings come from the environment (the first cell loads `.env`
# via `load_dotenv`), so credentials do not have to live in the notebook.
# The fallbacks are the values this notebook originally hard-coded, so it
# keeps working when the variables are not set.
graph_store = Neo4jPropertyGraphStore(
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "llamaindex"),
    url=os.getenv("NEO4J_URL", "bolt://localhost:7687"),
)



## Setup Qdrant Client
In the terminal, type:
```
docker run -p 6333:6333 -p 6334:6334 \
    -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
    qdrant/qdrant
```
This will spin up a Qdrant vector database instance

In [3]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient, AsyncQdrantClient

# One name shared by the reset guard and the vector store, so both always
# refer to the same Qdrant collection (previously the guard checked a
# different name and never reset the collection actually written to).
# NOTE(review): the name looks like a leftover from another notebook; rename
# it here if a more descriptive collection name is wanted.
COLLECTION_NAME = "Titus_Resume"

# Sync and async clients pointing at the local Qdrant instance.
client = QdrantClient(host="localhost", port=6333)
aclient = AsyncQdrantClient(host="localhost", port=6333)

# Delete the collection if it exists so a re-run starts from a clean slate.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

vector_store = QdrantVectorStore(
    COLLECTION_NAME,
    client=client,
    aclient=aclient,
    fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions",
)

Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

## Ingest documents

In [4]:
from llama_index.core import SimpleDirectoryReader

# Load the essay text file into llama-index documents.
essay_path = "../../../data/paul_graham_essay.txt"
essay_reader = SimpleDirectoryReader(input_files=[essay_path])
docs = essay_reader.load_data()

## Setup PropertyGraphIndex

In [5]:
from llama_index.core import (
    Settings,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.indices import PropertyGraphIndex
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the chat model and the embedding model on llama-index's `Settings`
# object; later cells rely on these instead of passing models explicitly.
chat_model = OpenAI(model="gpt-4o-mini")
Settings.llm = chat_model

embedding_model = OpenAIEmbedding()
Settings.embed_model = embedding_model

In [6]:
from llama_index.core.indices.property_graph import (
    SimpleLLMPathExtractor,
    ImplicitPathExtractor
)

# Two extractors are used when building the graph index:
#   * an LLM-driven extractor, configured with the options below;
#   * an implicit extractor, built with its defaults.
llm_extractor_options = {
    "max_paths_per_chunk": 10,
    "num_workers": 4,
}
simple_path_extractor = SimpleLLMPathExtractor(**llm_extractor_options)

implicit_path_extractor = ImplicitPathExtractor()

In [7]:
# Build the property graph index over the loaded documents, storing the graph
# in `graph_store` and the embeddings in `vector_store`.
# The graph store has to be passed as `property_graph_store`: the previous
# keyword `property_graph` is not a parameter of the index, so it was absorbed
# by `**kwargs` and the store given here was never used.
index = PropertyGraphIndex.from_documents(
    docs,
    property_graph_store=graph_store,
    vector_store=vector_store,
    embed_kg_nodes=True,
    kg_extractors=[
        simple_path_extractor,
        implicit_path_extractor,
    ],
)

Some ingestion errors were observed with Neo4j, while Qdrant works fine, so the following cells switch the graph store to `SimplePropertyGraphStore`.

In [8]:
from llama_index.core.graph_stores import SimplePropertyGraphStore

# Alternative graph store used for the second index build.
graph_store2 = SimplePropertyGraphStore()

In [9]:
# NOTE(review): this guard resets a collection named "PaulGraham". Confirm that
# this is the collection `vector_store` writes to; if it is not, the guard does
# nothing. If it is, the store may need to be re-created after the delete.
if client.collection_exists("PaulGraham"):
    client.delete_collection("PaulGraham")

# Rebuild the index on top of `graph_store2`.
# The graph store has to be passed as `property_graph_store`: the previous
# keyword `property_graph` is not a parameter of the index, so it was absorbed
# by `**kwargs` and `graph_store2` was never the store backing `index2`.
index2 = PropertyGraphIndex.from_documents(
    docs,
    property_graph_store=graph_store2,
    vector_store=vector_store,
    embed_kg_nodes=True,
    kg_extractors=[
        simple_path_extractor,
        implicit_path_extractor,
    ],
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting paths from text: 100%|██████████| 21/21 [00:14<00:00,  1.50it/s]
Extracting implicit paths: 100%|██████████| 21/21 [00:00<00:00, 33554.43it/s]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
Generating embeddings: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


In [10]:
# Write the index's storage context to a local directory.
persist_dir = "./storage"
index2.storage_context.persist(persist_dir=persist_dir)

In [12]:
# Ask a question against the second index; the response object is the cell's
# last expression, so the notebook displays it.
query_engine = index2.as_query_engine()
question = "What did Paul Graham do growing up?"
query_engine.query(question)

Response(response="The provided information does not detail Paul Graham's experiences or activities while growing up. It primarily focuses on his work on the programming language Bel and his writing process.", source_nodes=[NodeWithScore(node=TextNode(id_='8f2cd0ab-905b-49b6-9383-d3a2f263f0cb', embedding=None, metadata={'file_path': '../../../data/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2024-09-28', 'last_modified_date': '2024-09-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='357e8a82-f93d-4b54-b25b-7660fb59f4e5', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../../../data/paul_graham_essay.txt', 'f

In [13]:
# Export the graph held by the second index to an HTML file.
kg_store = index2.property_graph_store
kg_store.save_networkx_graph(name="./kg.html")