# Text to Graph Extraction using PropertyGraphIndex
Source data: Paul Graham essay (`paul_graham_essay.txt`)

In [4]:
import os

In [13]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

In [5]:
import warnings


def resolve_openai_api_key(env):
    """Return the OpenAI API key stored in ``env``, or ``None`` if there is none.

    Args:
        env: A mapping of environment variable names to values
            (``os.environ`` or a plain ``dict``).

    Returns:
        The value of ``OPEN_API_KEY`` (the name this notebook has always read)
        when it is non-empty, otherwise the value of the standard
        ``OPENAI_API_KEY``, otherwise ``None``.
    """
    return env.get("OPEN_API_KEY") or env.get("OPENAI_API_KEY") or None


openai_api_key = resolve_openai_api_key(os.environ)
if openai_api_key is None:
    # Assigning None into os.environ raises an opaque
    # "TypeError: str expected, not NoneType", so report the real problem here
    # and leave the environment untouched.
    warnings.warn(
        "No OpenAI API key found: set OPENAI_API_KEY (or the legacy "
        "OPEN_API_KEY) before running the cells that call OpenAI."
    )
else:
    os.environ["OPENAI_API_KEY"] = openai_api_key

In [6]:
# Build a reader over an explicit one-file list, then load it into `documents`.
essay_reader = SimpleDirectoryReader(input_files=["paul_graham_essay.txt"])
documents = essay_reader.load_data()

In [7]:
import nest_asyncio

# Apply nest_asyncio once, before any of the indexing / query cells run.
# NOTE(review): presumably needed because the notebook kernel already has a
# running event loop and the llama_index calls below use asyncio internally;
# confirm before removing.
nest_asyncio.apply()

In [12]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Connection settings are read from the environment so real credentials never
# have to be written into the notebook. The fallbacks are the local-development
# values this notebook previously hard-coded; override them with
# NEO4J_USERNAME / NEO4J_PASSWORD / NEO4J_URI for any other Neo4j instance.
# Note: used to be `Neo4jPGStore`
graph_store = Neo4jPropertyGraphStore(
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "abc12345"),
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
)



In [14]:
# Components handed to the index builder: the embedding model, and a
# schema-guided path extractor driven by a chat model at temperature 0.0.
embedder = OpenAIEmbedding(model_name="text-embedding-3-small")
extraction_llm = OpenAI(model="gpt-3.5-turbo", temperature=0.0)
path_extractor = SchemaLLMPathExtractor(llm=extraction_llm)

# Build the property graph index from `documents`, using `graph_store` as the
# property graph store and showing progress bars while it runs.
index = PropertyGraphIndex.from_documents(
    documents,
    embed_model=embedder,
    kg_extractors=[path_extractor],
    property_graph_store=graph_store,
    show_progress=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00,  9.35it/s]
Extracting paths from text with schema: 100%|██████████| 21/21 [01:20<00:00,  3.82s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.19it/s]


In [15]:
# include_text=False: do not include the source text in returned nodes
# (the default is True).
retriever = index.as_retriever(include_text=False)

question = "What happened at Interleaf and Viaweb?"
nodes = retriever.retrieve(question)

# Print the text of each retrieved node, one per line.
for hit in nodes:
    print(hit.text)

paul graham -> WORKED_ON -> Viaweb
Viaweb -> LOCATED_IN -> Cambridge
Paul Graham -> WORKED_ON -> Viaweb
Paul Graham -> WORKED_ON -> World Wide Web
Paul Graham -> WORKED_ON -> Hacker News
Paul Graham -> WORKED_ON -> Interpreter


In [16]:
# Run the question through a query engine created with include_text=True and
# print the response; print() applies str() to it.
query_engine = index.as_query_engine(include_text=True)
response = query_engine.query("What happened at Interleaf and Viaweb?")
print(response)

Paul Graham worked on Viaweb and Interleaf.
