# Text to Graph Extraction using PropertyGraphIndex
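Source data: the faculty manual (`facultymanual.pdf`). An LLM extracts entities and relations from the manual into a Neo4j property graph, which is then queried through a retriever and a query engine.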

In [30]:
import os

In [31]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

In [32]:
# Publish the OpenAI key under OPENAI_API_KEY for the OpenAI-backed LLM and
# embedding models used below. The key is stored as OPEN_API_KEY in this
# environment; if that is unset, fall back to an already-exported
# OPENAI_API_KEY rather than failing with the opaque
# "TypeError: str expected, not NoneType" raised when None is assigned into
# os.environ.
_openai_api_key = os.getenv("OPEN_API_KEY") or os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "No OpenAI API key found: set OPEN_API_KEY (or OPENAI_API_KEY) "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key

In [33]:
# Read the faculty manual PDF into a list of documents for indexing.
manual_reader = SimpleDirectoryReader(input_files=["facultymanual.pdf"])
documents = manual_reader.load_data()

In [34]:
import nest_asyncio

# Jupyter already runs an asyncio event loop; patch it so the async LLM calls
# issued during graph extraction can run nested inside that loop.
nest_asyncio.apply()

In [35]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Connection settings are read from the environment so credentials do not have
# to be edited into (and committed with) the notebook. The fallbacks are the
# local development values this notebook was originally written against;
# override them with NEO4J_USERNAME / NEO4J_PASSWORD / NEO4J_URL for any
# shared or non-local database.
# Note: used to be `Neo4jPGStore`
graph_store = Neo4jPropertyGraphStore(
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "abc12345"),
    url=os.environ.get("NEO4J_URL", "bolt://localhost:7687"),
)



In [36]:
# Build the property graph: gpt-4o (temperature 0) extracts schema-guided
# paths from each chunk, text-embedding-3-small embeds them, and the result
# is written to the Neo4j graph store.
extraction_llm = OpenAI(model="gpt-4o", temperature=0.0)
path_extractor = SchemaLLMPathExtractor(llm=extraction_llm)
embedding_model = OpenAIEmbedding(model_name="text-embedding-3-small")

index = PropertyGraphIndex.from_documents(
    documents,
    embed_model=embedding_model,
    kg_extractors=[path_extractor],
    property_graph_store=graph_store,
    show_progress=True,
)

Parsing nodes: 100%|██████████| 148/148 [00:00<00:00, 2023.91it/s]
Extracting paths from text with schema:  39%|███▊      | 58/150 [01:38<02:01,  1.32s/it]Retrying llama_index.llms.openai.base.OpenAI._achat in 0.9028776551298324 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-BGsG5y0p7qAnSh6f8nhzI5kJ on tokens per min (TPM): Limit 30000, Used 29768, Requested 808. Please try again in 1.152s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.2043250424960491 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-BGsG5y0p7qAnSh6f8nhzI5kJ on tokens per min (TPM): Limit 30000, Used 29945, Requested 732. Please try again in 1.354s. Visit https://platform.openai.com/account/rate-limits to learn more.

In [37]:
# Retrieve graph paths only: include_text defaults to True, which would also
# include the source text in the returned nodes.
founder_question = "Who is the founder of National University?"

retriever = index.as_retriever(include_text=False)
nodes = retriever.retrieve(founder_question)

# Print one retrieved path per line.
for hit in nodes:
    print(hit.text)

University -> PART_OF -> International Association of Universities
University -> PART_OF -> Association of Southeast Asia Higher Institutions of Learning
University -> PART_OF -> Philippine Association of Colleges and Universities
University -> PART_OF -> University Athletic Association of the Philippines
Philippine Law School -> PART_OF -> NATIONAL UNIVERSITY
National Academy -> PART_OF -> NATIONAL UNIVERSITY
Colegio Mercantil -> PART_OF -> NATIONAL UNIVERSITY
NATIONAL UNIVERSITY -> LOCATED_IN -> Quiapo, City of Manila
National University -> LOCATED_IN -> customer premises
National University -> LOCATED_IN -> campus
HR -> PART_OF -> National University
National University -> LOCATED_IN -> National University campus
National University -> PART_OF -> Human Resources
National University -> PART_OF -> Human Resources Division
National University -> LOCATED_IN -> community


In [38]:
# Ask the same question through the query engine, this time with the source
# text included (include_text=True), and print the synthesised answer.
founder_question = "Who is the founder of National University?"

query_engine = index.as_query_engine(include_text=True)
response = query_engine.query(founder_question)

print(response)

Don Mariano Fortunato Jhocson is the founder of National University.


## If the graph already exists

In [39]:
# Re-attach to the graph already stored in Neo4j instead of re-extracting it.
# The LLM here is only used for querying (temperature 0.3).
query_llm = OpenAI(model="gpt-4o", temperature=0.3)
embedding_model = OpenAIEmbedding(model_name="text-embedding-3-small")

index = PropertyGraphIndex.from_existing(
    property_graph_store=graph_store,
    llm=query_llm,
    embed_model=embedding_model,
)

In [46]:
# Retrieve graph paths only: include_text defaults to True, which would also
# include the source text in the returned nodes.
ote_question = "What is the maximum OTE score"

retriever = index.as_retriever(include_text=False)
nodes = retriever.retrieve(ote_question)

# Print one retrieved path per line.
for hit in nodes:
    print(hit.text)

Faculty Services Office -> PART_OF -> FSO
Campus HRO -> PART_OF -> academic and employment credentials
NIKKA E. CELESTE -> WORKED_ON -> Talent Development
Academic Operations Division -> PART_OF -> Academic Operations


In [51]:
# Query the existing graph with the source text included (include_text=True).
query_engine = index.as_query_engine(include_text=True)

# The query string had been left as the placeholder "?", which does not match
# the recorded answer below this cell. The question is restored from that
# answer so a fresh run reproduces the documented result.
# NOTE(review): wording reconstructed from the output — confirm the original.
response = query_engine.query(
    "How can a faculty member change their status from part-time to "
    "full-time, and what steps are involved?"
)

print(str(response))

A faculty member can change their status from part-time to full-time by taking on a full-time teaching load per term. The steps involved in this transition typically include accepting a workload of twenty-four (24) credit hours per term and committing to a forty (40) hour work week.


In [48]:
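# Disabled scratch example: insert one extra document into the existing index.
# Uncomment all three statements together to run it.

# from llama_index.core import Document

# document = Document(text="What are the four SPES components?")

# index.insert(document)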



In [None]:

# nodes = index.as_retriever(include_text=False).retrieve("LlamaIndex")

# print(nodes[0].text)