In [1]:
import os

In [2]:
# Read the OpenAI API key from the first line of ../key and export it as the
# OPENAI_API_KEY environment variable.
with open("../key") as fs:
    # readline() reads just the first line and returns "" for an empty file,
    # where readlines()[0] would load every line and raise a bare IndexError.
    api_key = fs.readline().strip()

# Fail early with an actionable message rather than exporting a blank key.
if not api_key:
    raise ValueError("'../key' must contain the OpenAI API key on its first line")

os.environ['OPENAI_API_KEY'] = api_key

In [3]:
import logging
import sys

# Send log records to stdout at INFO level; the INFO:root lines shown in the
# cell outputs of this notebook come from this configuration.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [4]:
### Using a Knowledge Graph (KG)

In [3]:
# Building the KG
from gpt_index import SimpleDirectoryReader, LLMPredictor
from gpt_index.indices.knowledge_graph.base import GPTKnowledgeGraphIndex
from langchain import OpenAI
from IPython.display import Markdown, display

In [7]:
# Load the essay corpus from the local data directory.
essay_reader = SimpleDirectoryReader('paul_graham_essay/data')
documents = essay_reader.load_data()

In [4]:
# Define the LLM that is handed to the index when it is built and reloaded.
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
davinci_llm = OpenAI(model_name="text-davinci-002", temperature=0)
llm_predictor = LLMPredictor(llm=davinci_llm)

In [11]:
# Build the knowledge-graph index from the loaded documents.
# NOTE: can take a while! Every chunk goes through the LLM.
kg_build_options = dict(
    chunk_size_limit=512,
    max_triplets_per_chunk=2,
    llm_predictor=llm_predictor,
)
index = GPTKnowledgeGraphIndex(documents, **kg_build_options)

INFO:root:> [build_index_from_documents] Total LLM token usage: 24787 tokens
INFO:root:> [build_index_from_documents] Total embedding token usage: 0 tokens


In [12]:
# Persist the built knowledge-graph index to disk.
kg_index_path = 'index_kg.json'
index.save_to_disk(kg_index_path)

In [5]:
# Reload the index from the file written by save_to_disk, attaching the same
# LLM predictor so that queries go through it.
saved_index_file = 'index_kg.json'
new_index = GPTKnowledgeGraphIndex.load_from_disk(
    saved_index_file,
    llm_predictor=llm_predictor,
)

In [14]:
# Ask the reloaded knowledge-graph index a question.
# include_text=False presumably restricts the answer to the extracted triplets
# (no source text chunks) -- confirm against the gpt_index documentation.
question = "Tell me more about Interleaf"
response = new_index.query(
    question,
    response_mode="tree_summarize",
    include_text=False,
)

INFO:root:> Starting query: Tell me more about Interleaf
INFO:root:> Query keywords: ['software', 'Interleaf', 'company', 'history']
INFO:root:> Extracted relationships: The following are knowledge triplets in the form of (subset, predicate, object):
('software', 'generate web sites for', 'galleries')
('Interleaf', 'got crushed by', "Moore's Law")
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 305 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens


In [15]:
# Render the answer in bold using the notebook's Markdown display.
bold_answer = Markdown(f"<b>{response}</b>")
display(bold_answer)

<b>
Interleaf was a software company that was acquired by Quark, Inc. in 2000. Interleaf produced software that generated web sites for galleries and other businesses.</b>

In [7]:
## Visualize the graph
from pyvis.network import Network

# Set up a directed pyvis network configured for notebook rendering.
net = Network(directed=True, notebook=True, cdn_resources="in_line")

# Export the index as a networkx graph and copy it into the pyvis network.
g = new_index.get_networkx_graph()
net.from_nx(g)

# Write the visualisation to example.html; kept as the last expression so the
# notebook displays whatever show() returns.
net.show("example.html")