In [None]:
# My OpenAI Key
# An OPENAI_API_KEY that is already exported in the environment is kept as-is;
# the placeholder below (replace it with your own key) is only used when no key
# is set, so a real key is never overwritten by the placeholder string.
import os
os.environ.setdefault('OPENAI_API_KEY', "INSERT OPENAI KEY")

In [1]:
import logging
import sys

# Send root-logger records at INFO level and above to stdout, so they appear in the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

## Using a Knowledge Graph

#### Building the Knowledge Graph

In [2]:
from gpt_index import SimpleDirectoryReader, LLMPredictor
from gpt_index.indices.knowledge_graph.base import GPTKnowledgeGraphIndex
from langchain import OpenAI
from IPython.display import Markdown, display

In [21]:
# Point a reader at the Paul Graham essay data folder, then load its contents.
essay_reader = SimpleDirectoryReader('../paul_graham_essay/data')
documents = essay_reader.load_data()

In [4]:
# define LLM
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
# The OpenAI LLM is created first (temperature 0), then wrapped in the predictor
# that the index cells below receive.
davinci_llm = OpenAI(model_name="text-davinci-002", temperature=0)
llm_predictor = LLMPredictor(llm=davinci_llm)

In [None]:
# NOTE: can take a while!
# Build settings are collected up front and unpacked into the constructor.
# (max_triplets_per_chunk presumably caps the triplets extracted per text chunk
# -- confirm against the gpt_index docs.)
kg_build_options = dict(
    chunk_size_limit=512,
    max_triplets_per_chunk=2,
    llm_predictor=llm_predictor,
)
index = GPTKnowledgeGraphIndex(documents, **kg_build_options)

In [24]:
# Persist the built index so it can be reloaded without rebuilding it.
kg_index_file = 'index_kg.json'
index.save_to_disk(kg_index_file)

In [None]:
# try loading
# Recreate an index object from the saved file, passing in the same LLM predictor.
new_index = GPTKnowledgeGraphIndex.load_from_disk(
    'index_kg.json',
    llm_predictor=llm_predictor,
)

#### Querying the Knowledge Graph

In [17]:
# Query with include_text=False (presumably: answer from the extracted triplets
# only, without the source text chunks -- confirm against the gpt_index docs).
interleaf_question = "Tell me more about Interleaf"
response = new_index.query(
    interleaf_question,
    response_mode="tree_summarize",
    include_text=False,
)

INFO:root:> Starting query: Tell me more about Interleaf
INFO:root:> Query keywords: ['company', 'Interleaf', 'history', 'software']
INFO:root:> Extracted relationships: The following are knowledge triplets in the form of (subset, predicate, object):
('Interleaf', 'made software for', 'creating documents')
('Interleaf', 'added', 'scripting language')
('software', 'generate', 'web sites')
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 312 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens


In [18]:
# Wrap the response in <b> tags and render it as Markdown.
bold_answer = Markdown(f"<b>{response}</b>")
display(bold_answer)

<b>
Interleaf was a software company that made software for creating documents. They later added a scripting language to their software, which allowed users to generate web sites.</b>

In [15]:
# Same kind of query, this time with include_text=True (presumably: the source
# text chunks are also used when answering -- confirm against the gpt_index docs).
author_question = "Tell me more about what the author worked on at Interleaf"
response = new_index.query(
    author_question,
    response_mode="tree_summarize",
    include_text=True,
)

INFO:root:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:root:> Query keywords: ['work', 'author', 'Interleaf']
INFO:root:> Querying with idx: ed39a830-a116-41b9-a551-bdd348dba61d: life, we aren't consciously aware of much we're seeing. Most visual perceptio...
INFO:root:> Querying with idx: fa1cfbb9-782b-4352-b610-cdae080b8f4f: painting that looks like a certain kind of cartoon, you know it's by Roy Lich...
INFO:root:> Extracted relationships: The following are knowledge triplets in the form of (subset, predicate, object):
('Interleaf', 'made software for', 'creating documents')
('Interleaf', 'added', 'scripting language')
INFO:root:> Building index from nodes: 0 chunks
INFO:root:> [query] Total LLM token usage: 1254 tokens
INFO:root:> [query] Total embedding token usage: 0 tokens


In [16]:
# Wrap the response in <b> tags and render it as Markdown.
bold_answer = Markdown(f"<b>{response}</b>")
display(bold_answer)

<b>
The author worked on software that allowed users to create documents, similar to Microsoft Word. The software also had a scripting language that was based on Lisp.</b>

#### Visualizing the Graph

In [27]:
## create graph
from pyvis.network import Network

# Get a networkx graph from the loaded index and hand it to pyvis.
g = new_index.get_networkx_graph()

# Directed network in notebook mode, with cdn_resources set to "in_line".
net = Network(directed=True, cdn_resources="in_line", notebook=True)
net.from_nx(g)

# Kept as the cell's last expression so the result of show() is what the cell displays.
net.show("example.html")