In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit import LexicalGraphIndex
from graphrag_toolkit.storage import GraphStoreFactory
from graphrag_toolkit.storage import VectorStoreFactory
from graphrag_toolkit.indexing.load import FileBasedChunks
from graphrag_toolkit.indexing.build import Checkpoint

from llama_index.readers.web import SimpleWebPageReader

import nest_asyncio
nest_asyncio.apply()

# ---------------------------------------------------------------------------
# Extract stage: load a handful of web pages and run graph_index.extract()
# over them, passing a file-based chunk handler and a checkpoint.
# ---------------------------------------------------------------------------

# Pages to ingest (pure data; no side effects).
doc_urls = [
    'https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
    'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html',
    'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-features.html',
    'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-vs-neptune-database.html',
]

# Handler handed to extract(); constructed with the './extracted/' directory.
# NOTE(review): presumably the extracted chunks are persisted there -- confirm.
file_based_chunks = FileBasedChunks('./extracted/')

# Named checkpoint passed to extract().
# NOTE(review): presumably lets a re-run skip work already done -- confirm.
checkpoint = Checkpoint('extraction-checkpoint')

# Store connection info comes from the GRAPH_STORE / VECTOR_STORE environment
# variables; a missing variable raises KeyError here.
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

graph_index = LexicalGraphIndex(graph_store, vector_store)

# Load the pages with html_to_text enabled, tagging each document's metadata
# with the URL it was loaded from.
web_reader = SimpleWebPageReader(
    html_to_text=True,
    metadata_fn=lambda url: {'url': url},
)
docs = web_reader.load_data(doc_urls)

graph_index.extract(
    docs,
    handler=file_based_chunks,
    checkpoint=checkpoint,
    show_progress=True,
)

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit import LexicalGraphIndex
from graphrag_toolkit.storage import GraphStoreFactory
from graphrag_toolkit.storage import VectorStoreFactory
from graphrag_toolkit.indexing.load import FileBasedChunks
from graphrag_toolkit.indexing.build import Checkpoint

import nest_asyncio
nest_asyncio.apply()

# ---------------------------------------------------------------------------
# Build stage: run graph_index.build() over the file-based chunks rooted at
# './extracted/', using its own checkpoint.
# ---------------------------------------------------------------------------

# Chunk source for build(); points at the same './extracted/' directory that
# the extract stage's handler was constructed with.
file_based_chunks = FileBasedChunks('./extracted/')

# Checkpoint name is distinct from the extract stage's 'extraction-checkpoint'.
checkpoint = Checkpoint('build-checkpoint-3')

# Store connection info comes from the GRAPH_STORE / VECTOR_STORE environment
# variables; a missing variable raises KeyError here.
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

graph_index = LexicalGraphIndex(graph_store, vector_store)

graph_index.build(
    file_based_chunks,
    checkpoint=checkpoint,
    show_progress=True,
)