# 02 - Cloud Build

## Setup

If you haven't already, install the toolkit and dependencies using the [Setup](./00-Setup.ipynb) notebook.

## Build

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from the toolkit's .env file. The later cell
# reads LOCAL_EXTRACT_S3, GRAPH_STORE and VECTOR_STORE from os.environ.
# NOTE(review): the path is specific to this notebook host's home directory;
# adjust it when running elsewhere.
env_file = '/home/ec2-user/SageMaker/graphrag-toolkit/.env'
load_dotenv(env_file)

In [None]:
import os

from graphrag_toolkit.lexical_graph import LexicalGraphIndex, set_logging_config
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.indexing.load import S3BasedDocs
from graphrag_toolkit.lexical_graph.indexing.build import Checkpoint

# Configure toolkit logging at the 'INFO' level before doing any work.
set_logging_config('INFO')

# Source documents live in the S3 bucket named by the LOCAL_EXTRACT_S3
# environment variable; a missing variable raises KeyError here.
# NOTE(review): presumably 'extract-build' / 'best-practices' identify the
# output of an earlier extract stage - confirm against that notebook.
docs_settings = {
    'region': 'us-east-1',
    'bucket_name': os.environ['LOCAL_EXTRACT_S3'],
    'key_prefix': 'extract-build',
    'collection_id': 'best-practices',
}
docs = S3BasedDocs(**docs_settings)

# NOTE(review): the checkpoint presumably lets a re-run skip work that has
# already completed - confirm against the toolkit documentation.
checkpoint = Checkpoint('s3-build-checkpoint')

# Graph and vector store connection info comes from the environment;
# each lookup raises KeyError if its variable is not set.
graph_store_info = os.environ['GRAPH_STORE']
graph_store = GraphStoreFactory.for_graph_store(graph_store_info)

vector_store_info = os.environ['VECTOR_STORE']
vector_store = VectorStoreFactory.for_vector_store(vector_store_info)

# Build the lexical graph from the S3 documents into both stores.
graph_index = LexicalGraphIndex(graph_store, vector_store)
graph_index.build(docs, checkpoint=checkpoint, show_progress=True)

print('Build complete')