In [None]:
from llama_index import SimpleDirectoryReader
from llama_index import Document
from llama_index.node_parser import SimpleNodeParser
from llama_index.schema import IndexNode
from llama_index.llms import OpenAI
from llama_index import ServiceContext
from llama_index import VectorStoreIndex
from llama_index.query_engine import RetrieverQueryEngine

# For openai key
# Keep a key that is already exported in the environment; only fall back to
# the blank placeholder (replace it with your own key) when none is set.
import os
if not os.environ.get("OPENAI_API_KEY", "").strip():
    os.environ["OPENAI_API_KEY"] = " "

In [None]:
# read the source pdf (the entry in input_files is a blank placeholder path)
documents = SimpleDirectoryReader(
    input_files=[" "],
).load_data()

# join the content of every loaded document, separated by blank lines,
# and wrap the result in a single Document
doc_text = "\n\n".join(doc.get_content() for doc in documents)
text = [Document(text=doc_text)]

In [None]:
# node parser built with its default settings
node_parser = SimpleNodeParser.from_defaults()

# split the combined document into nodes
base_nodes = node_parser.get_nodes_from_documents(text)

# overwrite each node id with a sequential one: node-0, node-1, ...
for idx, node in enumerate(base_nodes):
    node.id_ = f"node-{idx}"

In [None]:
# load embedding model
# resolve_embed_model is not among the imports at the top of this file,
# so bring it into scope here before calling it
from llama_index.embeddings import resolve_embed_model

embed_model = resolve_embed_model("local:BAAI/bge-small-en")

# load llm
llm = OpenAI(model="gpt-3.5-turbo")

In [None]:
# bundle the llm and the embedding model into one service context
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

In [None]:
# build the vector store index over the nodes, using the service context
index = VectorStoreIndex(
    base_nodes,
    service_context=service_context,
)

In [None]:
# create a retriever from the index (no arguments passed, so whatever
# defaults as_retriever() applies are used)
retriever = index.as_retriever()

In [None]:
# build the query engine on top of the retriever, sharing the service context
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    service_context=service_context,
)

# run a sample question and print the response as text
response = query_engine.query("What is the content of the document?")
print(response)