In [None]:
%pip install -Uq llama-index

# Installs LlamaIndex with the %pip magic (which targets the environment of the running kernel).
# -U (upgrade): upgrades the package to the latest version if it is already installed.
# -q (quiet): suppresses most of the output (installation progress, dependency checks), keeping the cell output clean.
# NOTE(review): the version is not pinned, so a later run may install a different release — consider pinning for reproducibility.

In [None]:
import getpass
import os

# Prompt only when no key is configured yet, so a key already present in the
# environment (e.g. exported before launching Jupyter) is reused and re-running
# this cell does not ask again. An empty value is treated as missing.
# To switch keys, remove the variable first: os.environ.pop('OPENAI_API_KEY', None)
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass("OpenAI API Key: ")

# os.environ is a dictionary-like object in the os module; it contains the environment variables available to the current process (e.g., PATH, HOME, OPENAI_API_KEY, etc.)
# getpass.getpass() prompts the user for input without showing what they type (nothing appears on screen); use it to ask for passwords or API keys

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

In [None]:
documents = SimpleDirectoryReader(input_dir="data").load_data()

# Stage: files (PDF / HTML / CSV / DOCX, ...) → data connectors → documents
#
# - SimpleDirectoryReader picks up the files located in the "data" directory
# - each file is read by a data connector (file reader)
# - every file is loaded into one or more Document objects, which can later be split into Nodes (text chunks)

In [None]:
index = VectorStoreIndex.from_documents(documents=documents)

# Stage: documents → nodes → embeddings → index
#
# - the documents are split into nodes (chunks of text)
# - an embedding is computed for every node with the configured embedding model
#   (OpenAI unless a different one, e.g. a Hugging Face model, has been configured)
# - the embeddings are kept in a vector index so that semantic search is possible

In [None]:
query_engine = index.as_query_engine()

# "query → retriever → index → response synthesizer"

# - Sets up the pipeline used to query the index.
# - Wraps the index in a QueryEngine that handles:
#   * Receiving the user query
#   * Passing it to a retriever that looks up the most relevant nodes in the index
#   * Handing those nodes over for response synthesis
# NOTE(review): no router is configured by this call as written; routing across several retrievers/engines is presumably an opt-in component — confirm against the LlamaIndex docs.

In [None]:
question = "What is the first article?"
response = query_engine.query(question)

# Flow: query → retriever(s) → index → Response Synthesizer → LLM → response
#
# - a natural-language question is asked against the indexed documents
# - the question is embedded and compared with the stored embeddings to find the most relevant content
# - the retrieved content is handed to the Response Synthesizer
# - the LLM turns it into a human-readable answer

In [None]:
# Show the answer: print() renders the response object through its str() form.
print(response)