<a href="https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/LlamaIndex_101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q llama-index==0.14.0 openai==1.107.0 chromadb==1.0.21 wikipedia==1.4.0 jedi==0.19.2 \
                llama-index-vector-stores-chroma==0.5.3 llama-index-llms-openai==0.5.4 llama-index-readers-wikipedia==0.4.0 \
                llama-index-embeddings-openai==0.5.0 llama-index-question-gen-guidance==0.4.1 guidance

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m951.0/951.0 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m59.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m257.2/257.2 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Patch asyncio so it can be used inside an environment that already runs an
# event loop (as Jupyter/Colab notebooks do).
import nest_asyncio

nest_asyncio.apply()

In [3]:
import os

# Resolve the OpenAI API key without ever hard-coding it in the notebook:
#   1. keep a key that is already exported in the environment,
#   2. otherwise read the `OPENAI_API_KEY` secret from Colab's userdata store,
#   3. otherwise (not running on Colab) prompt for it without echoing the input.
if not os.environ.get('OPENAI_API_KEY'):
    try:
        from google.colab import userdata
    except ImportError:
        # Not on Colab: ask interactively instead of failing on the import.
        from getpass import getpass

        os.environ['OPENAI_API_KEY'] = getpass('OPENAI_API_KEY: ')
    else:
        os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [4]:
import logging
import sys

# Send log records to stdout so they appear in the cell output.
# DEBUG is the most verbose level; switch to level=logging.INFO for less detail.
#
# `force=True` first removes any handlers already attached to the root logger
# (if one is present, a plain basicConfig call is a no-op), then installs a
# single stdout handler. Attaching a second StreamHandler on top of
# basicConfig would print every record twice.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

In [5]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Register the LLM and the embedding model on LlamaIndex's global `Settings` object.
Settings.llm = OpenAI(
    model="gpt-5-mini",
    additional_kwargs={"reasoning_effort": "minimal"},
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
)

# Wikipedia Example

## LlamaHub Wikipedia Integration

In [6]:
from llama_index.readers.wikipedia import WikipediaReader

# Reader instance used by the next cell to load Wikipedia pages.
reader = WikipediaReader()

In [7]:
# Fetch the two Wikipedia pages used in this example.
documents = reader.load_data(
    pages=[
        "Artificial intelligence",
        "Natural language generation",
    ]
)

In [8]:
# How many documents were loaded?
len(documents)

2

## Save on Chroma

In [9]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Open the on-disk Chroma database, get (or create) the collection for the
# Wikipedia articles, and wrap it in a LlamaIndex vector store.
db = chromadb.PersistentClient(path="./wikipedia-articles")
chroma_collection = db.get_or_create_collection(name="wikipedia-articles")
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)

## Create Nodes

In [10]:
from llama_index.core.node_parser import SimpleNodeParser

# Chunk the loaded documents into nodes (chunk_size=512, chunk_overlap=20)
# and report how many nodes were produced.
parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=20,
)
nodes = parser.get_nodes_from_documents(documents)
print(len(nodes))

51


## Storage Context

In [11]:
from llama_index.core import StorageContext

# Storage context backed by the Chroma vector store created above.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

## Create index from Nodes

In [12]:
from llama_index.core import VectorStoreIndex

# The Chroma collection lives on disk, so re-running this cell against an
# already-populated collection would embed the same chunks again and store
# them a second time. Only embed/insert when the collection is still empty;
# otherwise re-attach to the vectors that are already stored.
if chroma_collection.count() == 0:
    index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)
else:
    index = VectorStoreIndex.from_vector_store(vector_store)

In [13]:
# Build a query engine on top of the index, ask one question, and display
# the answer text as the cell's output.
query_engine = index.as_query_engine()
response = query_engine.query("What does NLP stands for?")
response.response

'NLP stands for "Natural Language Processing."'

## Store/Load Vector Store

In [14]:
# Index Storage Checks
import os.path
from llama_index.core import StorageContext, load_index_from_storage

# Directory holding the persisted index metadata (same location the
# no-argument `persist()` call writes to).
PERSIST_DIR = "./storage"

if not os.path.exists(PERSIST_DIR):
    # Nothing persisted yet: write the current index's storage context to disk.
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # The index already exists on disk, so load it. The embeddings are kept in
    # the external Chroma collection rather than under PERSIST_DIR, so the
    # vector store has to be handed back to the storage context explicitly;
    # without it the reloaded index has no default vector store to query.
    storage_context = StorageContext.from_defaults(
        persist_dir=PERSIST_DIR,
        vector_store=vector_store,
    )
    index = load_index_from_storage(storage_context)

# Paul Graham Essay

In [15]:
# Create the local data folder (no error if it already exists) and download
# the Paul Graham essay into it; `-O` fixes the output file name.
!mkdir -p './paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O './paul_graham/paul_graham_essay.txt'

--2025-09-24 06:53:02--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘./paul_graham/paul_graham_essay.txt’


2025-09-24 06:53:02 (5.90 MB/s) - ‘./paul_graham/paul_graham_essay.txt’ saved [75042/75042]



In [16]:
from llama_index.core import SimpleDirectoryReader

# Read the contents of ./paul_graham into LlamaIndex documents.
documents = SimpleDirectoryReader(input_dir="./paul_graham").load_data()

In [17]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Open the on-disk Chroma database for the essay, get (or create) its
# collection, and wrap it in a LlamaIndex vector store.
db = chromadb.PersistentClient(path="./paul-graham")
chroma_collection = db.get_or_create_collection(name="paul-graham")
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)

In [18]:
from llama_index.core import StorageContext, VectorStoreIndex

# Back the index with the "paul-graham" Chroma vector store created above;
# without an explicit storage context that store would never be used.
# Because the collection is persisted on disk, only embed/insert the
# documents while it is still empty, and otherwise re-attach to the vectors
# that are already stored, so re-runs do not add the essay twice.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=StorageContext.from_defaults(vector_store=vector_store),
    )
else:
    index = VectorStoreIndex.from_vector_store(vector_store)

# Retrieve the 10 most similar chunks for each query.
query_engine = index.as_query_engine(similarity_top_k=10)

In [19]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.question_gen import LLMQuestionGenerator

# Question generator that produces the sub-questions for the query below.
question_gen = LLMQuestionGenerator.from_defaults()

# Expose the essay's query engine as a named tool the sub-question engine can call.
query_engine_tools = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name="pg_essay",
            description="Paul Graham essay on What I Worked On",
        ),
    ),
]

# Bind the sub-question engine to its own name instead of overwriting
# `query_engine`: the tool above wraps `query_engine`, so rebinding that name
# would make a second run of this cell wrap the sub-question engine in itself.
sub_question_engine = SubQuestionQueryEngine.from_defaults(
    question_gen=question_gen,
    query_engine_tools=query_engine_tools,
    use_async=True,
)

response = sub_question_engine.query("How was Paul Grahams life different before, during, and after YC?")

print( ">>> The final response:\n", response )

Generated 6 sub questions.
[1;3;38;2;237;90;200m[pg_essay] Q: Summarize Paul Graham's life and activities before founding Y Combinator
[0m[1;3;38;2;90;149;237m[pg_essay] Q: Describe Paul Graham's experiences and activities while running Y Combinator
[0m[1;3;38;2;11;159;203m[pg_essay] Q: Outline Paul Graham's life and activities after stepping back from running Y Combinator
[0m[1;3;38;2;155;135;227m[pg_essay] Q: Identify key differences in Graham's daily routines, priorities, and public role across the before/during/after periods
[0m[1;3;38;2;237;90;200m[pg_essay] Q: Highlight major achievements and projects associated with each period (before, during, after YC)
[0m[1;3;38;2;90;149;237m[pg_essay] Q: Note any shifts in Graham's writing, public influence, or mentorship style across the three periods
[0m[1;3;38;2;11;159;203m[pg_essay] A: After stepping back from running Y Combinator, his life and activities unfolded roughly as follows:

- Transition out of YC leadership
  - He

## Other methods

In [20]:
# # Replace the SubQuestionQueryEngine section with this:
# from llama_index.core.query_engine import RouterQueryEngine
# from llama_index.core.selectors import PydanticSingleSelector
# from llama_index.core.tools import QueryEngineTool, ToolMetadata

# # Create query engine tools
# query_engine_tools = [
#     QueryEngineTool(
#         query_engine=query_engine,
#         metadata=ToolMetadata(
#             name="pg_essay",
#             description="Paul Graham essay on What I Worked On",
#         ),
#     ),
# ]

# # Use RouterQueryEngine instead of SubQuestionQueryEngine
# router_query_engine = RouterQueryEngine(
#     selector=PydanticSingleSelector.from_defaults(),
#     query_engine_tools=query_engine_tools,
# )

# # Test the query engine
# response = router_query_engine.query("What did Paul Graham work on?")
# print(response.response)


In [21]:
# from llama_index.core.query_engine import MultiStepQueryEngine
# from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform

# # Create multi-step query engine
# step_decompose_transform = StepDecomposeQueryTransform(llm=Settings.llm)

# multi_step_query_engine = MultiStepQueryEngine(
#     query_engine=query_engine,
#     query_transform=step_decompose_transform,
#     num_steps=2,
#     index_summary="Paul Graham essay about his work and experiences"
# )

# # Test multi-step query engine
# response = multi_step_query_engine.query("What did Paul Graham work on and what were his main achievements?")
# print(response.response)
