## Initial RAG demo

In [4]:
# Import necessary libraries
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load environment variables
load_dotenv()

# Load documents from a directory (you can change this path as needed)
documents = SimpleDirectoryReader("data").load_data()

# Create an index from the documents
index = VectorStoreIndex.from_documents(documents)

# Create a query engine
query_engine = index.as_query_engine()

# Example query
response = query_engine.query("What years does the strategic plan cover?")

print(response)

The strategic plan covers the years 2024 to 2028.


## Custom RAG pipeline

In [11]:
import openai

index = VectorStoreIndex.from_documents(documents)

retriever = index.as_retriever(retrieval_mode='similarity', k=3)

query = "What years does the strategic plan cover?"

relevant_docs = retriever.retrieve(query)

print(f"Number of relevant documents: {len(relevant_docs)}")
print("\n" + "="*50 + "\n")

document_contents = []

for i, doc in enumerate(relevant_docs):
    document_contents.append(doc.node.get_content()[:500])

llm_prompt = f"""
You are a helpful assistant. I have the following question: '{query}'.

I also have {len(document_contents)} document(s) related to this question:
"""

for idx, doc_text in enumerate(document_contents):
    llm_prompt += f"\nDocument {idx+1}:\n{doc_text}\n"

llm_prompt += "\nBased on the information from these documents, please answer the query that I asked you."

print(llm_prompt)

client = openai.OpenAI();

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": llm_prompt}
    ],
    temperature=0.2
)

print(response.choices[0].message.content)


Number of relevant documents: 2



You are a helpful assistant. I have the following question: 'What years does the strategic plan cover?'.

I also have 2 document(s) related to this question:

Document 1:
CodePath
2024-28
Strategic
Plan
+
Appendices
(V1
|
04.23.24)
Executive
Summary
Problem
Solution
Impact
and
Evidence
To
Date
Introduction
to
the
2024-28
Plan
Pillar
1:
Scale
Nationally
in
Breadth,
and
Regionally
in
Depth
Pillar
2:
Improve
Effectiveness
While
Reducing
Costs
Pillar
3:
Develop
Sustainable
Sources
of
Revenue
Measurement,
Evaluation,
and
Learning
Finance
Organization
Conclusion
Appendices
0

Document 2:
corporate
and
college
customers.
Given
that
it
takes
time
for
sales
personnel
to
ramp
up,
coupled
with
the
six-
to
nine-month
lead
time
required
to
close
six-figure
enterprise
deals,
this
investment
will
continue
through
2025.
If
revenue
materializes
as
expected
or
higher,
the
investment
will
continue;
if
revenue
is
lower
than
expected,
management
will
reduce
expenses
and
e