In [1]:
# Import necessary libraries
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load environment variables
load_dotenv()

# Load documents from a directory (you can change this path as needed)
documents = SimpleDirectoryReader("data").load_data()

# Create an index from the documents
index = VectorStoreIndex.from_documents(documents)

# Create a query engine
query_engine = index.as_query_engine()

# Example query
response = query_engine.query("What years does the strategic plan cover?")

print(response)

The strategic plan covers the years 2024-2028.


In [2]:
import openai
import os

# Create an index from the documents
index = VectorStoreIndex.from_documents(documents)

# Create a retriever to fetch relevant documents
retriever = index.as_retriever(retrieval_mode='similarity', k=3)

# Define your query
query = "What years does the strategic plan cover?"

# Retrieve relevant documents
relevant_docs = retriever.retrieve(query)

print(f"Number of relevant documents: {len(relevant_docs)}")
print("\n" + "="*50 + "\n")

for i, doc in enumerate(relevant_docs):
    print(f"Document {i+1}:")
    print(f"Text sample: {doc.node.get_content()[:200]}...")  # Print first 200 characters
    print(f"Metadata: {doc.node.metadata}")
    print(f"Score: {doc.score}")
    print("\n" + "="*50 + "\n")

api_key = os.getenv("OPENAI_API_KEY")
endpoint_url = "https://api.openai.com/v1"
client = openai.AsyncClient(api_key=api_key, base_url=endpoint_url)


# Craft an LLM prompt that combines the documents with the query
prompt = f"""Based on the following documents, please answer the query: "{query}"

Relevant documents:
"""

for i, doc in enumerate(relevant_docs):
    prompt += f"\nDocument {i+1}:\n{doc.node.get_content()}\n"

prompt += "\nPlease provide a short answer based on the information in these documents."

# Generate a response using the OpenAI API
async def generate_response(prompt):
    response = await client.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that answers questions based on provided documents."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

# Run the async function
llm_response = await generate_response(prompt)

print("LLM Response:")
print(llm_response)


Number of relevant documents: 2


Document 1:
Text sample: CodePath
2024-28
Strategic
Plan
+
Appendices
(V1
|
04.23.24)
Executive
Summary
Problem
Solution
Impact
and
Evidence
To
Date
Introduction
to
the
2024-28
Plan
Pillar
1:
Scale
Nationally
in
Breadth,
and
...
Metadata: {'page_label': '1', 'file_name': 'CodePath strategic plan.pdf', 'file_path': '/Users/nikrad/MaxAcademy/wk_2_rag_demo/data/CodePath strategic plan.pdf', 'file_type': 'application/pdf', 'file_size': 1452743, 'creation_date': '2024-09-17', 'last_modified_date': '2024-09-16'}
Score: 0.8278842030229911


Document 2:
Text sample: 2023-24
Corporate
Partners
Alteryx
Amazon
Asurion
Base10
Bentley
Systems
Bill.com
Bloomberg
Boeing
Brooks
Running
Course
Hero
Dick’s
Sporting
Goods
Docusign
Facebook
Fastly
Lyft
Meta
Microsoft
Mutual
...
Metadata: {'page_label': '48', 'file_name': 'CodePath strategic plan.pdf', 'file_path': '/Users/nikrad/MaxAcademy/wk_2_rag_demo/data/CodePath strategic plan.pdf', 'file_type': 'application/pdf', 'f