In [1]:
import os, getpass

# Ask for the OpenRouter key interactively when the environment has no
# non-empty value for it; getpass keeps the typed key out of the cell output.
if os.environ.get("OPENROUTER_API_KEY", "") in ("", None):
    os.environ["OPENROUTER_API_KEY"] = getpass.getpass(prompt="OPENROUTER_API_KEY: ")

### Adding RAG Capabilities

In [2]:
from llama_index.llms.openrouter import OpenRouter
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Build the chat LLM, served through OpenRouter, that the rest of the notebook uses
llm = OpenRouter(
    model="anthropic/claude-3.5-sonnet",
    api_key=os.environ["OPENROUTER_API_KEY"],
    is_function_calling_model=True,
    is_chat_model=True,
)

# Register the global defaults on Settings before the index is built
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

# RAG setup, step 1: read the source documents.
# A directory named "data" containing text files must exist next to the notebook.
documents = SimpleDirectoryReader(input_dir="data").load_data()

# Step 2: build the vector index over the loaded documents.
# The embed_model can optionally be overridden here (embed_model=Settings.embed_model).
index = VectorStoreIndex.from_documents(documents=documents, show_progress=True)

# Step 3: expose the index through a query engine.
# The llm can optionally be overridden here (llm=Settings.llm).
query_engine = index.as_query_engine()

def multiply(a: float, b: float) -> float:
    """Useful for multiplying two numbers."""
    # The docstring is left verbatim: this function is handed to the agent as a
    # tool, and the docstring is the text that describes the tool.
    product = a * b
    return product


async def search_documents(query: str) -> str:
    """Useful for answering natural language questions about a personal essay written by Paul Graham."""
    # The docstring is deliberately a single sentence: this function is handed
    # to the agent as a tool and the docstring serves as the tool description,
    # so parameter notes live in comments instead.
    #
    # query:   the natural-language question to run against the indexed documents.
    # returns: the query engine's response converted to plain text with str().
    #
    # Uses the async `aquery` entry point of the cell-level `query_engine`, so
    # the call is awaited rather than run synchronously.
    response = await query_engine.aquery(query)
    return str(response)


# Create an enhanced workflow with both tools
agent = AgentWorkflow.from_tools_or_functions(
    [multiply, search_documents],
    llm=Settings.llm,
    system_prompt="""You are a helpful assistant that can perform calculations
    and search through documents to answer questions.""",
)


# Now we can ask questions about the documents or do calculations
response = await agent.run("What did the author do in college? Also, what's 7 * 8?")
print(response)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

To answer your questions:

1. In college, the author (Paul Graham):
- Initially studied philosophy, hoping to learn "ultimate truths"
- Found philosophy disappointing and switched to AI
- Was influenced by "The Moon is a Harsh Mistress" and a PBS documentary about SHRDLU
- Later pursued a PhD in computer science
- Completed his dissertation on applications of continuations in about 5 weeks
- Was also interested in art and Lisp programming during this time

2. 7 * 8 = 56


### Storing the RAG Index

In [3]:
# Write the index's storage context to the "storage" directory
index.storage_context.persist(persist_dir="storage")