# KitchenAI Simple RAG Bento

This is a simple RAG bento that gets developers up and running as quickly as possible. It provides the core RAG functions an application needs: document storage, querying (with and without streaming), and text embedding.

In [1]:
%load_ext kitchenai.contrib.notebooks


INFO HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"


In [2]:
%kitchenai_set_project kitchenai-bento-simple-rag

"Project name set to 'kitchenai-bento-simple-rag'."

# Imports 


In [3]:
from kitchenai_llama.storage.llama_parser import Parser
from llama_index.llms.litellm import LiteLLM
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.openai import OpenAI
import os 
import chromadb
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.extractors import (
    TitleExtractor,
    QuestionsAnsweredExtractor)
from llama_index.core import Document



In [7]:
%kitchenai_import_previous_cell kitchenai-bento-rag-simple-imports

kitchenai_result: Registered new CodeImport with hash 13b980c4893f2e5a1d6a010f18c2276ec9427a5cfbc5ac60e4f6dd591a68ebb0.


# Setup globals 

In [4]:
# LLM handed to every query engine built in this notebook.
llm = LiteLLM(model="gpt-4o")

# Chroma database stored under ./chroma_db; the "quickstart" collection is
# created on first use and fetched when it already exists.
chroma_client = chromadb.PersistentClient(path="chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="quickstart")



In [9]:
%kitchenai_setup_previous_cell kitchenai-bento-simple-rag

kitchenai_result: setup 'kitchenai-bento-simple-rag' is already registered with the same code.


In [20]:
from kitchenai.contrib.kitchenai_sdk.schema import StorageSchema


# Sample storage request used to try out the storage function in this notebook.
storage_data = StorageSchema(
    dir="./data",
    extension=".pdf",
    metadata={"source": "notebook", "category": "mock"},
)

def simple_storage(data: StorageSchema, **kwargs):
    """
    Load the documents found under ``data.dir`` and index them into the shared
    Chroma collection. This is run in a background task.

    Args:
        data: StorageSchema. ``data.dir`` and ``data.metadata`` are read here;
            ``data.extension`` is not used by this function.
        **kwargs: forwarded unchanged to ``Parser.load``.

    Returns:
        None. The index is built only for its effect on the vector store.
    """
    llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY", None)
    parsed = Parser(api_key=llama_cloud_key).load(
        data.dir, metadata=data.metadata, **kwargs
    )

    chroma_store = ChromaVectorStore(chroma_collection=chroma_collection)
    context = StorageContext.from_defaults(vector_store=chroma_store)

    VectorStoreIndex.from_documents(
        parsed["documents"],
        storage_context=context,
        show_progress=True,
        # Split into token chunks, then run the title and question extractors.
        transformations=[
            TokenTextSplitter(),
            TitleExtractor(),
            QuestionsAnsweredExtractor(),
        ],
    )

    


In [21]:
%kitchenai_register_previous_cell storage kitchenai-bento-simple-rag

kitchenai_result: Registered new function 'kitchenai-bento-simple-rag' of type 'storage' with hash e344d1c9f726de475f8d4ee6971fe560aa7552e193783ff1428352effb6947b9.


In [12]:
# Run the ingestion once against the sample request. simple_storage has no
# return statement, so `result` is None and the cell shows only progress logs.
result = simple_storage(storage_data)
result

Parsing nodes:   0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:00<00:00,  1.23it/s]
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:00<00:00,  1.94it/s]
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:00<00:00,  1.95it/s]
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/

Generating embeddings:   0%|          | 0/20 [00:00<?, ?it/s]

INFO HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


# Query Function

In [6]:
from kitchenai.contrib.kitchenai_sdk.schema import QuerySchema, QueryBaseResponseSchema
import asyncio

# Sample query request shared by the query helpers in this notebook.
# The query handlers defined here read only `query`; `stream` and `metadata`
# are carried on the request but not consulted by them.
query_data = QuerySchema(
    query="Summarize the rfp",
    stream=False,
    metadata={"topic": "healthcare", "keyword": "AI"}
)

async def kitchenai_bento_simple_rag_vjnk(data: QuerySchema):
    """
    Answer a question from the documents stored in the shared Chroma collection.

    class QuerySchema(Schema):
        query: str
        stream: bool = False
        metadata: dict[str, str] | None = None
    Args:
        data: QuerySchema. Only ``data.query`` is read by this function.

    Response:
        QueryBaseResponseSchema with these fields populated:
            input: the query text that was asked
            output: the synthesized answer text
            retrieval_context: the text content of each retrieved source node
        The remaining schema fields are left at their defaults.
    """
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    index = VectorStoreIndex.from_vector_store(
        vector_store,
    )
    # NOTE(review): chat_mode is normally an as_chat_engine option; confirm it
    # has any effect when passed to as_query_engine.
    query_engine = index.as_query_engine(chat_mode="best", llm=llm, verbose=True)
    response = await query_engine.aquery(data.query)
    print("metadata:", response.metadata)
    print("source_nodes:", response.source_nodes)

    # Expose what was retrieved so callers can inspect or evaluate the answer.
    # response.metadata is not forwarded: it is keyed by node id with nested
    # dicts, which does not fit the schema's dict[str, str] metadata field.
    retrieval_context = [hit.get_content() for hit in (response.source_nodes or [])]
    return QueryBaseResponseSchema(
        input=data.query,
        output=response.response,
        retrieval_context=retrieval_context,
    )

async def run_query():
    """Send the sample ``query_data`` through the query handler and print its answer."""
    answer = await kitchenai_bento_simple_rag_vjnk(query_data)
    print("output:", answer.output)


In [15]:
%kitchenai_register_previous_cell query kitchenai-bento-simple-rag


kitchenai_result: Registered new function 'kitchenai-bento-simple-rag' of type 'query' with hash d082441dee5a8ce4d42c7ea8cb09d078fb59f74f03bdd36374c6875a9c0c175e.


In [16]:
# Drive the coroutine to completion and print the answer.
# NOTE(review): asyncio.run() raises when an event loop is already running; in a
# plain Jupyter kernel `await run_query()` is the usual form. Presumably the
# loaded notebook extension makes this call work — confirm.
asyncio.run(run_query())

INFO HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[92m22:11:06 - LiteLLM:INFO[0m: utils.py:2699 - 
LiteLLM completion() model= gpt-4o; provider = openai
INFO 
LiteLLM completion() model= gpt-4o; provider = openai
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


metadata: {'8b9e88a0-5b3a-4f9a-8e44-c1707adbf159': {'page_label': '6', 'file_name': 'mock-rfp.pdf', 'source': 'notebook', 'category': 'mock', 'document_title': 'Procurement Guidelines and Terms for Request for Proposal (RFP) Submission', 'questions_this_excerpt_can_answer': '1. What is the duration of prices for the proposal submitted in response to this RFP?\n2. What law governs this RFP and any resulting contract?\n3. Can the City reject proposals and waive technical defects at its sole discretion?\n4. Are proposal expenses the responsibility of the City or the respondent?\n5. How does the City handle confidentiality of proposals and what is the process for designating material as confidential?'}, 'dd37669a-77e4-46b5-81ec-4d8f0ee40b85': {'page_label': '5', 'file_name': 'mock-rfp.pdf', 'source': 'notebook', 'category': 'mock', 'document_title': 'Proposal Submission Requirements and Price Proposal for City of Takoma Park', 'questions_this_excerpt_can_answer': '1. What is the contact in

In [7]:
#Streaming Query

async def kitchenai_bento_simple_rag_stream_vjnk(data: QuerySchema):
    """
    Stream an answer to a question from the documents stored in the shared
    Chroma collection.

    class QuerySchema(Schema):
        query: str
        stream: bool = False
        metadata: dict[str, str] | None = None
    Args:
        data: QuerySchema. Only ``data.query`` is read by this function; the
            engine is always built with ``streaming=True``.

    Response:
        QueryBaseResponseSchema with these fields populated:
            input: the query text that was asked
            stream_gen: the response generator of the streaming answer; iterate
                it to receive the answer chunk by chunk
            retrieval_context: the text content of each retrieved source node
        ``output`` is not set because the answer is delivered through ``stream_gen``.
    """
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    index = VectorStoreIndex.from_vector_store(
        vector_store,
    )
    # NOTE(review): chat_mode is normally an as_chat_engine option; confirm it
    # has any effect when passed to as_query_engine.
    query_engine = index.as_query_engine(chat_mode="best", llm=llm, streaming=True)

    streaming_response = await query_engine.aquery(data.query)

    # Retrieval has already happened by the time the stream object exists, so
    # the source text can be returned alongside the generator.
    retrieval_context = [
        hit.get_content() for hit in (streaming_response.source_nodes or [])
    ]
    return QueryBaseResponseSchema(
        input=data.query,
        retrieval_context=retrieval_context,
        stream_gen=streaming_response.response_gen,
    )


In [9]:
async def run_query_stream():
    """Stream the sample ``query_data`` and print every chunk on its own line."""
    streamed = await kitchenai_bento_simple_rag_stream_vjnk(query_data)
    async for piece in streamed.stream_gen:
        print(piece)


asyncio.run(run_query_stream())


INFO HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



The
 Request
 for
 Proposal
 (
R
FP
)
 outlines
 guidelines
 and
 terms
 for
 submitting
 proposals
,
 including
 provisions
 on
 confidentiality
,
 proposal
 expenses
,
 rejection
 of
 proposals
,
 duration
 of
 prices
,
 acceptance
 of
 terms
 and
 conditions
,
 and
 the
 governing
 procurement
 law
.



# Embeddings

In [17]:
from kitchenai.contrib.kitchenai_sdk.schema import EmbedSchema


# Sample embedding request used to try out the embedding function in this notebook.
embed_data = EmbedSchema(
    text="Some text to embed.",
    metadata={"type": "embeddings", "keyword": "AI"}
)

def simple_rag_bento_vagh(data: EmbedSchema):
    """
    Embed a single piece of text into the shared Chroma collection. This is run in a background task.

    class EmbedSchema(Schema):
        text: str
        metadata: dict[str, str] | None = None
    Args:
        data: EmbedSchema. ``data.text`` becomes the document body and
            ``data.metadata`` (when provided) is attached to that document.

    Response:
        None. The text is indexed only for its effect on the vector store.
    """
    # Attach the caller's metadata to the document (empty dict when none was given).
    documents = [Document(text=data.text, metadata=data.metadata or {})]
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        show_progress=True,
        # Same transformation chain the storage function applies to parsed files.
        transformations=[TokenTextSplitter(), TitleExtractor(), QuestionsAnsweredExtractor()],
    )


# Embed the sample payload once. The function has no return statement, so
# `result` is None; the progress logs are the only visible output.
result = simple_rag_bento_vagh(embed_data)




Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:00<00:00,  1.70it/s]
INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  0%|          | 0/1 [00:00<?, ?it/s]INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]


Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

INFO HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [18]:
%kitchenai_register_previous_cell embedding kitchenai-bento-simple-rag


kitchenai_result: Registered new function 'kitchenai-bento-simple-rag' of type 'embedding' with hash 15b94838b492d68ae75047b98efa41975c7dc1da44cdc5e290ef03d27641930d.


# Create a KitchenAI Module 

This transforms your registered functions into a module that can be served directly. To create it, run the %kitchenai_create_module magic command.

Make sure this is the last cell in the notebook.


In [3]:
%kitchenai_create_module

INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


kitchenai_result: Created app.py
