# Query an existing Chroma DB with GUI (Gradio)
Sample of how to query a Chroma vector database, oriented toward building a RAG system.

In [14]:
# https://python.langchain.com/docs/integrations/vectorstores/chroma/

In [15]:
#!pip install chromadb sentence-transformers langchain_huggingface langchain_chroma

In [16]:
!pip install gradio

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




Select **model** for embeddings. 

We must select the same model that was used to create the embeddings already stored in the database.

In [17]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model; it has to match the one used for the vectors already stored in the database
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

Access existing database

In [18]:
from langchain_chroma import Chroma

# Open the "some_facts" collection, embedding queries with the `embeddings` model defined above
vector_store = Chroma(
    persist_directory="./chroma_some_facts",  # local folder where the database is persisted
    collection_name="some_facts",
    embedding_function=embeddings,
)

## Searching

In [19]:
# you don't need to run this section

In [20]:
# Fetch the 3 closest documents together with their scores.
# NOTE(review): in the recorded output the most relevant hit has the LOWEST score,
# so the value looks like a distance rather than a similarity -- confirm in the Chroma docs.
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=3,
)
for res, score in results:
    # ".6f" asks for six decimals explicitly. The previous "3f" only set a minimum
    # field width of 3 and fell back to the default six decimals, so the printed
    # text is unchanged.
    print(f"* [SIM={score:.6f}] {res.page_content}")

* [SIM=0.809472] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.
* [SIM=1.515494] I have a bad feeling I am going to get deleted :(
* [SIM=1.540267] I had chocolate chip pancakes and scrambled eggs for breakfast this morning.


In [21]:
# We can configure a 'RETRIEVER', a key LangChain component used to find relevant information in document collections

In [22]:
# Wrap the vector store in a retriever that returns the 3 most similar documents
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
# Last expression of the cell, so the retrieved documents are displayed as its output
retriever.invoke("Stealing from the bank is a crime")

[Document(metadata={}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

## Create a Gradio interface

In [23]:
# Create a function to make the query to the database 
# We use almost the same code as before, but wrapped in a function ;-)

In [24]:
def search_chroma(query, top_k):
    """Search the Chroma vector store and format the hits for display.

    Args:
        query: Free-text question typed by the user.
        top_k: Maximum number of documents to return. Any integral number is
            accepted (e.g. ``5`` or ``5.0``) and converted to ``int``.

    Returns:
        A string with one ``**Result N:**`` section per document, separated by
        blank lines; a prompt asking for input when the query is blank; or
        ``"Error: ..."`` if the search fails, so the UI shows the problem
        instead of a traceback.
    """
    try:
        # Nothing meaningful to search for: answer directly instead of querying
        if not query or not query.strip():
            return "Please enter a query."
        # UI sliders may deliver the value as a float; pass a real int as k
        results = vector_store.similarity_search(query, k=int(top_k))
        return "\n\n".join(
            f"**Result {position}:**\n{doc.page_content}"
            for position, doc in enumerate(results, start=1)
        )
    except Exception as e:
        # Deliberate best-effort: report the failure as text in the output box
        return f"Error: {e}"
    

In [25]:
import gradio as gr

In [26]:
# Search page: a query box and a result-count slider feed search_chroma,
# whose returned text is shown in the results box.
with gr.Blocks() as demo:
    gr.Markdown("### Chroma Database Search")

    with gr.Row():
        query_input = gr.Textbox(
            label="Enter Your Query",
            placeholder="Type your question here...",
        )
        top_k_input = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="Number of Results",
        )

    search_button = gr.Button("Search")
    output_box = gr.Textbox(label="Search Results", lines=15)

    # Clicking the button runs the search with the current inputs
    search_button.click(
        fn=search_chroma,
        inputs=[query_input, top_k_input],
        outputs=output_box,
    )

# Start the local web server for the app
demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [27]:
# We can do the same search through a retriever

In [33]:
# Similarity retriever over the vector store; k=3 is only the starting value,
# search_with_retriever overrides it on every call
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

def search_with_retriever(query, top_k):
    """Search through the module-level ``retriever`` and format the hits for display.

    Args:
        query: Free-text question typed by the user.
        top_k: Maximum number of documents to return. Any integral number is
            accepted (e.g. ``5`` or ``5.0``) and converted to ``int``.

    Returns:
        A string with one ``**Result N:**`` section per document, separated by
        blank lines; a prompt asking for input when the query is blank; or
        ``"Error: ..."`` if retrieval fails.

    Note:
        The requested ``k`` is written into ``retriever.search_kwargs``, so it
        stays in effect for later uses of the same retriever object.
    """
    try:
        # Nothing meaningful to search for: answer directly instead of querying
        if not query or not query.strip():
            return "Please enter a query."
        # Dynamically set the number of results (sliders may deliver a float)
        retriever.search_kwargs["k"] = int(top_k)
        # invoke() is the entry point already used earlier in this notebook;
        # it replaces the deprecated get_relevant_documents()
        results = retriever.invoke(query)
        return "\n\n".join(
            f"**Result {position}:**\n{doc.page_content}"
            for position, doc in enumerate(results, start=1)
        )
    except Exception as e:
        # Deliberate best-effort: report the failure as text in the output box
        return f"Error: {e}"

In [34]:
# Same search page as before, but the button now calls search_with_retriever
with gr.Blocks() as demo:
    gr.Markdown("### Chroma Database Search")

    with gr.Row():
        query_input = gr.Textbox(
            label="Enter Your Query",
            placeholder="Type your question here...",
        )
        top_k_input = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="Number of Results",
        )

    search_button = gr.Button("Search")
    output_box = gr.Textbox(label="Search Results", lines=15)

    # Clicking the button runs the retriever-based search with the current inputs
    search_button.click(
        fn=search_with_retriever,
        inputs=[query_input, top_k_input],
        outputs=output_box,
    )

# Start the local web server for the app
demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


