In [1]:
import os
import gradio as gr

# my OracleVectorStore for LangChain
from oracle_vector_db_lc import OracleVectorStore

from langchain_community.embeddings import OCIGenAIEmbeddings

from config_private import COMPARTMENT_OCID

#### Setup

In [2]:
# Set before any client is created.
# NOTE(review): the original comment said this is "for tracing"; the variable
# actually set is TOKENIZERS_PARALLELISM -- confirm the intent.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# OCI Generative AI inference endpoint and the embedding model served from it
ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
EMBED_MODEL = "cohere.embed-multilingual-v3.0"

# Embeddings client, authenticated with an API key
embed_model = OCIGenAIEmbeddings(
    compartment_id=COMPARTMENT_OCID,
    service_endpoint=ENDPOINT,
    model_id=EMBED_MODEL,
    auth_type="API_KEY",
)

# AI Vector Search vector store over the ORACLE_KNOWLEDGE collection;
# queries are embedded with embed_model
v_store = OracleVectorStore(
    collection_name="ORACLE_KNOWLEDGE",
    embedding=embed_model,
    verbose=True,
)

#### Demo with Gradio

In [3]:
# formats a single retrieved document for display
def format_doc(doc, choice):
    """Render one retrieved document as a text fragment.

    Args:
        doc: Retrieved document. Its ``metadata`` mapping must contain a
            "source" entry; ``page_content`` is read only when ``choice``
            is "Full".
        choice: Output type. "Full" emits the document text followed by its
            source reference; any other value emits the reference only.

    Returns:
        The formatted fragment, always terminated by a dashed separator so
        fragments can be concatenated directly.

    Raises:
        KeyError: If ``doc.metadata`` has no "source" entry.
    """
    ref_name = doc.metadata["source"]

    if choice == "Full":
        output = doc.page_content + "\n\n" + ref_name
    else:
        output = ref_name

    # separator between consecutive documents; a plain literal is enough
    # because nothing is interpolated into it
    return output + "\n\n--------------------------\n\n"


# builds the retriever... a new one per call, so top_k can be changed
def get_retriever(top_k):
    """Create a retriever from the module-level vector store.

    Args:
        top_k: Value passed to the retriever as the ``k`` search argument.

    Returns:
        The object returned by ``v_store.as_retriever``.
    """
    search_kwargs = {"k": top_k}
    return v_store.as_retriever(search_kwargs=search_kwargs)


def retrieve(question, choice, top_k):
    """Run a semantic search and return the formatted results as one string.

    Args:
        question: Free-text query typed by the user.
        choice: Output type forwarded to ``format_doc`` ("Full" or reference
            only).
        top_k: Number of documents requested from the retriever.

    Returns:
        The concatenation of ``format_doc`` output for every retrieved
        document, or an empty string when the question is blank or nothing
        is retrieved.
    """
    # a blank query has nothing to embed: skip the embedding and database calls
    if not question or not question.strip():
        return ""

    retriever = get_retriever(top_k)

    # run the retrieval; invoke() is the Runnable entry point that replaces
    # the deprecated get_relevant_documents()
    result_docs = retriever.invoke(question)

    # build the output in one pass instead of repeated string concatenation
    return "".join(format_doc(doc, choice) for doc in result_docs)


# Gradio UI: the three inputs are passed positionally to
# retrieve(question, choice, top_k); the result is shown in a read-only textbox.
demo = gr.Interface(
    fn=retrieve,
    title="Semantic Search on OCI Knowledge",
    inputs=[
        # free-text query
        gr.Textbox(label="Question", lines=2),
        # output type: full text plus reference, or reference only
        gr.Radio(["Full", "Only ref"], value="Full", label="Select output type"),
        # number of documents to retrieve
        gr.Slider(label="TOP_K", minimum=2, maximum=10, step=1, value=5),
    ],
    outputs=gr.Textbox(label="Documents", lines=10, interactive=False),
    analytics_enabled=False,
    allow_flagging="never",
)

# NOTE(review): share=True asks Gradio for a public share link in addition to
# the local URL -- confirm that exposing this search UI publicly is intended.
demo.launch(share=True)

2024-02-26 17:01:41,475 - INFO - HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
2024-02-26 17:01:41,482 - INFO - HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"


Running on local URL:  http://127.0.0.1:7860


2024-02-26 17:01:50,404 - INFO - HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-02-26 17:01:50,541 - INFO - HTTP Request: GET https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64 "HTTP/1.1 200 OK"
2024-02-26 17:02:10,502 - INFO - top_k: 5
2024-02-26 17:02:10,503 - INFO - 
2024-02-26 17:02:10,504 - INFO -  2024-02-26 16:02:10.504617: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText
2024-02-26 17:02:13,046 - INFO - select: select C.id, C.CHUNK, C.REF, 
                            ROUND(VECTOR_DISTANCE(C.VEC, :1, DOT), 3) as d 
                            from ORACLE_KNOWLEDGE C
                            order by d
                            FETCH FIRST 5 ROWS ONLY
2024-02-26 17:02:13,282 - INFO - Query duration: 0.5 sec.



Could not create share link. Missing file: /Users/lsaetta/miniforge3/envs/langchain01/lib/python3.9/site-packages/gradio/frpc_darwin_arm64_v0.2. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64
2. Rename the downloaded file to: frpc_darwin_arm64_v0.2
3. Move the file to this location: /Users/lsaetta/miniforge3/envs/langchain01/lib/python3.9/site-packages/gradio







2024-02-26 17:03:12,264 - INFO - top_k: 5
2024-02-26 17:03:12,266 - INFO - 
2024-02-26 17:03:12,267 - INFO -  2024-02-26 16:03:12.267701: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText
2024-02-26 17:03:12,765 - INFO - select: select C.id, C.CHUNK, C.REF, 
                            ROUND(VECTOR_DISTANCE(C.VEC, :1, DOT), 3) as d 
                            from ORACLE_KNOWLEDGE C
                            order by d
                            FETCH FIRST 5 ROWS ONLY
2024-02-26 17:03:13,004 - INFO - Query duration: 0.4 sec.
