In [1]:
import os
import gradio as gr

# my OracleVectorStore for LangChain
from oracle_vector_db_lc import OracleVectorStore

from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import OCIGenAIEmbeddings
from langchain_community.llms import OCIGenAI

from utils import format_docs

from config_private import COMPARTMENT_OCID

In [2]:
# Original note said "for tracing": set the TOKENIZERS_PARALLELISM env var to "false"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- settings a reader may want to tune ---
ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
EMBED_MODEL = "cohere.embed-multilingual-v3.0"
GENAI_MODEL = "cohere.command"

# number of chunks requested from the vector store per question
TOP_K = 4

# connection arguments shared by the embeddings client and the LLM client
oci_client_args = dict(
    auth_type="API_KEY",
    service_endpoint=ENDPOINT,
    compartment_id=COMPARTMENT_OCID,
)

# embeddings client
embed_model = OCIGenAIEmbeddings(model_id=EMBED_MODEL, **oci_client_args)

# text-generation client
llm = OCIGenAI(
    model_id=GENAI_MODEL,
    model_kwargs=dict(max_tokens=1024, temperature=0.1),
    **oci_client_args,
)

# RAG prompt pulled from the LangChain hub; it works fine with Cohere
prompt = hub.pull("rlm/rag-prompt")

# vector store backed by Oracle AI Vector Search (custom LangChain integration)
v_store = OracleVectorStore(
    embedding=embed_model,
    collection_name="ORACLE_KNOWLEDGE",
    verbose=True,
)

In [3]:
# expose the vector store as a retriever that asks for TOP_K results per query
retriever = v_store.as_retriever(search_kwargs=dict(k=TOP_K))

In [4]:
# RAG pipeline written with the LangChain Expression Language (LCEL).
# First stage: build the prompt variables from the incoming question.
#   "context"  <- retriever output piped through format_docs
#   "question" <- RunnablePassthrough() applied to the same input
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Remaining stages: fill the prompt, call the LLM, parse the result as a string
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [5]:
# sample question used to try out the RAG chain
question = "What is Oracle Strategy for Generative AI?"

In [6]:
%%time
# Run the chain once on the sample question and show the result.
# The result is stored as `response` (not `answer`) so that this cell never
# clobbers the `answer` callback function defined further down when cells
# are re-run out of order.
response = rag_chain.invoke(question)

print(f"Question: {question}")
print()
print(response)
print()

2024-02-26 18:07:25,132 - INFO - top_k: 4
2024-02-26 18:07:25,133 - INFO - 
2024-02-26 18:07:26,049 - INFO - select: select C.id, C.CHUNK, C.REF, 
                            ROUND(VECTOR_DISTANCE(C.VEC, :1, DOT), 3) as d 
                            from ORACLE_KNOWLEDGE C
                            order by d
                            FETCH FIRST 4 ROWS ONLY
2024-02-26 18:07:26,267 - INFO - Query duration: 0.4 sec.


Question: What is Oracle Strategy for Generative AI?

 Oracle's strategy for generative AI centers around providing enterprises with an entire stack of integrated services, from data storage to applications. They aim to make working with AI simpler by providing enterprises with services that are tailored to their needs, with Oracle's value starting at the top of the stack rather than in silicon. Oracle plans to achieve this by offering generative AI across its Fusion SaaS applications, supported by autonomous databases with vector embeddings and run on high-performance infrastructure. This strategy is intended to deliver an integrated and seamless AI experience to enterprises, allowing them to scale solutions on demand, customize models, and create private model endpoints for business. 

Oracle also aims to provide enterprises with generative AI capabilities that are high performing and cost-effective, adapting models to real-world enterprise scenarios and training large language model

In [7]:
# Gradio callback: forwards the question to the RAG chain


def answer(question):
    """Run the RAG chain on ``question`` and return its output.

    Args:
        question: the text typed by the user in the UI.

    Returns:
        Whatever ``rag_chain.invoke`` returns for that question.
    """
    return rag_chain.invoke(question)


# UI widgets: a two-line input box and a read-only, ten-line output box
question_box = gr.Textbox(lines=2, label="Question")
answer_box = gr.Textbox(lines=10, interactive=False, label="Answer")

# Wire the `answer` callback to the widgets; flagging and analytics are disabled
demo = gr.Interface(
    fn=answer,
    inputs=[question_box],
    outputs=answer_box,
    title="Semantic Search on OCI Knowledge",
    allow_flagging="never",
    analytics_enabled=False,
)

# share=True also requests a share link in addition to the local URL
demo.launch(share=True)

2024-02-26 18:08:02,878 - INFO - HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
2024-02-26 18:08:02,886 - INFO - HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"


Running on local URL:  http://127.0.0.1:7860


2024-02-26 18:08:08,544 - INFO - HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
2024-02-26 18:08:08,634 - INFO - HTTP Request: GET https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64 "HTTP/1.1 200 OK"
2024-02-26 18:08:19,311 - INFO - top_k: 4
2024-02-26 18:08:19,312 - INFO - 
2024-02-26 18:08:19,312 - INFO -  2024-02-26 17:08:19.312975: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText
2024-02-26 18:08:19,829 - INFO - select: select C.id, C.CHUNK, C.REF, 
                            ROUND(VECTOR_DISTANCE(C.VEC, :1, DOT), 3) as d 
                            from ORACLE_KNOWLEDGE C
                            order by d
                            FETCH FIRST 4 ROWS ONLY
2024-02-26 18:08:20,039 - INFO - Query duration: 0.4 sec.
2024-02-26 18:08:20,042 - INFO -  2024-02-26 17:08:20.042215: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/genera


Could not create share link. Missing file: /Users/lsaetta/miniforge3/envs/langchain01/lib/python3.9/site-packages/gradio/frpc_darwin_arm64_v0.2. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64
2. Rename the downloaded file to: frpc_darwin_arm64_v0.2
3. Move the file to this location: /Users/lsaetta/miniforge3/envs/langchain01/lib/python3.9/site-packages/gradio







2024-02-26 18:09:03,010 - INFO - top_k: 4
2024-02-26 18:09:03,011 - INFO - 
2024-02-26 18:09:03,013 - INFO -  2024-02-26 17:09:03.013738: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText
2024-02-26 18:09:03,481 - INFO - select: select C.id, C.CHUNK, C.REF, 
                            ROUND(VECTOR_DISTANCE(C.VEC, :1, DOT), 3) as d 
                            from ORACLE_KNOWLEDGE C
                            order by d
                            FETCH FIRST 4 ROWS ONLY
2024-02-26 18:09:03,700 - INFO - Query duration: 0.4 sec.
2024-02-26 18:09:03,715 - INFO -  2024-02-26 17:09:03.715132: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/generateText
2024-02-26 18:09:55,009 - INFO - top_k: 4
2024-02-26 18:09:55,010 - INFO - 
2024-02-26 18:09:55,012 - INFO -  2024-02-26 17:09:55.012309: Request: POST https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedT