### RAG and Citations using Oracle Vector Store and Cohere Command-R
* Shows how to use **citations** to obtain **more grounded answers**

In [None]:
import logging
from pprint import pprint

import oracledb

from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy
from oci_command_r_oo import OCICommandR

# to compute embeddings vectors
from langchain_community.embeddings import OCIGenAIEmbeddings

from utils import load_configuration

# private information
from config_private import COMPARTMENT_ID, DB_USER, DB_PWD, DB_HOST_IP, DB_SERVICE

#### Settings

In [None]:
# Configure logging
logger = logging.getLogger("ConsoleLogger")

logger.setLevel(logging.INFO)

# Without a handler on this logger or on one of its ancestors, Python falls
# back to the "last resort" handler, which only emits WARNING and above: the
# INFO messages logged in this notebook would be silently dropped.
# hasHandlers() also checks ancestor loggers, so an already configured logging
# setup is respected and re-running this cell does not stack duplicate handlers.
if not logger.hasHandlers():
    logger.addHandler(logging.StreamHandler())

# load the config in the config.toml file
config = load_configuration()

# embeddings model and endpoint (OCI GenAI), as set in the configuration
OCI_EMBED_MODEL = config["embeddings"]["oci"]["embed_model"]
EMBED_ENDPOINT = config["embeddings"]["oci"]["embed_endpoint"]

# endpoint of the OCI service hosting the LLM
LLM_ENDPOINT = config["llm"]["oci"]["endpoint"]

# number of docs retrieved for each query
# reduced from config to simplify output here
TOP_K = 4

# for AI Vector Search
# to connect to DB
# default port is 1521
DSN = f"{DB_HOST_IP}:1521/{DB_SERVICE}"

print("The complete configuration is:")
print()
pprint(config)

In [None]:
# utility function
def print_metadata(v_metadata):
    """
    Print the source and the page number of a retrieved document.

    v_metadata is a dict with (at least) the keys 'source' and 'page', e.g.:
    {'source': './books/oracle-ai-vector-search-users-guide.pdf', 'page': 0}
    """
    source = v_metadata["source"]
    page = v_metadata["page"]

    print(f"- Source: {source}, page: {page}")

In [None]:
# create client for Embeddings and AI Vector Search

# The embed model is needed here to embed the query!
embed_model = OCIGenAIEmbeddings(
    auth_type="API_KEY",
    model_id=OCI_EMBED_MODEL,
    service_endpoint=EMBED_ENDPOINT,
    compartment_id=COMPARTMENT_ID,
)

# create the Vector Store (OracleVS)
try:
    # we need to provide a connection as input to OracleVS
    connection = oracledb.connect(user=DB_USER, password=DB_PWD, dsn=DSN)
except Exception as e:
    logger.error("Connection failed!")
    logger.error(e)
    # Re-raise: swallowing the error would leave `connection` and `v_store`
    # undefined, and the following cells would then fail with a NameError
    # that hides the real cause.
    raise

logger.info("Connection successful!")

# get an instance of OracleVS
# (kept outside the try block so that a failure here is not reported as a
# connection failure)
v_store = OracleVS(
    client=connection,
    table_name="ORACLE_KNOWLEDGE",
    distance_strategy=DistanceStrategy.COSINE,
    embedding_function=embed_model,
)

In [None]:
# Wrap the Vector Store in a LangChain retriever;
# "k" sets how many documents are requested for each query (TOP_K)
retriever = v_store.as_retriever(
    search_kwargs={"k": TOP_K},
)
logger.info("Retriever created...")

#### Set the query

In [None]:
# The question passed to the retriever and then to the chat model in the cells below.
# Alternative sample question (uncomment it and comment the other one to try it):
# query = "Could you explain what is JSON Relational Duality in Oracle Database 23c?"
query = "What is the VECTOR type in Oracle Database and how it is used?"

#### The semantic search using AI Vector Search

In [None]:
%%time
# Step 1: run the semantic search alone, to inspect what the retriever returns

result_docs = retriever.invoke(query)

# header of the report
print("", "--- Document retrieved from the knowledge base ---", "", sep="\n")

# one section per retrieved document: text first, then source and page
for doc_num, doc in enumerate(result_docs, start=1):
    print(
        "-------------------------------------------",
        f"Document n. {doc_num}",
        "",
        "- Content:",
        doc.page_content,
        "",
        sep="\n",
    )
    print_metadata(doc.metadata)
    print("-------------------------------------------", "", sep="\n")

#### Get the answer from Cohere Command-R

In [None]:
# for now, no history
chat_history = []

# Cohere wants a map
# take the output from the AI Vector Search
# and transform it into a format suitable for Cohere command-r:
# one dict per retrieved document, with a 1-based id, the text (snippet)
# and the source/page taken from the metadata (all values as strings)
documents_txt = [
    {
        "id": str(doc_id),
        "snippet": doc.page_content,
        "source": doc.metadata["source"],
        "page": str(doc.metadata["page"]),
    }
    for doc_id, doc in enumerate(result_docs, start=1)
]

command_r_params = {
    "model": "cohere.command-r-16k",
    "service_endpoint": LLM_ENDPOINT,
    "compartment_id": COMPARTMENT_ID,
    "max_tokens": 1024,
}
# this is a custom class that wraps OCI Python SDK
chat = OCICommandR(**command_r_params)

# pass the chat_history variable (not a new empty list), so that filling
# the history above actually reaches the model
response = chat.invoke(
    query=query, chat_history=chat_history, documents=documents_txt
)

In [None]:
# have another look at the query
# Display the query set earlier instead of re-assigning a copy of the literal:
# a second hard-coded copy can drift from the query that was actually sent to
# the model, making the printed question disagree with the answer.
print(query)

In [None]:
# Show the question, then the text of the answer returned by the chat model
print(f"Query: {query}", "", "Answer:", sep="\n")
print(response.data.chat_response.text)

In [None]:
# List the documents reported in the chat response.
print("Document used to answer:")
print()

# NOTE(review): `documents` is assumed to be None when the response carries no
# documents (confirm against the OCI SDK model); `or []` avoids a TypeError
# from iterating over None in that case.
for doc in response.data.chat_response.documents or []:
    print(doc)
    print("")

In [None]:
# List the citations reported in the chat response.
print("Citations:")
print()

# NOTE(review): `citations` is assumed to be None when the model returns no
# citations (confirm against the OCI SDK model); `or []` avoids a TypeError
# from iterating over None in that case.
for citation in response.data.chat_response.citations or []:
    print(citation)
    print("")