### Oracle Vector DB wrapped as a llama-index custom Vector Store

* inspired by: https://docs.llamaindex.ai/en/stable/examples/low_level/vector_store.html

In [1]:
import logging
import sys

from typing import List, Any, Optional, Dict, Tuple
from llama_index.vector_stores.types import (
    VectorStore,
    VectorStoreQuery,
    VectorStoreQueryResult,
)
from llama_index import StorageContext, VectorStoreIndex, ServiceContext
from llama_index.schema import TextNode, BaseNode, Document

import oci
import ads

# only
import oracledb
from oci_utils import load_oci_config
from ads.llm import GenerativeAIEmbeddings, GenerativeAI
from oracle_vector_db import OracleVectorStore

from config import EMBED_MODEL
from config_private import COMPARTMENT_OCID, ENDPOINT

In [2]:
# library versions this notebook was run with
print(
    f"oracledb version: {oracledb.__version__}",
    f"oci version: {oci.__version__}",
    sep="\n",
)

oracledb version: 2.0.0.dev20231121
oci version: 2.112.1+preview.1.1649


In [3]:
# for debugging: uncomment the two lines below to send DEBUG-level log output to stdout
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [4]:
# setup: load the OCI configuration
oci_config = load_oci_config()

# need to do it this way: build the API-key auth once from the loaded config
# and reuse it below instead of calling ads.auth.api_keys() a second time
api_keys_config = ads.auth.api_keys(oci_config)

# the embedding model is selected by EMBED_MODEL (imported from config):
# english, or for other languages use: multilingual
embed_model = GenerativeAIEmbeddings(
    compartment_id=COMPARTMENT_OCID,
    model=EMBED_MODEL,
    auth=api_keys_config,
    # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs={"service_endpoint": ENDPOINT},
)

#### Using the wrapper for the DB Vector Store

In [5]:
# instantiate the Oracle DB vector store wrapper imported from oracle_vector_db
# NOTE(review): verbose=True presumably turns on the store's log output — confirm in oracle_vector_db
v_store = OracleVectorStore(verbose=True)

In [6]:
# the test question, reused by both the direct vector-store query and the query engine
question = "What is JSON Relational Duality in Oracle Database 23c? Explain with details"

In [7]:
# Embed the question using OCI GenAI. embed_documents is called with a
# one-element list, so the first (and only) returned vector is kept.
query_embedding = embed_model.embed_documents([question])[0]

# Wrap the vector in a llama-index query object asking for the top 6 matches.
query_obj = VectorStoreQuery(
    similarity_top_k=6,
    query_embedding=query_embedding,
)

#### Use our Vector Store DB

In [8]:
%%time

# query the vector store directly with the prepared query object (cell is timed by %%time)
q_result = v_store.query(query_obj)

2023-12-23 12:54:18,955 - INFO - ---> Calling query on DB
2023-12-23 12:54:19,182 - INFO - select: select V.id, C.CHUNK, C.PAGE_NUM, 
                            ROUND(VECTOR_DISTANCE(V.VEC, :1, DOT), 3) as d 
                            from VECTORS V, CHUNKS C
                            where C.ID = V.ID
                            order by d
                            FETCH FIRST 6 ROWS ONLY
2023-12-23 12:54:19,482 - INFO - Query duration: 0.5 sec.


CPU times: user 22.2 ms, sys: 6.52 ms, total: 28.7 ms
Wall time: 529 ms


In [9]:
# Print id, similarity and text for every hit returned by the vector store.
# The id loop variable is named node_id (not `id`): a top-level loop variable
# stays in the kernel namespace, and `id` would shadow the builtin id() for
# every cell executed afterwards.
for n, node_id, sim in zip(q_result.nodes, q_result.ids, q_result.similarities):
    print(f"Dod. id: {node_id}")
    # the stored value is negated for display
    # NOTE(review): the sign convention comes from OracleVectorStore — confirm there
    print(f"Similarity: {-sim}")
    print(n.text)
    print("")

Dod. id: abc7952c8991ff8be803eb97c2b1d5ba29e07996592b4fb31ebaa1bbdcf81e0f
Similarity: 0.557
Oracle® Database Oracle Database New Features Release 23c F48428-15 October 2023 

Dod. id: fe036f82288fa16221c21ba603e60ad35ae556abfa3d35bed5fe1d360ee4786d
Similarity: 0.547
Contents Preface Audience xxiii Documentation Accessibility xxiii Related Documentation xxiv Conventions xxiv 1 Introduction to Oracle Database About Relational Databases 1-1 Database Management System (DBMS) 1-2 Relational Model 1-2 Relational Database Management System (RDBMS) 1-3 Brief History of Oracle Database 1-3 Schema Objects 1-5 Tables 1-5 Indexes 1-6 Data Access 1-6 Structured Query Language (SQL) 1-6 PL/SQL, Java, and JavaScript 1-7 Transaction Management 1-8 Transactions 1-8 Data Concurrency 1-8 Data Consistency 1-9 Oracle Database Architecture 1-9 Database and Instance 1-10 Multitenant Architecture 1-10 Sharding Architecture 1-14 Database Storage Structures 1-15 Physical Storage Structures 1-15 Logical Storage 

#### Integrate in the bigger RAG picture

In [10]:
# OCI GenAI LLM used to generate the final answer (responses capped at 1024 tokens).
# The API-key auth built in the setup cell is passed explicitly, so the LLM client
# authenticates the same way as the embedding client instead of relying on the
# library's default auth.
llm_oci = GenerativeAI(
    compartment_id=COMPARTMENT_OCID,
    max_tokens=1024,
    auth=api_keys_config,
    # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs={"service_endpoint": ENDPOINT},
)

In [11]:
# bundle the OCI LLM and the OCI embedding model into one llama-index ServiceContext
service_context = ServiceContext.from_defaults(llm=llm_oci, embed_model=embed_model)

In [12]:
# build the llama-index index directly from the vector store
# (no documents are passed in this call)
index = VectorStoreIndex.from_vector_store(
    service_context=service_context,
    vector_store=v_store,
)

In [13]:
# turn the index into a query engine that retrieves the top 5 matches per question
query_engine = index.as_query_engine(similarity_top_k=5)

In [14]:
%%time

response = query_engine.query(question)

print(f"Question: {question}")
print(response.response)
print("")

2023-12-23 12:54:20,817 - INFO - ---> Calling query on DB
2023-12-23 12:54:20,989 - INFO - select: select V.id, C.CHUNK, C.PAGE_NUM, 
                            ROUND(VECTOR_DISTANCE(V.VEC, :1, DOT), 3) as d 
                            from VECTORS V, CHUNKS C
                            where C.ID = V.ID
                            order by d
                            FETCH FIRST 5 ROWS ONLY
2023-12-23 12:54:21,260 - INFO - Query duration: 0.4 sec.


Question: What is JSON Relational Duality in Oracle Database 23c? Explain with details
JSON Relational Duality is a feature of the Oracle Database 23c that allows you to store data in a normalized relational format while providing flexible and simple access to applications in the form of JSON documents. Here's a detailed explanation:

Traditional relational databases store data in tables with rows and columns, following a structured and normalized format. While this approach excels at handling structured data, it can pose challenges when dealing with semi-structured or unstructured data like JSON.

JSON Relational Duality in Oracle Database 23c offers a solution by allowing you to create "Duality Views." These views present relational data stored in tables as JSON documents, providing a document-oriented interface to applications. The key advantage is that you get the best of both worlds: the efficient storage and querying capabilities of a relational database, along with the flexibili

#### Additional tests