In [2]:
import logging
import sys

from typing import List, Any, Optional, Dict, Tuple
from llama_index.vector_stores.types import (
    VectorStore,
    VectorStoreQuery,
    VectorStoreQueryResult,
)
from llama_index.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)

from llama_index import StorageContext, VectorStoreIndex, ServiceContext
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.schema import TextNode, BaseNode, Document
from llama_index import SimpleDirectoryReader
from llama_index.retrievers.auto_merging_retriever import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.node_parser import get_leaf_nodes, get_root_nodes
from llama_index.response.notebook_utils import display_source_node

import oci
import ads
from ads.llm import GenerativeAIEmbeddings, GenerativeAI
import oracledb

from config import INPUT_FILES
from config_private import COMPARTMENT_OCID, ENDPOINT

from oci_utils import load_oci_config

# OracleVectorStore is the class implemented for the integration with LlamaIndex
from oracle_vector_db import OracleVectorStore

In [3]:
# Setup: load the OCI configuration and build the embedding model.
oci_config = load_oci_config()

# Build the ADS API-key auth object once from the loaded OCI config,
# so it can be reused wherever OCI credentials are needed.
api_keys_config = ads.auth.api_keys(oci_config)

# English-only embedding model; for other languages use the multilingual one.
MODEL_NAME = "cohere.embed-english-v3.0"

embed_model = GenerativeAIEmbeddings(
    compartment_id=COMPARTMENT_OCID,
    model=MODEL_NAME,
    # Reuse the auth object built above instead of calling
    # ads.auth.api_keys(oci_config) a second time.
    auth=api_keys_config,
    # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs={
        "service_endpoint": ENDPOINT
    },
)

In [4]:
# Read the source PDF; the result is the list of loaded documents
# that the following cells iterate over.
pages = SimpleDirectoryReader(
    input_files=[
        "./oracle-database-23c-new-features-guide.pdf",
    ],
).load_data()

In [5]:
# Concatenate the content of every loaded page (separated by a blank line)
# and wrap the result in a single Document for the node parser.
doc_text = "\n\n".join(page.get_content() for page in pages)
docs = [
    Document(text=doc_text),
]

In [6]:
# Chunk sizes handed to the hierarchical parser, largest first.
chunk_sizes = [2048, 512, 256]

# Build the hierarchical parser and split the merged document into nodes.
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
nodes = node_parser.get_nodes_from_documents(docs)

In [7]:
# Sanity check: total number of nodes returned by the hierarchical parser.
len(nodes)

319

In [8]:
# Extract the leaf nodes from the full node list; these are the nodes
# passed to VectorStoreIndex (and therefore embedded) further below.
leaf_nodes = get_leaf_nodes(nodes)

# Sanity check: number of leaf nodes (a subset of `nodes`).
len(leaf_nodes)

208

In [9]:
# Instantiate the project's Oracle-backed vector store with verbose output enabled.
v_store = OracleVectorStore(verbose=True)

In [10]:
# OCI Generative AI LLM used to synthesize the final answers.
llm_oci = GenerativeAI(
    compartment_id=COMPARTMENT_OCID,
    max_tokens=1024,
    # Use the same explicit credentials as the embedding model (built in the
    # setup cell) rather than silently falling back to the ADS default auth.
    auth=api_keys_config,
    # Optionally you can specify keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs={
        "service_endpoint": ENDPOINT
    },
)

In [11]:
# Service context: the OCI LLM plus the OCI embedding model.
service_context = ServiceContext.from_defaults(
    llm=llm_oci,
    embed_model=embed_model,
)

# Storage context backed by the Oracle vector store.
storage_context = StorageContext.from_defaults(
    vector_store=v_store,
)

# Register the full node list (not only the leaves) in the docstore.
storage_context.docstore.add_documents(nodes)

In [12]:
# Build the vector index over the leaf nodes only, showing a progress bar
# while the embeddings are generated.
base_index = VectorStoreIndex(
    leaf_nodes,
    show_progress=True,
    service_context=service_context,
    storage_context=storage_context,
)

Generating embeddings:   0%|          | 0/208 [00:00<?, ?it/s]

In [17]:
# This must be run before querying: persist() writes the embeddings and the
# chunk texts to the DB (see the log output below).
base_index.vector_store.persist()

2023-12-19 15:37:29,864 - INFO - Persisting to DB...
2023-12-19 15:37:30,163 - INFO - Saving embeddings to DB...
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 208/208 [00:18<00:00, 11.21it/s]
2023-12-19 15:37:48,732 - INFO - Saving texts to DB...
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 208/208 [00:30<00:00,  6.81it/s]


In [18]:
# Base retriever: fetch the 10 most similar entries from the vector index.
base_retriever = base_index.as_retriever(
    similarity_top_k=10,
    verbose=True,
)

# Wrap the base retriever in an AutoMergingRetriever; it is also given the
# storage context, whose docstore holds every node added earlier.
retriever = AutoMergingRetriever(
    base_retriever,
    storage_context,
    verbose=True,
)

In [19]:
# Query engine combining the auto-merging retriever with the service context
# (LLM + embedding model) defined earlier.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    service_context=service_context,
)

In [21]:
%%time

question = "Write an email where you highlight advantage coming from the adoption of Data Guard for Disaster Recovery?"

response = query_engine.query(question)

print(f"Question: {question}") 
print(response.response)
print("")

2023-12-19 15:39:20,056 - INFO - ---> Calling query on DB


select: select V.id, C.CHUNK, ROUND(VECTOR_DISTANCE(V.VEC, :1, DOT), 3)
                            as d from VECTORS V, CHUNKS C
                            where C.ID = V.ID
                            order by d
                            FETCH FIRST 10 ROWS ONLY


2023-12-19 15:39:21,002 - INFO - Query duration: 0.9 sec.


Question: Write an email where you highlight advantage coming from the adoption of Data Guard for Disaster Recovery?
Dear [Insert Name],

I would like to bring to your attention the advantages of using Data Guard for disaster recovery. Data Guard is an essential component of any enterprise-level database management system, particularly for Oracle databases. Here are some key benefits of adopting Data Guard:

1. Increases Fault Tolerance and High Availability: Implementing Data Guard ensures that your database can continue operating even in the event of a failure. It provides a reliable backup system that enables continuous data availability and minimizes downtime. With Data Guard, your database can withstand failures such as server crashes, network issues, or even data corruption.

2. Enhances Scalability: Data Guard facilitates the distribution of data across multiple databases, a process known as sharding. This feature improves scalability by allowing databases to handle increased da